From c84d0538be3b5f6739c6968f3bb1b8e4abe8eadf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 4 Aug 2022 10:57:15 +0200 Subject: [PATCH 001/546] [rubygems/rubygems] Fix unused variable warning ``` /Users/deivid/Code/rubygems/rubygems/test/rubygems/test_gem_resolver_installer_set.rb:55: warning: assigned but unused variable - a_1_local ``` https://github.com/rubygems/rubygems/commit/9ea4534800 --- test/rubygems/test_gem_resolver_installer_set.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rubygems/test_gem_resolver_installer_set.rb b/test/rubygems/test_gem_resolver_installer_set.rb index 32e1faa28d4368..ffa6b13ea4d599 100644 --- a/test/rubygems/test_gem_resolver_installer_set.rb +++ b/test/rubygems/test_gem_resolver_installer_set.rb @@ -52,7 +52,7 @@ def test_add_always_install_platform end def test_add_always_install_index_spec_platform - a_1_local, a_1_local_gem = util_gem "a", 1 do |s| + _, a_1_local_gem = util_gem "a", 1 do |s| s.platform = Gem::Platform.local end From 8bab09983046351453c7c86c003cfadad3dac01a Mon Sep 17 00:00:00 2001 From: git Date: Fri, 5 Aug 2022 00:09:30 +0900 Subject: [PATCH 002/546] * 2022-08-05 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 313c1feaf9e490..bcd3b29f5095f8 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 4 +#define RUBY_RELEASE_DAY 5 #include "ruby/version.h" #include "ruby/internal/abi.h" From 7f5f9d19c54d3d5e0c2b2947785d8821b752641d Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Thu, 4 Aug 2022 08:18:24 -0700 Subject: [PATCH 003/546] YJIT: Add known_* helpers for Type (#6208) * YJIT: Add known_* helpers for Type This adds a few helpers to Type which all return Options representing what is known, from a Ruby perspective, about the type. 
This includes: * known_class_of: If known, the class represented by this type * known_value_type: If known, the T_ value type * known_exact_value: If known, the exact VALUE represented by this type (currently this is only available for true/false/nil) * known_truthy: If known, whether or not this value evaluates as true (not false or nil) The goal of this is to abstract away the specifics of the mappings between types wherever possible from the codegen. For example previously by introducing Type::CString as a more specific version of Type::TString, uses of Type::TString in codegen needed to be updated to check either case. Now by using known_value_type, at least in theory we can introduce new types with minimal (if any) codegen changes. I think rust's Option type allows us to represent this uncertainty fairly well, and should help avoid mistakes, and the matching using this turned out pretty cleanly. * YJIT: Use known_value_type for checktype * YJIT: Use known_value_type for T_STRING check * YJIT: Use known_class_of in guard_known_klass * YJIT: Use known truthyness in jit_rb_obj_not * YJIT: Rename known_class_of => known_class --- yjit/src/codegen.rs | 159 ++++++++++++++++++++------------------------ yjit/src/core.rs | 54 +++++++++++++++ 2 files changed, 125 insertions(+), 88 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 0acd1972c39df1..818e3fbb41edee 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2218,22 +2218,16 @@ fn gen_checktype( let val = ctx.stack_pop(1); // Check if we know from type information - match (type_val, val_type) { - (RUBY_T_STRING, Type::TString) - | (RUBY_T_STRING, Type::CString) - | (RUBY_T_ARRAY, Type::Array) - | (RUBY_T_HASH, Type::Hash) => { - // guaranteed type match - let stack_ret = ctx.stack_push(Type::True); - mov(cb, stack_ret, uimm_opnd(Qtrue.as_u64())); - return KeepCompiling; - } - _ if val_type.is_imm() || val_type.is_specific() => { - // guaranteed not to match T_STRING/T_ARRAY/T_HASH - 
let stack_ret = ctx.stack_push(Type::False); - mov(cb, stack_ret, uimm_opnd(Qfalse.as_u64())); - return KeepCompiling; - } + match val_type.known_value_type() { + Some(value_type) => { + if value_type == type_val { + jit_putobject(jit, ctx, cb, Qtrue); + return KeepCompiling; + } else { + jit_putobject(jit, ctx, cb, Qfalse); + return KeepCompiling; + } + }, _ => (), } @@ -2502,7 +2496,7 @@ fn gen_equality_specialized( // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) let btype = ctx.get_opnd_type(StackOpnd(0)); - if btype != Type::TString && btype != Type::CString { + if btype.known_value_type() != Some(RUBY_T_STRING) { mov(cb, REG0, C_ARG_REGS[1]); // Note: any T_STRING is valid here, but we check for a ::String for simplicity // To pass a mutable static variable (rb_cString) requires an unsafe block @@ -3405,78 +3399,70 @@ fn jit_guard_known_klass( ) { let val_type = ctx.get_opnd_type(insn_opnd); + if val_type.known_class() == Some(known_klass) { + // We already know from type information that this is a match + return; + } + if unsafe { known_klass == rb_cNilClass } { assert!(!val_type.is_heap()); - if val_type != Type::Nil { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is nil"); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + add_comment(cb, "guard object is nil"); + cmp(cb, REG0, imm_opnd(Qnil.into())); + jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Nil); - } + ctx.upgrade_opnd_type(insn_opnd, Type::Nil); } else if unsafe { known_klass == rb_cTrueClass } { assert!(!val_type.is_heap()); - if val_type != Type::True { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is true"); - cmp(cb, REG0, imm_opnd(Qtrue.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, 
side_exit); + add_comment(cb, "guard object is true"); + cmp(cb, REG0, imm_opnd(Qtrue.into())); + jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::True); - } + ctx.upgrade_opnd_type(insn_opnd, Type::True); } else if unsafe { known_klass == rb_cFalseClass } { assert!(!val_type.is_heap()); - if val_type != Type::False { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is false"); - assert!(Qfalse.as_i32() == 0); - test(cb, REG0, REG0); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); + add_comment(cb, "guard object is false"); + assert!(Qfalse.as_i32() == 0); + test(cb, REG0, REG0); + jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::False); - } + ctx.upgrade_opnd_type(insn_opnd, Type::False); } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() { - assert!(!val_type.is_heap()); // We will guard fixnum and bignum as though they were separate classes // BIGNUM can be handled by the general else case below - if val_type != Type::Fixnum || !val_type.is_imm() { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is fixnum"); - test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64)); - jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); - } + add_comment(cb, "guard object is fixnum"); + test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64)); + jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); + ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() { assert!(!val_type.is_heap()); // We will guard STATIC vs DYNAMIC as though they were separate classes // DYNAMIC symbols can be handled by the general else case below - if val_type != Type::ImmSymbol 
|| !val_type.is_imm() { - assert!(val_type.is_unknown()); - - add_comment(cb, "guard object is static symbol"); - assert!(RUBY_SPECIAL_SHIFT == 8); - cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); - } + assert!(val_type.is_unknown()); + + add_comment(cb, "guard object is static symbol"); + assert!(RUBY_SPECIAL_SHIFT == 8); + cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() { assert!(!val_type.is_heap()); - if val_type != Type::Flonum || !val_type.is_imm() { - assert!(val_type.is_unknown()); - - // We will guard flonum vs heap float as though they were separate classes - add_comment(cb, "guard object is flonum"); - mov(cb, REG1, REG0); - and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64)); - cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); - } + assert!(val_type.is_unknown()); + + // We will guard flonum vs heap float as though they were separate classes + add_comment(cb, "guard object is flonum"); + mov(cb, REG1, REG0); + and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64)); + cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); } else if unsafe { FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON as usize)) != VALUE(0) && sample_instance == rb_attr_get(known_klass, id__attached__ as ID) @@ -3496,11 +3482,6 @@ fn jit_guard_known_klass( jit_mov_gc_ptr(jit, cb, REG1, sample_instance); cmp(cb, REG0, REG1); jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - } else if val_type 
== Type::CString && unsafe { known_klass == rb_cString } { - // guard elided because the context says we've already checked - unsafe { - assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") - }; } else { assert!(!val_type.is_imm()); @@ -3576,23 +3557,25 @@ fn jit_rb_obj_not( ) -> bool { let recv_opnd = ctx.get_opnd_type(StackOpnd(0)); - if recv_opnd == Type::Nil || recv_opnd == Type::False { - add_comment(cb, "rb_obj_not(nil_or_false)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::True); - mov(cb, out_opnd, uimm_opnd(Qtrue.into())); - } else if recv_opnd.is_heap() || recv_opnd.is_specific() { - // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. - add_comment(cb, "rb_obj_not(truthy)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::False); - mov(cb, out_opnd, uimm_opnd(Qfalse.into())); - } else { - // jit_guard_known_klass() already ran on the receiver which should - // have deduced deduced the type of the receiver. This case should be - // rare if not unreachable. - return false; + match recv_opnd.known_truthy() { + Some(false) => { + add_comment(cb, "rb_obj_not(nil_or_false)"); + ctx.stack_pop(1); + let out_opnd = ctx.stack_push(Type::True); + mov(cb, out_opnd, uimm_opnd(Qtrue.into())); + }, + Some(true) => { + // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. 
+ add_comment(cb, "rb_obj_not(truthy)"); + ctx.stack_pop(1); + let out_opnd = ctx.stack_push(Type::False); + mov(cb, out_opnd, uimm_opnd(Qfalse.into())); + }, + _ => { + return false; + }, } + true } diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 8242c9477ea946..64585653d94e47 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -126,6 +126,60 @@ impl Type { } } + /// Returns an Option with the T_ value type if it is known, otherwise None + pub fn known_value_type(&self) -> Option { + match self { + Type::Nil => Some(RUBY_T_NIL), + Type::True => Some(RUBY_T_TRUE), + Type::False => Some(RUBY_T_FALSE), + Type::Fixnum => Some(RUBY_T_FIXNUM), + Type::Flonum => Some(RUBY_T_FLOAT), + Type::Array => Some(RUBY_T_ARRAY), + Type::Hash => Some(RUBY_T_HASH), + Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL), + Type::TString | Type::CString => Some(RUBY_T_STRING), + Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None + } + } + + /// Returns an Option with the class if it is known, otherwise None + pub fn known_class(&self) -> Option { + unsafe { + match self { + Type::Nil => Some(rb_cNilClass), + Type::True => Some(rb_cTrueClass), + Type::False => Some(rb_cFalseClass), + Type::Fixnum => Some(rb_cInteger), + Type::Flonum => Some(rb_cFloat), + Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol), + Type::CString => Some(rb_cString), + _ => None, + } + } + } + + /// Returns an Option with the exact value if it is known, otherwise None + #[allow(unused)] // not yet used + pub fn known_exact_value(&self) -> Option { + match self { + Type::Nil => Some(Qnil), + Type::True => Some(Qtrue), + Type::False => Some(Qfalse), + _ => None, + } + } + + /// Returns an Option with whether the value is truthy (not nil or false) if it is known, otherwise None + pub fn known_truthy(&self) -> Option { + match self { + Type::Nil => Some(false), + Type::False => Some(false), + Type::UnknownHeap => Some(true), + Type::Unknown | Type::UnknownImm => None, + _ => Some(true) + } + } + /// Compute a
difference between two value types /// Returns 0 if the two are the same /// Returns > 0 if different but compatible From 1e7a2415a4c69aa64c9c2a561197bf9cfc5a91f8 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Thu, 4 Aug 2022 17:19:14 +0100 Subject: [PATCH 004/546] YJIT: Allow str-concat arg to be any string subtype, not just rb_cString (#6205) Allow str-concat arg to be any string subtype, not just rb_cString --- yjit/src/codegen.rs | 69 ++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 818e3fbb41edee..119477f50516ad 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1338,6 +1338,31 @@ fn guard_object_is_array( jne_ptr(cb, side_exit); } +fn guard_object_is_string( + cb: &mut CodeBlock, + object_reg: X86Opnd, + flags_reg: X86Opnd, + side_exit: CodePtr, +) { + add_comment(cb, "guard object is string"); + + // Pull out the type mask + mov( + cb, + flags_reg, + mem_opnd( + 8 * SIZEOF_VALUE as u8, + object_reg, + RUBY_OFFSET_RBASIC_FLAGS, + ), + ); + and(cb, flags_reg, uimm_opnd(RUBY_T_MASK as u64)); + + // Compare the result with T_STRING + cmp(cb, flags_reg, uimm_opnd(RUBY_T_STRING as u64)); + jne_ptr(cb, side_exit); +} + // push enough nils onto the stack to fill out an array fn gen_expandarray( jit: &mut JITState, @@ -3730,7 +3755,7 @@ fn jit_rb_str_to_s( false } -// Codegen for rb_str_concat() +// Codegen for rb_str_concat() -- *not* String#concat // Frequently strings are concatenated using "out_str << next_str". // This is common in Erb and similar templating languages. fn jit_rb_str_concat( @@ -3744,14 +3769,12 @@ fn jit_rb_str_concat( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { + // The << operator can accept integer codepoints for characters + // as the argument. We only specially optimise string arguments. 
+ // If the peeked-at compile time argument is something other than + // a string, assume it won't be a string later either. let comptime_arg = jit_peek_at_stack(jit, ctx, 0); - let comptime_arg_type = ctx.get_opnd_type(StackOpnd(0)); - - // String#<< can take an integer codepoint as an argument, but we don't optimise that. - // Also, a non-string argument would have to call .to_str on itself before being treated - // as a string, and that would require saving pc/sp, which we don't do here. - // TODO: figure out how we should optimise a string-subtype argument here - if comptime_arg_type != Type::CString && comptime_arg.class_of() != unsafe { rb_cString } { + if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } { return false; } @@ -3759,19 +3782,25 @@ fn jit_rb_str_concat( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the argument is of class String at runtime. + let insn_opnd = StackOpnd(0); let arg_opnd = ctx.stack_opnd(0); mov(cb, REG0, arg_opnd); - jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - unsafe { rb_cString }, - StackOpnd(0), - comptime_arg, - SEND_MAX_DEPTH, - side_exit, - ); + let arg_type = ctx.get_opnd_type(insn_opnd); + + if arg_type != Type::CString && arg_type != Type::TString { + if !arg_type.is_heap() { + add_comment(cb, "guard arg not immediate"); + test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); + jnz_ptr(cb, side_exit); + cmp(cb, REG0, imm_opnd(Qnil.into())); + jbe_ptr(cb, side_exit); + + ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); + } + guard_object_is_string(cb, REG0, REG1, side_exit); + // We know this has type T_STRING, but not necessarily that it's a ::String + ctx.upgrade_opnd_type(insn_opnd, Type::TString); + } let concat_arg = ctx.stack_pop(1); let recv = ctx.stack_pop(1); @@ -3794,7 +3823,7 @@ fn jit_rb_str_concat( test(cb, REG0, uimm_opnd(RUBY_ENCODING_MASK as u64)); let enc_mismatch = cb.new_label("enc_mismatch".to_string()); - jne_label(cb, enc_mismatch); + jnz_label(cb, enc_mismatch); // If 
encodings match, call the simple append function and jump to return call_ptr(cb, REG0, rb_yjit_str_simple_append as *const u8); From 70b60d24b9c3859a859853ddb2e17c603bd3485b Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Thu, 28 Jul 2022 16:41:46 -0700 Subject: [PATCH 005/546] Fix inconsistency with opt_aref_with opt_aref_with is an optimized instruction for accessing a Hash using a non-frozen string key (ie. from a file without frozen_string_literal). It attempts to avoid allocating the string, and instead silently uses a frozen string (hash string keys are always fstrings). Because this is just an optimization, it should be invisible to the user. However, previously this optimization could be seen via hashes with default procs. For example, previously: h = Hash.new { |h, k| k.frozen? } str = "foo" h[str] # false h["foo"] # true when optimizations enabled This commit checks that the Hash doesn't have a default proc when using opt_aref_with. --- test/ruby/test_hash.rb | 14 ++++++++++++++ vm_insnhelper.c | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index 91423f81ea2067..83d16d462effe1 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -304,6 +304,20 @@ def test_AREF_fstring_key assert_equal before, ObjectSpace.count_objects[:T_STRING] end + def test_AREF_fstring_key_default_proc + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + begin; + h = Hash.new do |h, k| + k.frozen? + end + + str = "foo" + refute str.frozen?
# assumes this file is frozen_string_literal: false + refute h[str] + refute h["foo"] + end; + end + def test_ASET_fstring_key a, b = {}, {} assert_equal 1, a["abc"] = 1 diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 2ff48d26626eeb..2c0a369a439001 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -5458,7 +5458,8 @@ vm_opt_aref_with(VALUE recv, VALUE key) { if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG) && - rb_hash_compare_by_id_p(recv) == Qfalse) { + rb_hash_compare_by_id_p(recv) == Qfalse && + !FL_TEST(recv, RHASH_PROC_DEFAULT)) { return rb_hash_aref(recv, key); } else { From 87d8d25796df3865b5a0c9069c604e475a28027f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 5 Aug 2022 02:40:49 +0900 Subject: [PATCH 006/546] Use configured GIT --- common.mk | 4 +-- defs/gmake.mk | 68 +++++++++++++++++++++++++-------------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/common.mk b/common.mk index 4c49690e4abcdf..1a1260907bf3f4 100644 --- a/common.mk +++ b/common.mk @@ -1374,8 +1374,8 @@ update-bundled_gems: PHONY $(tooldir)/update-bundled_gems.rb \ "$(srcdir)/gems/bundled_gems" | \ $(IFCHANGE) "$(srcdir)/gems/bundled_gems" - - git -C "$(srcdir)" diff --no-ext-diff --ignore-submodules --exit-code || \ - git -C "$(srcdir)" commit -m "Update bundled_gems" gems/bundled_gems + $(GIT) -C "$(srcdir)" diff --no-ext-diff --ignore-submodules --exit-code || \ + $(GIT) -C "$(srcdir)" commit -m "Update bundled_gems" gems/bundled_gems PRECHECK_BUNDLED_GEMS = test-bundled-gems-precheck test-bundled-gems-precheck: $(TEST_RUNNABLE)-test-bundled-gems-precheck diff --git a/defs/gmake.mk b/defs/gmake.mk index 4019eb3854b2aa..a55edfb2864724 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -183,8 +183,8 @@ commit: $(if $(filter commit,$(MAKECMDGOALS)),$(filter-out commit,$(MAKECMDGOALS GITHUB_RUBY_URL = https://github.com/ruby/ruby PR = -COMMIT_GPG_SIGN = $(shell git -C 
"$(srcdir)" config commit.gpgsign) -REMOTE_GITHUB_URL = $(shell git -C "$(srcdir)" config remote.github.url) +COMMIT_GPG_SIGN = $(shell $(GIT) -C "$(srcdir)" config commit.gpgsign) +REMOTE_GITHUB_URL = $(shell $(GIT) -C "$(srcdir)" config remote.github.url) COMMITS_NOTES = commits .PHONY: fetch-github @@ -197,21 +197,21 @@ define fetch-github exit 1; \ ) $(eval REMOTE_GITHUB_URL := $(REMOTE_GITHUB_URL)) - $(if $(REMOTE_GITHUB_URL),, \ - echo adding $(GITHUB_RUBY_URL) as remote github; \ - git -C "$(srcdir)" remote add github $(GITHUB_RUBY_URL); \ - git -C "$(srcdir)" config --add remote.github.fetch +refs/notes/$(COMMITS_NOTES):refs/notes/$(COMMITS_NOTES) - $(eval REMOTE_GITHUB_URL := $(GITHUB_RUBY_URL)) \ + $(if $(REMOTE_GITHUB_URL),, + echo adding $(GITHUB_RUBY_URL) as remote github + $(GIT) -C "$(srcdir)" remote add github $(GITHUB_RUBY_URL) + $(GIT) -C "$(srcdir)" config --add remote.github.fetch +refs/notes/$(COMMITS_NOTES):refs/notes/$(COMMITS_NOTES) + $(eval REMOTE_GITHUB_URL := $(GITHUB_RUBY_URL)) ) - $(if $(git -C "$(srcdir)" rev-parse "github/pull/$(1)/head" -- 2> /dev/null), \ - git -C "$(srcdir)" branch -f "gh-$(1)" "github/pull/$(1)/head", \ - git -C "$(srcdir)" fetch -f github "pull/$(1)/head:gh-$(1)" \ + $(if $(shell $(GIT) -C "$(srcdir)" rev-parse "github/pull/$(1)/head" -- 2> /dev/null), + $(GIT) -C "$(srcdir)" branch -f "gh-$(1)" "github/pull/$(1)/head", + $(GIT) -C "$(srcdir)" fetch -f github "pull/$(1)/head:gh-$(1)" ) endef .PHONY: checkout-github checkout-github: fetch-github - git -C "$(srcdir)" checkout "gh-$(PR)" + $(GIT) -C "$(srcdir)" checkout "gh-$(PR)" .PHONY: update-github update-github: fetch-github @@ -224,31 +224,31 @@ update-github: fetch-github $(eval PR_BRANCH := $(word 2,$(PULL_REQUEST_FORK_BRANCH))) $(eval GITHUB_UPDATE_WORKTREE := $(shell mktemp -d "$(srcdir)/gh-$(PR)-XXXXXX")) - git -C "$(srcdir)" worktree add $(notdir $(GITHUB_UPDATE_WORKTREE)) "gh-$(PR)" - git -C "$(GITHUB_UPDATE_WORKTREE)" merge master --no-edit + $(GIT) -C 
"$(srcdir)" worktree add $(notdir $(GITHUB_UPDATE_WORKTREE)) "gh-$(PR)" + $(GIT) -C "$(GITHUB_UPDATE_WORKTREE)" merge master --no-edit @$(BASERUBY) -e 'print "Are you sure to push this to PR=$(PR)? [Y/n]: "; exit(gets.chomp != "n")' - git -C "$(srcdir)" remote add fork-$(PR) git@github.com:$(FORK_REPO).git - git -C "$(GITHUB_UPDATE_WORKTREE)" push fork-$(PR) gh-$(PR):$(PR_BRANCH) - git -C "$(srcdir)" remote rm fork-$(PR) - git -C "$(srcdir)" worktree remove $(notdir $(GITHUB_UPDATE_WORKTREE)) - git -C "$(srcdir)" branch -D gh-$(PR) + $(GIT) -C "$(srcdir)" remote add fork-$(PR) git@github.com:$(FORK_REPO).git + $(GIT) -C "$(GITHUB_UPDATE_WORKTREE)" push fork-$(PR) gh-$(PR):$(PR_BRANCH) + $(GIT) -C "$(srcdir)" remote rm fork-$(PR) + $(GIT) -C "$(srcdir)" worktree remove $(notdir $(GITHUB_UPDATE_WORKTREE)) + $(GIT) -C "$(srcdir)" branch -D gh-$(PR) .PHONY: pull-github pull-github: fetch-github $(call pull-github,$(PR)) define pull-github - $(eval GITHUB_MERGE_BASE := $(shell git -C "$(srcdir)" log -1 --format=format:%H)) - $(eval GITHUB_MERGE_BRANCH := $(shell git -C "$(srcdir)" symbolic-ref --short HEAD)) + $(eval GITHUB_MERGE_BASE := $(shell $(GIT) -C "$(srcdir)" log -1 --format=format:%H)) + $(eval GITHUB_MERGE_BRANCH := $(shell $(GIT) -C "$(srcdir)" symbolic-ref --short HEAD)) $(eval GITHUB_MERGE_WORKTREE := $(shell mktemp -d "$(srcdir)/gh-$(1)-XXXXXX")) - git -C "$(srcdir)" worktree prune - git -C "$(srcdir)" worktree add $(notdir $(GITHUB_MERGE_WORKTREE)) "gh-$(1)" - git -C "$(GITHUB_MERGE_WORKTREE)" rebase $(GITHUB_MERGE_BRANCH) + $(GIT) -C "$(srcdir)" worktree prune + $(GIT) -C "$(srcdir)" worktree add $(notdir $(GITHUB_MERGE_WORKTREE)) "gh-$(1)" + $(GIT) -C "$(GITHUB_MERGE_WORKTREE)" rebase $(GITHUB_MERGE_BRANCH) $(eval COMMIT_GPG_SIGN := $(COMMIT_GPG_SIGN)) $(if $(filter true,$(COMMIT_GPG_SIGN)), \ - git -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "git commit --amend --no-edit -S" "$(GITHUB_MERGE_BASE)"; \ + $(GIT) -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec 
"$(GIT) commit --amend --no-edit -S" "$(GITHUB_MERGE_BASE)"; \ ) - git -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "git notes add --message 'Merged: $(GITHUB_RUBY_URL)/pull/$(1)'" "$(GITHUB_MERGE_BASE)" + $(GIT) -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "$(GIT) notes add --message 'Merged: $(GITHUB_RUBY_URL)/pull/$(1)'" "$(GITHUB_MERGE_BASE)" endef .PHONY: fetch-github-% @@ -257,7 +257,7 @@ fetch-github-%: .PHONY: checkout-github-% checkout-github-%: fetch-github-% - git -C "$(srcdir)" checkout "gh-$*" + $(GIT) -C "$(srcdir)" checkout "gh-$*" .PHONY: pr-% pull-github-% pr-% pull-github-%: fetch-github-% @@ -351,7 +351,7 @@ REVISION_IN_HEADER := none REVISION_LATEST := update else REVISION_IN_HEADER := $(shell sed -n 's/^\#define RUBY_FULL_REVISION "\(.*\)"/\1/p' $(srcdir)/revision.h 2>/dev/null) -REVISION_LATEST := $(shell $(CHDIR) $(srcdir) && git log -1 --format=%H 2>/dev/null) +REVISION_LATEST := $(shell $(CHDIR) $(srcdir) && $(GIT) log -1 --format=%H 2>/dev/null) endif ifneq ($(REVISION_IN_HEADER),$(REVISION_LATEST)) # GNU make treat the target as unmodified when its dependents get @@ -403,19 +403,19 @@ endif update-deps: $(eval update_deps := $(shell date +update-deps-%Y%m%d)) $(eval deps_dir := $(shell mktemp -d)/$(update_deps)) - $(eval GIT_DIR := $(shell git -C $(srcdir) rev-parse --absolute-git-dir)) - git --git-dir=$(GIT_DIR) worktree add $(deps_dir) + $(eval GIT_DIR := $(shell $(GIT) -C $(srcdir) rev-parse --absolute-git-dir)) + $(GIT) --git-dir=$(GIT_DIR) worktree add $(deps_dir) cp $(tooldir)/config.guess $(tooldir)/config.sub $(deps_dir)/tool [ -f config.status ] && cp config.status $(deps_dir) cd $(deps_dir) && autoconf && \ exec ./configure -q -C --enable-load-relative --disable-install-doc --disable-rubygems 'optflags=-O0' 'debugflags=-save-temps=obj -g' $(RUNRUBY) -C $(deps_dir) tool/update-deps --fix - git -C $(deps_dir) diff --no-ext-diff --ignore-submodules --exit-code || \ - git -C $(deps_dir) commit --all --message='Update dependencies' - 
git --git-dir=$(GIT_DIR) worktree remove $(deps_dir) + $(GIT) -C $(deps_dir) diff --no-ext-diff --ignore-submodules --exit-code || \ + $(GIT) -C $(deps_dir) commit --all --message='Update dependencies' + $(GIT) --git-dir=$(GIT_DIR) worktree remove $(deps_dir) $(RMDIR) $(dir $(deps_dir)) - git --git-dir=$(GIT_DIR) merge --no-edit --ff-only $(update_deps) - git --git-dir=$(GIT_DIR) branch --delete $(update_deps) + $(GIT) --git-dir=$(GIT_DIR) merge --no-edit --ff-only $(update_deps) + $(GIT) --git-dir=$(GIT_DIR) branch --delete $(update_deps) # order-only-prerequisites doesn't work for $(RUBYSPEC_CAPIEXT) # because the same named directory exists in the source tree. From 6a8f1a9e5cd1c9c2b3c6925d8d3fa76a29dabf73 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 30 Jul 2022 21:08:00 +0900 Subject: [PATCH 007/546] Copy from bundled gem source for test --- common.mk | 13 +++++++--- defs/gmake.mk | 4 +++ tool/gem-unpack.rb | 63 ++++++++++++++++++++++++++++++---------------- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/common.mk b/common.mk index 1a1260907bf3f4..515236b1840478 100644 --- a/common.mk +++ b/common.mk @@ -1344,7 +1344,7 @@ update-config_files: PHONY refresh-gems: update-bundled_gems prepare-gems prepare-gems: $(HAVE_BASERUBY:yes=update-gems) $(HAVE_BASERUBY:yes=extract-gems) -update-gems$(gnumake:yes=-nongnumake): PHONY +update-gems$(gnumake:yes=-sequential): PHONY $(ECHO) Downloading bundled gem files... $(Q) $(BASERUBY) -C "$(srcdir)" \ -I./tool -rdownloader -answ \ @@ -1358,15 +1358,20 @@ update-gems$(gnumake:yes=-nongnumake): PHONY -e 'FileUtils.rm_rf(old.map{'"|n|"'n.chomp(".gem")})' \ gems/bundled_gems -extract-gems$(gnumake:yes=-nongnumake): PHONY +extract-gems$(gnumake:yes=-sequential): PHONY $(ECHO) Extracting bundled gem files... 
$(Q) $(RUNRUBY) -C "$(srcdir)" \ -Itool -rfileutils -rgem-unpack -answ \ -e 'BEGIN {d = ".bundle/gems"}' \ - -e 'gem, ver = *$$F' \ + -e 'gem, ver, _, rev = *$$F' \ -e 'next if !ver or /^#/=~gem' \ -e 'g = "#{gem}-#{ver}"' \ - -e 'File.directory?("#{d}/#{g}") or Gem.unpack("gems/#{g}.gem", ".bundle")' \ + -e 'if File.directory?("#{d}/#{g}")' \ + -e 'elsif rev and File.exist?(gs = "gems/src/#{gem}/#{gem}.gemspec")' \ + -e 'Gem.copy(gs, ".bundle")' \ + -e 'else' \ + -e 'Gem.unpack("gems/#{g}.gem", ".bundle")' \ + -e 'end' \ gems/bundled_gems update-bundled_gems: PHONY diff --git a/defs/gmake.mk b/defs/gmake.mk index a55edfb2864724..9d7bf029e2ddb3 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -292,6 +292,9 @@ gems/%.gem: -e 'File.unlink(*old) and' \ -e 'FileUtils.rm_rf(old.map{'"|n|"'n.chomp(".gem")})' +ifeq (,) +extract-gems: extract-gems-sequential +else extract-gems: | $(patsubst %,.bundle/gems/%,$(bundled-gems)) .bundle/gems/%: gems/%.gem | .bundle/gems @@ -302,6 +305,7 @@ extract-gems: | $(patsubst %,.bundle/gems/%,$(bundled-gems)) $(srcdir)/.bundle/gems: $(MAKEDIRS) $@ +endif ifneq ($(filter update-bundled_gems refresh-gems,$(MAKECMDGOALS)),) update-gems: update-bundled_gems diff --git a/tool/gem-unpack.rb b/tool/gem-unpack.rb index c50d47f7978213..6310c3f92aef9f 100644 --- a/tool/gem-unpack.rb +++ b/tool/gem-unpack.rb @@ -5,30 +5,49 @@ # This library is used by "make extract-gems" to # unpack bundled gem files. -def Gem.unpack(file, dir = ".") - pkg = Gem::Package.new(file) - spec = pkg.spec - target = spec.full_name - Gem.ensure_gem_subdirectories(dir) - gem_dir = File.join(dir, "gems", target) - pkg.extract_files gem_dir - spec_dir = spec.extensions.empty? ? "specifications" : File.join("gems", target) - File.binwrite(File.join(dir, spec_dir, "#{target}.gemspec"), spec.to_ruby) - unless spec.extensions.empty? 
- spec.dependencies.clear - File.binwrite(File.join(dir, spec_dir, ".bundled.#{target}.gemspec"), spec.to_ruby) +class << Gem + def unpack(file, *rest) + pkg = Gem::Package.new(file) + prepare_test(pkg.spec, *rest) {|dir| pkg.extract_files(dir)} + puts "Unpacked #{file}" end - if spec.bindir and spec.executables - bindir = File.join(dir, "bin") - Dir.mkdir(bindir) rescue nil - spec.executables.each do |exe| - File.open(File.join(bindir, exe), "wb", 0o777) {|f| - f.print "#!ruby\n", - %[load File.realpath("../gems/#{target}/#{spec.bindir}/#{exe}", __dir__)\n] - } + + def copy(path, *rest) + spec = Gem::Specification.load(path) + path = File.dirname(path) + prepare_test(spec, *rest) do |dir| + FileUtils.rm_rf(dir) + files = spec.files.reject {|f| f.start_with?(".git")} + dirs = files.map {|f| File.dirname(f) if f.include?("/")}.uniq + FileUtils.mkdir_p(dirs.map {|d| d ? "#{dir}/#{d}" : dir}.sort_by {|d| d.count("/")}) + files.each do |f| + File.copy_stream(File.join(path, f), File.join(dir, f)) + end end + puts "Copied #{path}" end - FileUtils.rm_rf(Dir.glob("#{gem_dir}/.git*")) - puts "Unpacked #{file}" + def prepare_test(spec, dir = ".") + target = spec.full_name + Gem.ensure_gem_subdirectories(dir) + gem_dir = File.join(dir, "gems", target) + yield gem_dir + spec_dir = spec.extensions.empty? ? "specifications" : File.join("gems", target) + File.binwrite(File.join(dir, spec_dir, "#{target}.gemspec"), spec.to_ruby) + unless spec.extensions.empty? 
+ spec.dependencies.clear + File.binwrite(File.join(dir, spec_dir, ".bundled.#{target}.gemspec"), spec.to_ruby) + end + if spec.bindir and spec.executables + bindir = File.join(dir, "bin") + Dir.mkdir(bindir) rescue nil + spec.executables.each do |exe| + File.open(File.join(bindir, exe), "wb", 0o777) {|f| + f.print "#!ruby\n", + %[load File.realpath("../gems/#{target}/#{spec.bindir}/#{exe}", __dir__)\n] + } + end + end + FileUtils.rm_rf(Dir.glob("#{gem_dir}/.git*")) + end end From 41516b35418d6108c75d9f2190a846ded6a47108 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 30 Jul 2022 21:12:48 +0900 Subject: [PATCH 008/546] Extract bundled gems by BASERUBY --- common.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common.mk b/common.mk index 515236b1840478..bce2e628ae52e3 100644 --- a/common.mk +++ b/common.mk @@ -1360,7 +1360,7 @@ update-gems$(gnumake:yes=-sequential): PHONY extract-gems$(gnumake:yes=-sequential): PHONY $(ECHO) Extracting bundled gem files... 
- $(Q) $(RUNRUBY) -C "$(srcdir)" \ + $(Q) $(BASERUBY) -C "$(srcdir)" \ -Itool -rfileutils -rgem-unpack -answ \ -e 'BEGIN {d = ".bundle/gems"}' \ -e 'gem, ver, _, rev = *$$F' \ @@ -1388,7 +1388,7 @@ yes-test-bundled-gems-precheck: main no-test-bundled-gems-precheck: test-bundled-gems-fetch: yes-test-bundled-gems-fetch -yes-test-bundled-gems-fetch: $(PREP) +yes-test-bundled-gems-fetch: $(ACTIONS_GROUP) $(Q) $(BASERUBY) -C $(srcdir)/gems ../tool/fetch-bundled_gems.rb src bundled_gems $(ACTIONS_ENDGROUP) From 44a0a66559ee4a03a84c27feca05e9b1b0f59df8 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 31 Jul 2022 00:04:16 +0900 Subject: [PATCH 009/546] Move to tool/lib/bundled_gem.rb --- common.mk | 6 +++--- defs/gmake.mk | 4 ++-- tool/{gem-unpack.rb => lib/bundled_gem.rb} | 4 +++- 3 files changed, 8 insertions(+), 6 deletions(-) rename tool/{gem-unpack.rb => lib/bundled_gem.rb} (97%) diff --git a/common.mk b/common.mk index bce2e628ae52e3..7e7e0bb67a8ad2 100644 --- a/common.mk +++ b/common.mk @@ -1361,16 +1361,16 @@ update-gems$(gnumake:yes=-sequential): PHONY extract-gems$(gnumake:yes=-sequential): PHONY $(ECHO) Extracting bundled gem files... 
$(Q) $(BASERUBY) -C "$(srcdir)" \ - -Itool -rfileutils -rgem-unpack -answ \ + -Itool/lib -rfileutils -rbundled_gem -answ \ -e 'BEGIN {d = ".bundle/gems"}' \ -e 'gem, ver, _, rev = *$$F' \ -e 'next if !ver or /^#/=~gem' \ -e 'g = "#{gem}-#{ver}"' \ -e 'if File.directory?("#{d}/#{g}")' \ -e 'elsif rev and File.exist?(gs = "gems/src/#{gem}/#{gem}.gemspec")' \ - -e 'Gem.copy(gs, ".bundle")' \ + -e 'BundledGem.copy(gs, ".bundle")' \ -e 'else' \ - -e 'Gem.unpack("gems/#{g}.gem", ".bundle")' \ + -e 'BundledGem.unpack("gems/#{g}.gem", ".bundle")' \ -e 'end' \ gems/bundled_gems diff --git a/defs/gmake.mk b/defs/gmake.mk index 9d7bf029e2ddb3..202858eda915cd 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -300,8 +300,8 @@ extract-gems: | $(patsubst %,.bundle/gems/%,$(bundled-gems)) .bundle/gems/%: gems/%.gem | .bundle/gems $(ECHO) Extracting bundle gem $*... $(Q) $(BASERUBY) -C "$(srcdir)" \ - -Itool -rgem-unpack \ - -e 'Gem.unpack("gems/$(@F).gem", ".bundle")' + -Itool/lib -rbundled_gem \ + -e 'BundledGem.unpack("gems/$(@F).gem", ".bundle")' $(srcdir)/.bundle/gems: $(MAKEDIRS) $@ diff --git a/tool/gem-unpack.rb b/tool/lib/bundled_gem.rb similarity index 97% rename from tool/gem-unpack.rb rename to tool/lib/bundled_gem.rb index 6310c3f92aef9f..0b7d52b86a63b1 100644 --- a/tool/gem-unpack.rb +++ b/tool/lib/bundled_gem.rb @@ -5,7 +5,9 @@ # This library is used by "make extract-gems" to # unpack bundled gem files. 
-class << Gem +module BundledGem + module_function + def unpack(file, *rest) pkg = Gem::Package.new(file) prepare_test(pkg.spec, *rest) {|dir| pkg.extract_files(dir)} From 661536ab168ab99e1dae31d661ff4807e7734ae4 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 4 Aug 2022 17:03:15 +0900 Subject: [PATCH 010/546] Copy from cloned gem sources in parallel --- defs/gmake.mk | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index 202858eda915cd..49811f7cf85480 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -271,9 +271,22 @@ HELP_EXTRA_TASKS = \ extract-gems: $(HAVE_BASERUBY:yes=update-gems) -bundled-gems := $(shell sed '/^[ ]*\#/d;/^[ ]*$$/d;s/[ ][ ]*/-/;s/[ ].*//' $(srcdir)/gems/bundled_gems) - -update-gems: | $(patsubst %,gems/%.gem,$(bundled-gems)) +# 1. squeeze spaces +# 2. strip and skip comment/empty lines +# 3. "gem x.y.z URL xxxxxx" -> "gem/x.y.z/xxxxxx" +# 4. "gem x.y.z URL" -> "gem-x.y.z" +bundled-gems := $(shell sed \ + -e 's/[ ][ ]*/ /g' \ + -e 's/^ //;/\#/d;s/ *$$//;/^$$/d' \ + -e 's:\([^ ][^ ]*\) \([^ ][^ ]*\) [^ ][^ ]* :\1/\2/:' \ + -e 's/ /-/;s/ .*//' \ + $(srcdir)/gems/bundled_gems) + +bundled-gems-rev := $(filter-out $(subst /,,$(bundled-gems)),$(bundled-gems)) +bundled-gems := $(filter-out $(bundled-gems-rev),$(bundled-gems)) + +update-gems: | $(patsubst %,$(srcdir)/gems/%.gem,$(bundled-gems)) +update-gems: | $(foreach g,$(bundled-gems-rev),$(srcdir)/gems/src/$(word 1,$(subst /, ,$(value g)))) test-bundler-precheck: | $(srcdir)/.bundle/cache @@ -281,7 +294,7 @@ $(srcdir)/.bundle/cache: $(MAKEDIRS) $(@D) $(CACHE_DIR) $(LN_S) ../.downloaded-cache $@ -gems/%.gem: +$(srcdir)/gems/%.gem: $(ECHO) Downloading bundled gem $*... 
$(Q) $(BASERUBY) -C "$(srcdir)" \ -I./tool -rdownloader \ @@ -292,20 +305,33 @@ gems/%.gem: -e 'File.unlink(*old) and' \ -e 'FileUtils.rm_rf(old.map{'"|n|"'n.chomp(".gem")})' -ifeq (,) -extract-gems: extract-gems-sequential -else -extract-gems: | $(patsubst %,.bundle/gems/%,$(bundled-gems)) +extract-gems: | $(patsubst %,$(srcdir)/.bundle/gems/%,$(bundled-gems)) +extract-gems: | $(foreach g,$(bundled-gems-rev), \ + $(srcdir)/.bundle/gems/$(word 1,$(subst /, ,$(value g)))-$(word 2,$(subst /, ,$(value g)))) -.bundle/gems/%: gems/%.gem | .bundle/gems +$(srcdir)/.bundle/gems/%: $(srcdir)/gems/%.gem | .bundle/gems $(ECHO) Extracting bundle gem $*... $(Q) $(BASERUBY) -C "$(srcdir)" \ -Itool/lib -rbundled_gem \ -e 'BundledGem.unpack("gems/$(@F).gem", ".bundle")' +define copy-gem +$(srcdir)/.bundle/gems/$(1)-$(2): | $(srcdir)/gems/src/$(1) .bundle/gems + $(ECHO) Copying $(1)@$(3) to $$(@F) + $(Q) $(GIT) -C "$(srcdir)/gems/src/$(1)" checkout $(3) + $(Q) $(BASERUBY) -C "$(srcdir)" \ + -Itool/lib -rbundled_gem \ + -e 'BundledGem.copy("gems/src/$(1)/$(1).gemspec", ".bundle")' + +endef +define copy-gem-0 +$(call copy-gem,$(word 1,$(1)),$(word 2,$(1)),$(word 3,$(1))) +endef + +$(foreach g,$(bundled-gems-rev),$(eval $(call copy-gem-0,$(subst /, ,$(value g))))) + $(srcdir)/.bundle/gems: $(MAKEDIRS) $@ -endif ifneq ($(filter update-bundled_gems refresh-gems,$(MAKECMDGOALS)),) update-gems: update-bundled_gems From 57911712011e285d76cf8132dd080b5727f43bf3 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 4 Aug 2022 17:30:03 +0900 Subject: [PATCH 011/546] Load gemspec file at that directory Gemspec files generated by old bundler run `git` without changing the working directory. Or some gemspec files expect that an owned file at the top exists at the current working directory.
--- tool/lib/bundled_gem.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tool/lib/bundled_gem.rb b/tool/lib/bundled_gem.rb index 0b7d52b86a63b1..38c331183d8c69 100644 --- a/tool/lib/bundled_gem.rb +++ b/tool/lib/bundled_gem.rb @@ -15,8 +15,8 @@ def unpack(file, *rest) end def copy(path, *rest) - spec = Gem::Specification.load(path) - path = File.dirname(path) + path, n = File.split(path) + spec = Dir.chdir(path) {Gem::Specification.load(n)} or raise "Cannot load #{path}" prepare_test(spec, *rest) do |dir| FileUtils.rm_rf(dir) files = spec.files.reject {|f| f.start_with?(".git")} From 8fae120912b01a95b90d1c4e2fb9e7b41ba0f511 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 5 Aug 2022 01:03:54 +0900 Subject: [PATCH 012/546] Clone upstream gem repositories for test --- defs/gmake.mk | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index 49811f7cf85480..77c19e12b5a3e5 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -273,20 +273,20 @@ extract-gems: $(HAVE_BASERUBY:yes=update-gems) # 1. squeeze spaces # 2. strip and skip comment/empty lines -# 3. "gem x.y.z URL xxxxxx" -> "gem/x.y.z/xxxxxx" +# 3. "gem x.y.z URL xxxxxx" -> "gem|x.y.z|xxxxxx|URL" # 4. 
"gem x.y.z URL" -> "gem-x.y.z" bundled-gems := $(shell sed \ -e 's/[ ][ ]*/ /g' \ -e 's/^ //;/\#/d;s/ *$$//;/^$$/d' \ - -e 's:\([^ ][^ ]*\) \([^ ][^ ]*\) [^ ][^ ]* :\1/\2/:' \ + -e 's/^\(.*\) \(.*\) \(.*\) \(.*\)/\1|\2|\4|\3/' \ -e 's/ /-/;s/ .*//' \ $(srcdir)/gems/bundled_gems) -bundled-gems-rev := $(filter-out $(subst /,,$(bundled-gems)),$(bundled-gems)) +bundled-gems-rev := $(filter-out $(subst |,,$(bundled-gems)),$(bundled-gems)) bundled-gems := $(filter-out $(bundled-gems-rev),$(bundled-gems)) update-gems: | $(patsubst %,$(srcdir)/gems/%.gem,$(bundled-gems)) -update-gems: | $(foreach g,$(bundled-gems-rev),$(srcdir)/gems/src/$(word 1,$(subst /, ,$(value g)))) +update-gems: | $(foreach g,$(bundled-gems-rev),$(srcdir)/gems/src/$(word 1,$(subst |, ,$(value g)))) test-bundler-precheck: | $(srcdir)/.bundle/cache @@ -307,7 +307,7 @@ $(srcdir)/gems/%.gem: extract-gems: | $(patsubst %,$(srcdir)/.bundle/gems/%,$(bundled-gems)) extract-gems: | $(foreach g,$(bundled-gems-rev), \ - $(srcdir)/.bundle/gems/$(word 1,$(subst /, ,$(value g)))-$(word 2,$(subst /, ,$(value g)))) + $(srcdir)/.bundle/gems/$(word 1,$(subst |, ,$(value g)))-$(word 2,$(subst |, ,$(value g)))) $(srcdir)/.bundle/gems/%: $(srcdir)/gems/%.gem | .bundle/gems $(ECHO) Extracting bundle gem $*... 
@@ -316,8 +316,13 @@ $(srcdir)/.bundle/gems/%: $(srcdir)/gems/%.gem | .bundle/gems -e 'BundledGem.unpack("gems/$(@F).gem", ".bundle")' define copy-gem +$(srcdir)/gems/src/$(1): | $(srcdir)/gems/src + $(ECHO) Cloning $(4) + $(Q) $(GIT) clone $(4) $$(@) + $(srcdir)/.bundle/gems/$(1)-$(2): | $(srcdir)/gems/src/$(1) .bundle/gems $(ECHO) Copying $(1)@$(3) to $$(@F) + $(Q) $(GIT) -C "$(srcdir)/gems/src/$(1)" fetch origin $(3) $(Q) $(GIT) -C "$(srcdir)/gems/src/$(1)" checkout $(3) $(Q) $(BASERUBY) -C "$(srcdir)" \ -Itool/lib -rbundled_gem \ @@ -325,10 +330,13 @@ $(srcdir)/.bundle/gems/$(1)-$(2): | $(srcdir)/gems/src/$(1) .bundle/gems endef define copy-gem-0 -$(call copy-gem,$(word 1,$(1)),$(word 2,$(1)),$(word 3,$(1))) +$(call copy-gem,$(word 1,$(1)),$(word 2,$(1)),$(word 3,$(1)),$(word 4,$(1))) endef -$(foreach g,$(bundled-gems-rev),$(eval $(call copy-gem-0,$(subst /, ,$(value g))))) +$(foreach g,$(bundled-gems-rev),$(eval $(call copy-gem-0,$(subst |, ,$(value g))))) + +$(srcdir)/gems/src: + $(MAKEDIRS) $@ $(srcdir)/.bundle/gems: $(MAKEDIRS) $@ From 73f0573cb3be4e1c7e315ee61833f3c9c3252be9 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 5 Aug 2022 09:55:49 +0900 Subject: [PATCH 013/546] Ignore revision to test unless HAVE_GIT --- defs/gmake.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/defs/gmake.mk b/defs/gmake.mk index 77c19e12b5a3e5..34f7f49c1035c1 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -278,7 +278,9 @@ extract-gems: $(HAVE_BASERUBY:yes=update-gems) bundled-gems := $(shell sed \ -e 's/[ ][ ]*/ /g' \ -e 's/^ //;/\#/d;s/ *$$//;/^$$/d' \ + $(if $(filter yes,$(HAVE_GIT)), \ -e 's/^\(.*\) \(.*\) \(.*\) \(.*\)/\1|\2|\4|\3/' \ + ) \ -e 's/ /-/;s/ .*//' \ $(srcdir)/gems/bundled_gems) From 5a9db23734e76c2915f1ccb8f87e992bc831598b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Tue, 2 Aug 2022 17:45:28 +0200 Subject: [PATCH 014/546] [rubygems/rubygems] Automatically remove "ruby" from lockfile if incomplete 
https://github.com/rubygems/rubygems/commit/69d0b4e10b --- lib/bundler/definition.rb | 12 ++++ lib/bundler/spec_set.rb | 4 ++ .../install/gemfile/specific_platform_spec.rb | 71 +++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 34c23796e8fb78..ccb3ab1cec9942 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -484,6 +484,7 @@ def unlocking? def reresolve last_resolve = converge_locked_specs + remove_ruby_from_platforms_if_necessary!(dependencies) expanded_dependencies = expand_dependencies(dependencies + metadata_dependencies, true) Resolver.resolve(expanded_dependencies, source_requirements, last_resolve, gem_version_promoter, additional_base_requirements_for_resolve, platforms) end @@ -865,6 +866,17 @@ def additional_base_requirements_for_resolve end end + def remove_ruby_from_platforms_if_necessary!(dependencies) + return if Bundler.frozen_bundle? || + Bundler.local_platform == Gem::Platform::RUBY || + !platforms.include?(Gem::Platform::RUBY) || + (@new_platform && platforms.last == Gem::Platform::RUBY) || + !@originally_locked_specs.incomplete_ruby_specs?(dependencies) + + remove_platform(Gem::Platform::RUBY) + add_current_platform + end + def source_map @source_map ||= SourceMap.new(sources, dependencies, @locked_specs) end diff --git a/lib/bundler/spec_set.rb b/lib/bundler/spec_set.rb index 735cdac126c916..d7239f5c8c1d29 100644 --- a/lib/bundler/spec_set.rb +++ b/lib/bundler/spec_set.rb @@ -91,6 +91,10 @@ def materialized_for_resolution SpecSet.new(materialized) end + def incomplete_ruby_specs?(deps) + self.class.new(self.for(deps, true, [Gem::Platform::RUBY])).incomplete_specs.any? 
+ end + def missing_specs @specs.select {|s| s.is_a?(LazySpecification) } end diff --git a/spec/bundler/install/gemfile/specific_platform_spec.rb b/spec/bundler/install/gemfile/specific_platform_spec.rb index 48349aaef4353a..fe1c3b71fe1913 100644 --- a/spec/bundler/install/gemfile/specific_platform_spec.rb +++ b/spec/bundler/install/gemfile/specific_platform_spec.rb @@ -374,6 +374,77 @@ ERROR end + it "automatically fixes the lockfile if RUBY platform is locked and some gem has no RUBY variant available" do + build_repo4 do + build_gem("sorbet-static-and-runtime", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet", "= 0.5.10160" + s.add_runtime_dependency "sorbet-runtime", "= 0.5.10160" + end + + build_gem("sorbet", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet-static", "= 0.5.10160" + end + + build_gem("sorbet-runtime", "0.5.10160") + + build_gem("sorbet-static", "0.5.10160") do |s| + s.platform = Gem::Platform.local + end + end + + gemfile <<~G + source "#{file_uri_for(gem_repo4)}" + + gem "sorbet-static-and-runtime" + G + + lockfile <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + #{lockfile_platforms_for([specific_local_platform, "ruby"])} + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "update" + + expect(lockfile).to eq <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + end + it "can fallback to a 
source gem when platform gems are incompatible with current ruby version" do setup_multiplatform_gem_with_source_gem From f310ac1cb2964f635f582862763b2155aacf2c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Wed, 3 Aug 2022 10:47:40 +0200 Subject: [PATCH 015/546] [rubygems/rubygems] Include backtrace with crashes by default https://github.com/rubygems/rubygems/commit/3cc3bfd371 --- lib/rubygems/config_file.rb | 2 +- test/rubygems/test_gem_config_file.rb | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index d711a51bd08ec4..8d64b58cb964cd 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -39,7 +39,7 @@ class Gem::ConfigFile include Gem::UserInteraction - DEFAULT_BACKTRACE = false + DEFAULT_BACKTRACE = true DEFAULT_BULK_THRESHOLD = 1000 DEFAULT_VERBOSITY = true DEFAULT_UPDATE_SOURCES = true diff --git a/test/rubygems/test_gem_config_file.rb b/test/rubygems/test_gem_config_file.rb index fbc7c85757e98a..e23773a133c9d1 100644 --- a/test/rubygems/test_gem_config_file.rb +++ b/test/rubygems/test_gem_config_file.rb @@ -35,7 +35,7 @@ def teardown def test_initialize assert_equal @temp_conf, @cfg.config_file_name - assert_equal false, @cfg.backtrace + assert_equal true, @cfg.backtrace assert_equal true, @cfg.update_sources assert_equal Gem::ConfigFile::DEFAULT_BULK_THRESHOLD, @cfg.bulk_threshold assert_equal true, @cfg.verbose @@ -239,6 +239,12 @@ def test_handle_arguments end def test_handle_arguments_backtrace + File.open @temp_conf, "w" do |fp| + fp.puts ":backtrace: false" + end + + util_config_file %W[--config-file=#{@temp_conf}] + assert_equal false, @cfg.backtrace args = %w[--backtrace] @@ -275,6 +281,12 @@ def test_handle_arguments_override end def test_handle_arguments_traceback + File.open @temp_conf, "w" do |fp| + fp.puts ":backtrace: false" + end + + util_config_file %W[--config-file=#{@temp_conf}] + 
assert_equal false, @cfg.backtrace args = %w[--traceback] @@ -288,7 +300,7 @@ def test_handle_arguments_norc assert_equal @temp_conf, @cfg.config_file_name File.open @temp_conf, "w" do |fp| - fp.puts ":backtrace: true" + fp.puts ":backtrace: false" fp.puts ":update_sources: false" fp.puts ":bulk_threshold: 10" fp.puts ":verbose: false" @@ -300,7 +312,7 @@ def test_handle_arguments_norc util_config_file args - assert_equal false, @cfg.backtrace + assert_equal true, @cfg.backtrace assert_equal true, @cfg.update_sources assert_equal Gem::ConfigFile::DEFAULT_BULK_THRESHOLD, @cfg.bulk_threshold assert_equal true, @cfg.verbose @@ -386,7 +398,7 @@ def test_rubygems_api_key_equals_bad_permission end def test_write - @cfg.backtrace = true + @cfg.backtrace = false @cfg.update_sources = false @cfg.bulk_threshold = 10 @cfg.verbose = false @@ -398,7 +410,7 @@ def test_write util_config_file # These should not be written out to the config file. - assert_equal false, @cfg.backtrace, "backtrace" + assert_equal true, @cfg.backtrace, "backtrace" assert_equal Gem::ConfigFile::DEFAULT_BULK_THRESHOLD, @cfg.bulk_threshold, "bulk_threshold" assert_equal true, @cfg.update_sources, "update_sources" From 74817f3d37bb7153385f682f75e37713c4c8009d Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 5 Aug 2022 10:40:48 +0100 Subject: [PATCH 016/546] [DOC] Process._fork does not get called by Process.daemon As discussed in [Bug #18911], I'm adding some documentation to `Process._fork` to clarify that it is not expected to cover calls to `Process.daemon`. [Bug #18911]: https://bugs.ruby-lang.org/issues/18911 Co-authored-by: Yusuke Endoh --- process.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/process.c b/process.c index 5d16a3854a6b18..fb31418e0058f7 100644 --- a/process.c +++ b/process.c @@ -4353,6 +4353,13 @@ rb_call_proc__fork(void) * This method is not for casual code but for application monitoring * libraries. 
You can add custom code before and after fork events * by overriding this method. + * + * Note: Process.daemon may be implemented using fork(2) BUT does not go + * through this method. + * Thus, depending on your reason to hook into this method, you + * may also want to hook into that one. + * See {this issue}[https://bugs.ruby-lang.org/issues/18911] for a + * more detailed discussion of this. */ VALUE rb_proc__fork(VALUE _obj) From 2f0d9e74c82c12d253ac808b636385ba27a4b115 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 5 Aug 2022 17:21:05 +0900 Subject: [PATCH 017/546] Get rid of `-C` option for very old `git` --- defs/gmake.mk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index 34f7f49c1035c1..9d055d0f1aa2a6 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -324,8 +324,7 @@ $(srcdir)/gems/src/$(1): | $(srcdir)/gems/src $(srcdir)/.bundle/gems/$(1)-$(2): | $(srcdir)/gems/src/$(1) .bundle/gems $(ECHO) Copying $(1)@$(3) to $$(@F) - $(Q) $(GIT) -C "$(srcdir)/gems/src/$(1)" fetch origin $(3) - $(Q) $(GIT) -C "$(srcdir)/gems/src/$(1)" checkout $(3) + $(Q) $(CHDIR) "$(srcdir)/gems/src/$(1)" && $(GIT) fetch origin $(3) && $(GIT) checkout $(3) $(Q) $(BASERUBY) -C "$(srcdir)" \ -Itool/lib -rbundled_gem \ -e 'BundledGem.copy("gems/src/$(1)/$(1).gemspec", ".bundle")' From 412da2c2213ccf52254467b8c1816b34f1179493 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Fri, 5 Aug 2022 13:05:38 -0500 Subject: [PATCH 018/546] Sync new doc in Date (#6215) --- tool/sync_default_gems.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index e3b940e2d5a20c..78620e15084e8b 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -228,6 +228,7 @@ def sync_default_gems(gem) `git checkout ext/etc/depend` when "date" rm_rf(%w[ext/date test/date]) + cp_r("#{upstream}/doc/date", "doc") cp_r("#{upstream}/ext/date", "ext") cp_r("#{upstream}/lib", "ext/date") 
cp_r("#{upstream}/test/date", "test") From ce6dc9f66079f44df009b96f8467cd014bc5dd4c Mon Sep 17 00:00:00 2001 From: git Date: Sat, 6 Aug 2022 03:05:55 +0900 Subject: [PATCH 019/546] * 2022-08-06 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index bcd3b29f5095f8..5fc7661c95693b 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 5 +#define RUBY_RELEASE_DAY 6 #include "ruby/version.h" #include "ruby/internal/abi.h" From f1057393da7a98e447ee7679db69aeec8f4d1650 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 3 Aug 2022 10:47:12 -0400 Subject: [PATCH 020/546] [DOC] Clarify that `IO.read` uses text mode See: https://bugs.ruby-lang.org/issues/18882#note-13 [Bug #18882] --- io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io.c b/io.c index 1fe927895779f1..c91899a6fe14ad 100644 --- a/io.c +++ b/io.c @@ -11776,12 +11776,16 @@ seek_before_access(VALUE argp) * IO.read('| cat t.txt') * # => "First line\nSecond line\n\nThird line\nFourth line\n" * - * With only argument +path+ given, reads and returns the entire content + * With only argument +path+ given, reads in text mode and returns the entire content * of the file at the given path: * * IO.read('t.txt') * # => "First line\nSecond line\n\nThird line\nFourth line\n" * + * On Windows, text mode can terminate reading and leave bytes in the file + * unread when encountering certain special bytes. Consider using + * IO.binread if all bytes in the file should be read. + * * For both forms, command and path, the remaining arguments are the same. 
* * With argument +length+, returns +length+ bytes if available: From ca8daf70fa7045aa1e505514497d1ac4720a6b16 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 3 Aug 2022 10:48:43 -0400 Subject: [PATCH 021/546] [DOC] Mention Windows text mode EOF marker interpretation I don't think this is super well known so it's worth mentioning as it can be a pitfall. See: https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/fopen-wfopen?view=msvc-170 Co-authored-by: Nobuyoshi Nakada --- io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/io.c b/io.c index c91899a6fe14ad..f2c64989df75f8 100644 --- a/io.c +++ b/io.c @@ -14504,9 +14504,11 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * Either of the following may be suffixed to any of the string read/write modes above: * * - 't': Text data; sets the default external encoding to +Encoding::UTF_8+; - * on Windows, enables conversion between EOL and CRLF. + * on Windows, enables conversion between EOL and CRLF and enables interpreting +0x1A+ + * as an end-of-file marker. * - 'b': Binary data; sets the default external encoding to +Encoding::ASCII_8BIT+; - * on Windows, suppresses conversion between EOL and CRLF. + * on Windows, suppresses conversion between EOL and CRLF and disables interpreting +0x1A+ + * as an end-of-file marker. * * If neither is given, the stream defaults to text data. 
* From c433d36b5bd0b6a649a0c4112a9868bd5190cbc6 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 3 Aug 2022 11:12:18 -0400 Subject: [PATCH 022/546] Test that File.read defaults to text mode Co-authored-by: Nobuyoshi Nakada --- spec/ruby/core/io/read_spec.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/spec/ruby/core/io/read_spec.rb b/spec/ruby/core/io/read_spec.rb index 841e693f373c47..28cab13340def9 100644 --- a/spec/ruby/core/io/read_spec.rb +++ b/spec/ruby/core/io/read_spec.rb @@ -104,6 +104,14 @@ str = IO.read(@fname, encoding: Encoding::ISO_8859_1) str.encoding.should == Encoding::ISO_8859_1 end + + platform_is :windows do + it "reads the file in text mode" do + # 0x1A is CTRL+Z and is EOF in Windows text mode. + File.binwrite(@fname, "\x1Abbb") + IO.read(@fname).should.empty? + end + end end describe "IO.read from a pipe" do From 58c8b6e86273ccb7a1b903d9ab35956b69b3b1bf Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 6 Aug 2022 10:13:20 +0900 Subject: [PATCH 023/546] Adjust styles [ci skip] --- enumerator.c | 24 ++++++++++++++++-------- marshal.c | 3 ++- ruby.c | 3 ++- thread_sync.c | 6 ++++-- vm_backtrace.c | 6 ++++-- win32/win32.c | 6 ++++-- 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/enumerator.c b/enumerator.c index ce2eacbd2a8d8a..d7546ee9e82aa5 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1796,7 +1796,8 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) * Expands +lazy+ enumerator to an array. * See Enumerable#to_a. */ -static VALUE lazy_to_a(VALUE self) +static VALUE +lazy_to_a(VALUE self) { } #endif @@ -2753,7 +2754,8 @@ lazy_with_index(int argc, VALUE *argv, VALUE obj) * * Like Enumerable#chunk, but chains operation to be lazy-evaluated. */ -static VALUE lazy_chunk(VALUE self) +static VALUE +lazy_chunk(VALUE self) { } @@ -2763,7 +2765,8 @@ static VALUE lazy_chunk(VALUE self) * * Like Enumerable#chunk_while, but chains operation to be lazy-evaluated. 
*/ -static VALUE lazy_chunk_while(VALUE self) +static VALUE +lazy_chunk_while(VALUE self) { } @@ -2774,7 +2777,8 @@ static VALUE lazy_chunk_while(VALUE self) * * Like Enumerable#slice_after, but chains operation to be lazy-evaluated. */ -static VALUE lazy_slice_after(VALUE self) +static VALUE +lazy_slice_after(VALUE self) { } @@ -2785,7 +2789,8 @@ static VALUE lazy_slice_after(VALUE self) * * Like Enumerable#slice_before, but chains operation to be lazy-evaluated. */ -static VALUE lazy_slice_before(VALUE self) +static VALUE +lazy_slice_before(VALUE self) { } @@ -2795,7 +2800,8 @@ static VALUE lazy_slice_before(VALUE self) * * Like Enumerable#slice_when, but chains operation to be lazy-evaluated. */ -static VALUE lazy_slice_when(VALUE self) +static VALUE +lazy_slice_when(VALUE self) { } # endif @@ -3562,7 +3568,8 @@ product_each(VALUE obj, struct product_state *pstate) VALUE eobj = RARRAY_AREF(enums, pstate->index); rb_block_call(eobj, id_each_entry, 0, NULL, product_each_i, (VALUE)pstate); - } else { + } + else { rb_funcallv(pstate->block, id_call, pstate->argc, pstate->argv); } @@ -3677,7 +3684,8 @@ enumerator_s_product(VALUE klass, VALUE enums) if (rb_block_given_p()) { return enum_product_run(obj, rb_block_proc()); - } else { + } + else { return obj; } } diff --git a/marshal.c b/marshal.c index 43102a54c5b78b..325d5f126eccec 100644 --- a/marshal.c +++ b/marshal.c @@ -2260,7 +2260,8 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze) return v; } -static VALUE marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze) +static VALUE +marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze) { return rb_marshal_load_with_proc(source, proc, RTEST(freeze)); } diff --git a/ruby.c b/ruby.c index 9b9bfb54c7795b..991c9031dea98b 100644 --- a/ruby.c +++ b/ruby.c @@ -1529,7 +1529,8 @@ void rb_call_builtin_inits(void); #if RBIMPL_HAS_ATTRIBUTE(weak) __attribute__((weak)) #endif -void 
Init_extra_exts(void) +void +Init_extra_exts(void) { } diff --git a/thread_sync.c b/thread_sync.c index 0359ac2214d808..63db1c43922f52 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -624,7 +624,8 @@ rb_mutex_synchronize_m(VALUE self) return rb_mutex_synchronize(self, rb_yield, Qundef); } -void rb_mutex_allow_trap(VALUE self, int val) +void +rb_mutex_allow_trap(VALUE self, int val) { Check_TypedStruct(self, &mutex_data_type); @@ -714,7 +715,8 @@ queue_ptr(VALUE obj) #define QUEUE_CLOSED FL_USER5 static rb_hrtime_t -queue_timeout2hrtime(VALUE timeout) { +queue_timeout2hrtime(VALUE timeout) +{ if (NIL_P(timeout)) { return (rb_hrtime_t)0; } diff --git a/vm_backtrace.c b/vm_backtrace.c index 5bd588df127e85..2e898507dfb995 100644 --- a/vm_backtrace.c +++ b/vm_backtrace.c @@ -1176,12 +1176,14 @@ rb_vm_thread_backtrace_locations(int argc, const VALUE *argv, VALUE thval) return thread_backtrace_to_ary(argc, argv, thval, 0); } -VALUE rb_vm_backtrace(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) +VALUE +rb_vm_backtrace(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) { return ec_backtrace_to_ary(ec, argc, argv, 0, 0, 1); } -VALUE rb_vm_backtrace_locations(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) +VALUE +rb_vm_backtrace_locations(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) { return ec_backtrace_to_ary(ec, argc, argv, 0, 0, 0); } diff --git a/win32/win32.c b/win32/win32.c index 6d3e368565c4a5..edf89be4b18305 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -5406,7 +5406,8 @@ wrename(const WCHAR *oldpath, const WCHAR *newpath) } /* License: Ruby's */ -int rb_w32_urename(const char *from, const char *to) +int +rb_w32_urename(const char *from, const char *to) { WCHAR *wfrom; WCHAR *wto; @@ -5425,7 +5426,8 @@ int rb_w32_urename(const char *from, const char *to) } /* License: Ruby's */ -int rb_w32_rename(const char *from, const char *to) +int +rb_w32_rename(const char *from, 
const char *to) { WCHAR *wfrom; WCHAR *wto; From 27173e3735ff509dc8d9cc9b410baff84adf57dc Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 28 Jul 2022 16:50:26 +0900 Subject: [PATCH 024/546] Allow `RUBY_DEBUG_LOG` format to be empty GCC warns of empty format strings, perhaps because they have no effects in printf() and there are better ways than sprintf(). However, ruby_debug_log() adds informations other than the format, this warning is not the case. --- thread.c | 10 +++++----- thread_win32.c | 4 ++-- vm_debug.h | 9 +++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/thread.c b/thread.c index feb89d435271ae..b2711281938ac4 100644 --- a/thread.c +++ b/thread.c @@ -1353,14 +1353,14 @@ sleep_hrtime_until(rb_thread_t *th, rb_hrtime_t end, unsigned int fl) void rb_thread_sleep_forever(void) { - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK); } void rb_thread_sleep_deadly(void) { - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); sleep_forever(GET_THREAD(), SLEEP_DEADLOCKABLE|SLEEP_SPURIOUS_CHECK); } @@ -1384,7 +1384,7 @@ rb_thread_sleep_deadly_allow_spurious_wakeup(VALUE blocker, VALUE timeout, rb_hr rb_fiber_scheduler_block(scheduler, blocker, timeout); } else { - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); if (end) { sleep_hrtime_until(GET_THREAD(), end, SLEEP_SPURIOUS_CHECK); } @@ -1481,7 +1481,7 @@ blocking_region_begin(rb_thread_t *th, struct rb_blocking_region_buffer *region, th->status = THREAD_STOPPED; rb_ractor_blocking_threads_inc(th->ractor, __FILE__, __LINE__); - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); RB_GC_SAVE_MACHINE_CONTEXT(th); thread_sched_to_waiting(TH_SCHED(th)); @@ -1509,7 +1509,7 @@ blocking_region_end(rb_thread_t *th, struct rb_blocking_region_buffer *region) th->status = region->prev_status; } - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); VM_ASSERT(th == GET_THREAD()); } diff --git a/thread_win32.c b/thread_win32.c index 
cbb01d5d84f513..e9deff23cc1a3e 100644 --- a/thread_win32.c +++ b/thread_win32.c @@ -746,7 +746,7 @@ static unsigned long __stdcall timer_thread_func(void *dummy) { rb_vm_t *vm = GET_VM(); - RUBY_DEBUG_LOG("%s", "start"); + RUBY_DEBUG_LOG("start"); rb_w32_set_thread_description(GetCurrentThread(), L"ruby-timer-thread"); while (WaitForSingleObject(timer_thread.lock, TIME_QUANTUM_USEC/1000) == WAIT_TIMEOUT) { @@ -754,7 +754,7 @@ timer_thread_func(void *dummy) ruby_sigchld_handler(vm); /* probably no-op */ rb_threadptr_check_signal(vm->ractor.main_thread); } - RUBY_DEBUG_LOG("%s", "end"); + RUBY_DEBUG_LOG("end"); return 0; } diff --git a/vm_debug.h b/vm_debug.h index 59561056488001..9c7fc65f7c1f92 100644 --- a/vm_debug.h +++ b/vm_debug.h @@ -86,6 +86,15 @@ void ruby_debug_log(const char *file, int line, const char *func_name, const cha void ruby_debug_log_print(unsigned int n); bool ruby_debug_log_filter(const char *func_name, const char *file_name); +#if RBIMPL_COMPILER_IS(GCC) && defined(__OPTIMIZE__) +# define ruby_debug_log(...) \ + RB_GNUC_EXTENSION_BLOCK( \ + RBIMPL_WARNING_PUSH(); \ + RBIMPL_WARNING_IGNORED(-Wformat-zero-length); \ + ruby_debug_log(__VA_ARGS__); \ + RBIMPL_WARNING_POP()) +#endif + // convenient macro to log even if the USE_RUBY_DEBUG_LOG macro is not specified. // You can use this macro for temporary usage (you should not commit it). #define _RUBY_DEBUG_LOG(...) 
ruby_debug_log(__FILE__, __LINE__, RUBY_FUNCTION_NAME_STRING, "" __VA_ARGS__) From af40af45b28a9576d5ae6a46d9678c0e3d6be005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 17 Jan 2022 21:47:32 +0100 Subject: [PATCH 025/546] [rubygems/rubygems] Extract `SourceList#get_with_fallback` https://github.com/rubygems/rubygems/commit/9dbc4757a8 --- lib/bundler/definition.rb | 2 +- lib/bundler/source_list.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index ccb3ab1cec9942..30e1820eeccd75 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -743,7 +743,7 @@ def converge_specs(specs) # Replace the locked dependency's source with the equivalent source from the Gemfile dep = @dependencies.find {|d| s.satisfies?(d) } - s.source = (dep && dep.source) || sources.get(s.source) || sources.default_source + s.source = (dep && dep.source) || sources.get_with_fallback(s.source) next if @unlock[:sources].include?(s.source.name) diff --git a/lib/bundler/source_list.rb b/lib/bundler/source_list.rb index a4773397c7df4c..6ea2910d185203 100644 --- a/lib/bundler/source_list.rb +++ b/lib/bundler/source_list.rb @@ -101,6 +101,10 @@ def get(source) source_list_for(source).find {|s| equivalent_source?(source, s) } end + def get_with_fallback(source) + get(source) || default_source + end + def lock_sources lock_other_sources + lock_rubygems_sources end From 4ea521f6c7a288d985775ab0a380620db865fac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 17 Jan 2022 22:34:32 +0100 Subject: [PATCH 026/546] [rubygems/rubygems] Remove unclear comment https://github.com/rubygems/rubygems/commit/3a843c1ac7 --- lib/bundler/definition.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 30e1820eeccd75..71322d6d41932f 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -747,9 
+747,6 @@ def converge_specs(specs) next if @unlock[:sources].include?(s.source.name) - # If the spec is from a path source and it doesn't exist anymore - # then we unlock it. - # Path sources have special logic if s.source.instance_of?(Source::Path) || s.source.instance_of?(Source::Gemspec) new_specs = begin From 8dd63b89d97a0ab149288f2e46d814fb60cb3ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 17 Jan 2022 23:01:01 +0100 Subject: [PATCH 027/546] [rubygems/rubygems] Move comment where the actual replacement happens https://github.com/rubygems/rubygems/commit/d60acdf80d --- lib/bundler/definition.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 71322d6d41932f..536099a20a3cee 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -740,9 +740,9 @@ def converge_specs(specs) end specs.each do |s| - # Replace the locked dependency's source with the equivalent source from the Gemfile dep = @dependencies.find {|d| s.satisfies?(d) } + # Replace the locked dependency's source with the equivalent source from the Gemfile s.source = (dep && dep.source) || sources.get_with_fallback(s.source) next if @unlock[:sources].include?(s.source.name) From 466a760e1807e629d0ec9f9ebf160d3c3f649d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Wed, 3 Aug 2022 19:03:50 +0200 Subject: [PATCH 028/546] [rubygems/rubygems] Fix yanked gems being unintentionally update when other gems are unlocked This is a regression from a change intended to raise errors when user puts a gem under an incorrect source in the Gemfile by mistake. To fix the issue, we revert the change that caused it and implement it in a different way that restores the resolver independency from real specifications. Now it deals only with names and versions and does not try to materialize anything into real specifications before resolving. 
https://github.com/rubygems/rubygems/commit/d2bf1b86eb --- lib/bundler/definition.rb | 22 ++++++++++- lib/bundler/lazy_specification.rb | 8 ---- lib/bundler/resolver.rb | 3 +- lib/bundler/spec_set.rb | 9 ----- spec/bundler/install/yanked_spec.rb | 57 +++++++++++++++++++++++++++++ spec/bundler/resolver/basic_spec.rb | 2 +- 6 files changed, 81 insertions(+), 20 deletions(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 536099a20a3cee..38bd01f08f8ea7 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -739,11 +739,22 @@ def converge_specs(specs) specs[dep].any? {|s| s.satisfies?(dep) && (!dep.source || s.source.include?(dep.source)) } end + @specs_that_changed_sources = [] + specs.each do |s| dep = @dependencies.find {|d| s.satisfies?(d) } # Replace the locked dependency's source with the equivalent source from the Gemfile - s.source = (dep && dep.source) || sources.get_with_fallback(s.source) + s.source = if dep && dep.source + gemfile_source = dep.source + lockfile_source = s.source + + @specs_that_changed_sources << s if gemfile_source != lockfile_source + + gemfile_source + else + sources.get_with_fallback(s.source) + end next if @unlock[:sources].include?(s.source.name) @@ -821,9 +832,18 @@ def source_requirements end source_requirements[:default_bundler] = source_requirements["bundler"] || sources.default_source source_requirements["bundler"] = sources.metadata_source # needs to come last to override + verify_changed_sources! source_requirements end + def verify_changed_sources! + @specs_that_changed_sources.each do |s| + if s.source.specs.search(s.name).empty? + raise GemNotFound, "Could not find gem '#{s.name}' in #{s.source}" + end + end + end + def requested_groups values = groups - Bundler.settings[:without] - @optional_groups + Bundler.settings[:with] values &= Bundler.settings[:only] unless Bundler.settings[:only].empty? 
diff --git a/lib/bundler/lazy_specification.rb b/lib/bundler/lazy_specification.rb index 9f75c7bab263a5..5b40bec5a814a2 100644 --- a/lib/bundler/lazy_specification.rb +++ b/lib/bundler/lazy_specification.rb @@ -93,14 +93,6 @@ def materialize_for_installation __materialize__(candidates) end - def materialize_for_resolution - return self unless Gem::Platform.match_spec?(self) - - candidates = source.specs.search(self) - - __materialize__(candidates) - end - def __materialize__(candidates) @specification = begin search = candidates.reverse.find do |spec| diff --git a/lib/bundler/resolver.rb b/lib/bundler/resolver.rb index 40bc247b32253d..ca1bdbda7bcdf7 100644 --- a/lib/bundler/resolver.rb +++ b/lib/bundler/resolver.rb @@ -28,10 +28,11 @@ def self.resolve(requirements, source_requirements = {}, base = [], gem_version_ def initialize(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms, metadata_requirements) @source_requirements = source_requirements @metadata_requirements = metadata_requirements + @base = base @resolver = Molinillo::Resolver.new(self, self) @search_for = {} @base_dg = Molinillo::DependencyGraph.new - @base = base.materialized_for_resolution do |ls| + base.each do |ls| dep = Dependency.new(ls.name, ls.version) @base_dg.add_vertex(ls.name, DepProxy.get_proxy(dep, ls.platform), true) end diff --git a/lib/bundler/spec_set.rb b/lib/bundler/spec_set.rb index d7239f5c8c1d29..14733269d611d2 100644 --- a/lib/bundler/spec_set.rb +++ b/lib/bundler/spec_set.rb @@ -82,15 +82,6 @@ def materialized_for_all_platforms end end - def materialized_for_resolution - materialized = @specs.map do |s| - spec = s.materialize_for_resolution - yield spec if spec - spec - end.compact - SpecSet.new(materialized) - end - def incomplete_ruby_specs?(deps) self.class.new(self.for(deps, true, [Gem::Platform::RUBY])).incomplete_specs.any? 
end diff --git a/spec/bundler/install/yanked_spec.rb b/spec/bundler/install/yanked_spec.rb index 44fbb0bda3a5f1..09a5ba0be1fa60 100644 --- a/spec/bundler/install/yanked_spec.rb +++ b/spec/bundler/install/yanked_spec.rb @@ -43,6 +43,63 @@ end end +RSpec.context "when resolving a bundle that includes yanked gems, but unlocking an unrelated gem" do + before(:each) do + build_repo4 do + build_gem "foo", "10.0.0" + + build_gem "bar", "1.0.0" + build_gem "bar", "2.0.0" + end + + lockfile <<-L + GEM + remote: #{file_uri_for(gem_repo4)} + specs: + foo (9.0.0) + bar (1.0.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + foo + bar + + BUNDLED WITH + #{Bundler::VERSION} + L + + gemfile <<-G + source "#{file_uri_for(gem_repo4)}" + gem "foo" + gem "bar" + G + end + + it "does not update the yanked gem" do + bundle "lock --update bar" + + expect(lockfile).to eq <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + bar (2.0.0) + foo (9.0.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + bar + foo + + BUNDLED WITH + #{Bundler::VERSION} + L + end +end + RSpec.context "when using gem before installing" do it "does not suggest the author has yanked the gem" do gemfile <<-G diff --git a/spec/bundler/resolver/basic_spec.rb b/spec/bundler/resolver/basic_spec.rb index 7182d1e29c6ffc..ee62dc3577aba8 100644 --- a/spec/bundler/resolver/basic_spec.rb +++ b/spec/bundler/resolver/basic_spec.rb @@ -233,7 +233,7 @@ it "resolves foo only to latest patch - changing dependency declared case" do # bar is locked AND a declared dependency in the Gemfile, so it will not move, and therefore # foo can only move up to 1.4.4. 
- @base << Bundler::LazySpecification.new("bar", "2.0.3", nil) + @base << build_spec("bar", "2.0.3").first should_conservative_resolve_and_include :patch, ["foo"], %w[foo-1.4.4 bar-2.0.3] end From f245b425af2e560a85edac85ad4426a04b01e4de Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 6 Aug 2022 23:53:13 +0900 Subject: [PATCH 029/546] Fix the sizes comparison `proc_syswait` will be called with a `VALUE` argument. --- process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process.c b/process.c index fb31418e0058f7..0ba555562c920e 100644 --- a/process.c +++ b/process.c @@ -3773,7 +3773,7 @@ rb_exec_atfork(void* arg, char *errmsg, size_t errmsg_buflen) return rb_exec_async_signal_safe(arg, errmsg, errmsg_buflen); /* hopefully async-signal-safe */ } -#if SIZEOF_INT == SIZEOF_LONG +#if SIZEOF_INT == SIZEOF_VALUE #define proc_syswait (VALUE (*)(VALUE))rb_syswait #else static VALUE From e545cfad20eccf72f0b7d02660ceb56ec0f1dcf7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 7 Aug 2022 00:02:38 +0900 Subject: [PATCH 030/546] Stop using casted `rb_syswait` as `proc_syswait` The argument of `rb_syswait` is now `rb_pid_t` which may differ from `int`. Also it is an undefined behavior to take the result of casted void function (in `rb_protect`). 
--- process.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/process.c b/process.c index 0ba555562c920e..0e4dbc40f4d26c 100644 --- a/process.c +++ b/process.c @@ -3773,16 +3773,12 @@ rb_exec_atfork(void* arg, char *errmsg, size_t errmsg_buflen) return rb_exec_async_signal_safe(arg, errmsg, errmsg_buflen); /* hopefully async-signal-safe */ } -#if SIZEOF_INT == SIZEOF_VALUE -#define proc_syswait (VALUE (*)(VALUE))rb_syswait -#else static VALUE proc_syswait(VALUE pid) { - rb_syswait((int)pid); + rb_syswait((rb_pid_t)pid); return Qnil; } -#endif static int move_fds_to_avoid_crash(int *fdp, int n, VALUE fds) From 6d742c9412d444650d705b65bc2d5c850054c226 Mon Sep 17 00:00:00 2001 From: git Date: Sun, 7 Aug 2022 00:03:21 +0900 Subject: [PATCH 031/546] * 2022-08-07 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 5fc7661c95693b..da964b81aa474c 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 6 +#define RUBY_RELEASE_DAY 7 #include "ruby/version.h" #include "ruby/internal/abi.h" From 591ee9d068de174460f0e6fe81b36e7625b90f6c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 7 Aug 2022 13:09:46 +0900 Subject: [PATCH 032/546] [DOC] Add return values of rb_enc_mbcput --- include/ruby/internal/encoding/encoding.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h index 22deb8f8c908e5..4748ca806b17ca 100644 --- a/include/ruby/internal/encoding/encoding.h +++ b/include/ruby/internal/encoding/encoding.h @@ -643,10 +643,12 @@ rb_enc_code_to_mbclen(int c, rb_encoding *enc) * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer * instead of allocating one. * - * @param[in] c Code point. - * @param[out] buf Return buffer. - * @param[in] enc Target encoding scheme. 
- * @post `c` is encoded according to `enc`, then written to `buf`. + * @param[in] c Code point. + * @param[out] buf Return buffer. + * @param[in] enc Target encoding scheme. + * @retval <= 0 `c` is invalid in `enc`. + * @return otherwise Number of bytes written to `buf`. + * @post `c` is encoded according to `enc`, then written to `buf`. * * @internal * From f2423be49d6f1b6ae42f542c43b8b02d5115dc0c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 7 Aug 2022 14:46:59 +0900 Subject: [PATCH 033/546] Duplicate libruby self path When LOAD_RELATIVE, as `sopath` is truncated to the prefix path, make the duplicate before it. Also make `rb_libruby_selfpath` frozen and hidden. --- ruby.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ruby.c b/ruby.c index 991c9031dea98b..7c6742cac47905 100644 --- a/ruby.c +++ b/ruby.c @@ -629,7 +629,13 @@ ruby_init_loadpath(void) #if defined(LOAD_RELATIVE) || defined(__MACH__) VALUE libruby_path = runtime_libruby_path(); # if defined(__MACH__) - rb_libruby_selfpath = libruby_path; + VALUE selfpath = libruby_path; +# if defined(LOAD_RELATIVE) + selfpath = rb_str_dup(selfpath); +# endif + rb_obj_hide(selfpath); + OBJ_FREEZE_RAW(selfpath); + rb_libruby_selfpath = selfpath; rb_gc_register_address(&rb_libruby_selfpath); # endif #endif From 0bb5525eb2966b5821eb714c898b1c9c33221660 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 7 Aug 2022 17:01:22 +0900 Subject: [PATCH 034/546] Fix files for gemspec files direct-under lib Collected `files` lacked `lib` prefix. 
--- tool/rbinstall.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/rbinstall.rb b/tool/rbinstall.rb index ab3193f9e0f9d5..ba0d911fc23130 100755 --- a/tool/rbinstall.rb +++ b/tool/rbinstall.rb @@ -773,7 +773,7 @@ def ruby_libraries remove_prefix(prefix, ruby_source) end else - [File.basename(@gemspec, '.gemspec') + '.rb'] + [@gemspec[%r[(?:[^/]+/)?[^/]+(?=\.gemspec\z)]] + '.rb'] end case File.basename(@gemspec, ".gemspec") From 03f86565a69687b335d16d873e2b1f35cb356bec Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 7 Aug 2022 17:57:52 +0900 Subject: [PATCH 035/546] Silent backtrace from cve_2019_8325_spec.rb Since the change at f310ac1cb2964f635f582862763b2155aacf2c12 to show the backtraces by default, this test started to show the backtraces. As the backtraces are not the subject of this test, silence them by using Gem::SilentUI. --- spec/ruby/security/cve_2019_8325_spec.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spec/ruby/security/cve_2019_8325_spec.rb b/spec/ruby/security/cve_2019_8325_spec.rb index 04692e01fec2ba..7c5e216568e4f1 100644 --- a/spec/ruby/security/cve_2019_8325_spec.rb +++ b/spec/ruby/security/cve_2019_8325_spec.rb @@ -5,8 +5,16 @@ describe "CVE-2019-8325 is resisted by" do describe "sanitising error message components" do + silent_ui = Module.new do + attr_accessor :ui + def self.extended(obj) + obj.ui = Gem::SilentUI.new + end + end + it "for the 'while executing' message" do manager = Gem::CommandManager.new + manager.extend(silent_ui) def manager.process_args(args, build_args) raise StandardError, "\e]2;nyan\a" end @@ -26,6 +34,7 @@ def manager.terminate_interaction(n) it "for the 'loading command' message" do manager = Gem::CommandManager.new + manager.extend(silent_ui) def manager.require(x) raise 'foo' end From e5e6b87e265b95620821f7abac56b5ab90d4c1fd Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 6 Aug 2022 20:56:23 +0900 Subject: [PATCH 036/546] Create temporary file 
exclusively and clean --- ext/extmk.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ext/extmk.rb b/ext/extmk.rb index 1624ec9099eb40..40fc10ea1c4258 100755 --- a/ext/extmk.rb +++ b/ext/extmk.rb @@ -66,12 +66,17 @@ def system(*args) def atomic_write_open(filename) filename_new = filename + ".new.#$$" - open(filename_new, "wb") do |f| + clean = false + File.open(filename_new, "wbx") do |f| + clean = true yield f end if File.binread(filename_new) != (File.binread(filename) rescue nil) File.rename(filename_new, filename) - else + clean = false + end +ensure + if clean File.unlink(filename_new) end end From 23a84d53c682e8db1d9d5b9b33fc20dc475179c2 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Sun, 7 Aug 2022 07:07:12 -0500 Subject: [PATCH 037/546] [ruby/rdoc] [DOC] Removes remaining old Markup Reference (https://github.com/ruby/rdoc/pull/910) https://github.com/ruby/rdoc/commit/4e44c9c6cf --- doc/rdoc/markup_reference.rb | 31 ++++ lib/rdoc/markup.rb | 349 ----------------------------------- 2 files changed, 31 insertions(+), 349 deletions(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index 49ad996c2d22e0..c59f12cfe3c59e 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -662,6 +662,37 @@ # # [Two words] Two words in labeled list item. # # ====== Two words in heading # +# ==== Escaping Text Markup +# +# Text markup can be escaped with a backslash, as in \, which was obtained +# with \\. Except in verbatim sections and between \ tags, +# to produce a backslash you have to double it unless it is followed by a +# space, tab or newline. Otherwise, the HTML formatter will discard it, as it +# is used to escape potential links: +# +# * The \ must be doubled if not followed by white space: \\. +# * But not in \ tags: in a Regexp, \S matches non-space. +# * This is a link to {ruby-lang}[https://www.ruby-lang.org]. 
+# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org]. +# * This will not be linked to \RDoc::RDoc#document +# +# generates: +# +# * The \ must be doubled if not followed by white space: \\. +# * But not in \ tags: in a Regexp, \S matches non-space. +# * This is a link to {ruby-lang}[https://www.ruby-lang.org] +# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org] +# * This will not be linked to \RDoc::RDoc#document +# +# Inside \ tags, more precisely, leading backslashes are removed only if +# followed by a markup character (<*_+), a backslash, or a known link +# reference (a known class or method). So in the example above, the backslash +# of \S would be removed if there was a class or module named +S+ in +# the current context. +# +# This behavior is inherited from RDoc version 1, and has been kept for +# compatibility with existing RDoc documentation. +# # ==== Character Conversions # # Certain combinations of characters may be converted to special characters; diff --git a/lib/rdoc/markup.rb b/lib/rdoc/markup.rb index 7fec1c686970cf..6e9303096509b0 100644 --- a/lib/rdoc/markup.rb +++ b/lib/rdoc/markup.rb @@ -99,355 +99,6 @@ # # See RDoc::MarkupReference. # -# === Escaping Text Markup -# -# Text markup can be escaped with a backslash, as in \, which was obtained -# with \\. Except in verbatim sections and between \ tags, -# to produce a backslash you have to double it unless it is followed by a -# space, tab or newline. Otherwise, the HTML formatter will discard it, as it -# is used to escape potential links: -# -# * The \ must be doubled if not followed by white space: \\. -# * But not in \ tags: in a Regexp, \S matches non-space. -# * This is a link to {ruby-lang}[https://www.ruby-lang.org]. -# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org]. -# * This will not be linked to \RDoc::RDoc#document -# -# generates: -# -# * The \ must be doubled if not followed by white space: \\. 
-# * But not in \ tags: in a Regexp, \S matches non-space. -# * This is a link to {ruby-lang}[https://www.ruby-lang.org] -# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org] -# * This will not be linked to \RDoc::RDoc#document -# -# Inside \ tags, more precisely, leading backslashes are removed only if -# followed by a markup character (<*_+), a backslash, or a known link -# reference (a known class or method). So in the example above, the backslash -# of \S would be removed if there was a class or module named +S+ in -# the current context. -# -# This behavior is inherited from RDoc version 1, and has been kept for -# compatibility with existing RDoc documentation. -# -# === Conversion of characters -# -# HTML will convert two/three dashes to an em-dash. Other common characters are -# converted as well: -# -# em-dash:: -- or --- -# ellipsis:: ... -# -# single quotes:: 'text' or `text' -# double quotes:: "text" or ``text'' -# -# copyright:: (c) -# registered trademark:: (r) -# -# produces: -# -# em-dash:: -- or --- -# ellipsis:: ... -# -# single quotes:: 'text' or `text' -# double quotes:: "text" or ``text'' -# -# copyright:: (c) -# registered trademark:: (r) -# -# -# == Documenting Source Code -# -# Comment blocks can be written fairly naturally, either using # on -# successive lines of the comment, or by including the comment in -# a =begin/=end block. If you use the latter form, -# the =begin line _must_ be flagged with an +rdoc+ tag: -# -# =begin rdoc -# Documentation to be processed by RDoc. -# -# ... -# =end -# -# RDoc stops processing comments if it finds a comment line starting -# with -- right after the # character (otherwise, -# it will be treated as a rule if it has three dashes or more). -# This can be used to separate external from internal comments, -# or to stop a comment being associated with a method, class, or module. -# Commenting can be turned back on with a line that starts with ++. 
-# -# ## -# # Extract the age and calculate the date-of-birth. -# #-- -# # FIXME: fails if the birthday falls on February 29th -# #++ -# # The DOB is returned as a Time object. -# -# def get_dob(person) -# # ... -# end -# -# Names of classes, files, and any method names containing an underscore or -# preceded by a hash character are automatically linked from comment text to -# their description. This linking works inside the current class or module, -# and with ancestor methods (in included modules or in the superclass). -# -# Method parameter lists are extracted and displayed with the method -# description. If a method calls +yield+, then the parameters passed to yield -# will also be displayed: -# -# def fred -# ... -# yield line, address -# -# This will get documented as: -# -# fred() { |line, address| ... } -# -# You can override this using a comment containing ':yields: ...' immediately -# after the method definition -# -# def fred # :yields: index, position -# # ... -# -# yield line, address -# -# which will get documented as -# -# fred() { |index, position| ... } -# -# +:yields:+ is an example of a documentation directive. These appear -# immediately after the start of the document element they are modifying. -# -# RDoc automatically cross-references words with underscores or camel-case. -# To suppress cross-references, prefix the word with a \ character. To -# include special characters like "\n", you'll need to use -# two \ characters in normal text, but only one in \ text: -# -# "\\n" or "\n" -# -# produces: -# -# "\\n" or "\n" -# -# == Directives -# -# Directives are keywords surrounded by ":" characters. -# -# === Controlling what is documented -# -# [+:nodoc:+ / :nodoc: all] -# This directive prevents documentation for the element from -# being generated. For classes and modules, methods, aliases, -# constants, and attributes directly within the affected class or -# module also will be omitted. 
By default, though, modules and -# classes within that class or module _will_ be documented. This is -# turned off by adding the +all+ modifier. -# -# module MyModule # :nodoc: -# class Input -# end -# end -# -# module OtherModule # :nodoc: all -# class Output -# end -# end -# -# In the above code, only class MyModule::Input will be documented. -# -# The +:nodoc:+ directive, like +:enddoc:+, +:stopdoc:+ and +:startdoc:+ -# presented below, is local to the current file: if you do not want to -# document a module that appears in several files, specify +:nodoc:+ on each -# appearance, at least once per file. -# -# [+:stopdoc:+ / +:startdoc:+] -# Stop and start adding new documentation elements to the current container. -# For example, if a class has a number of constants that you don't want to -# document, put a +:stopdoc:+ before the first, and a +:startdoc:+ after the -# last. If you don't specify a +:startdoc:+ by the end of the container, -# disables documentation for the rest of the current file. -# -# [+:doc:+] -# Forces a method or attribute to be documented even if it wouldn't be -# otherwise. Useful if, for example, you want to include documentation of a -# particular private method. -# -# [+:enddoc:+] -# Document nothing further at the current level: directives +:startdoc:+ and -# +:doc:+ that appear after this will not be honored for the current container -# (file, class or module), in the current file. -# -# [+:notnew:+ / +:not_new:+ / +:not-new:+ ] -# Only applicable to the +initialize+ instance method. Normally RDoc -# assumes that the documentation and parameters for +initialize+ are -# actually for the +new+ method, and so fakes out a +new+ for the class. -# The +:notnew:+ directive stops this. Remember that +initialize+ is private, -# so you won't see the documentation unless you use the +-a+ command line -# option. 
-# -# === Method arguments -# -# [+:arg:+ or +:args:+ _parameters_] -# Overrides the default argument handling with exactly these parameters. -# -# ## -# # :args: a, b -# -# def some_method(*a) -# end -# -# [+:yield:+ or +:yields:+ _parameters_] -# Overrides the default yield discovery with these parameters. -# -# ## -# # :yields: key, value -# -# def each_thing &block -# @things.each(&block) -# end -# -# [+:call-seq:+] -# Lines up to the next blank line or lines with a common prefix in the -# comment are treated as the method's calling sequence, overriding the -# default parsing of method parameters and yield arguments. -# -# Multiple lines may be used. -# -# # :call-seq: -# # ARGF.readlines(sep=$/) -> array -# # ARGF.readlines(limit) -> array -# # ARGF.readlines(sep, limit) -> array -# # -# # ARGF.to_a(sep=$/) -> array -# # ARGF.to_a(limit) -> array -# # ARGF.to_a(sep, limit) -> array -# # -# # The remaining lines are documentation ... -# -# === Sections -# -# Sections allow you to group methods in a class into sensible containers. If -# you use the sections 'Public', 'Internal' and 'Deprecated' (the three -# allowed method statuses from TomDoc) the sections will be displayed in that -# order placing the most useful methods at the top. Otherwise, sections will -# be displayed in alphabetical order. -# -# [+:category:+ _section_] -# Adds this item to the named +section+ overriding the current section. Use -# this to group methods by section in RDoc output while maintaining a -# sensible ordering (like alphabetical). -# -# # :category: Utility Methods -# # -# # CGI escapes +text+ -# -# def convert_string text -# CGI.escapeHTML text -# end -# -# An empty category will place the item in the default category: -# -# # :category: -# # -# # This method is in the default category -# -# def some_method -# # ... -# end -# -# Unlike the :section: directive, :category: is not sticky. The category -# only applies to the item immediately following the comment. 
-# -# Use the :section: directive to provide introductory text for a section of -# documentation. -# -# [+:section:+ _title_] -# Provides section introductory text in RDoc output. The title following -# +:section:+ is used as the section name and the remainder of the comment -# containing the section is used as introductory text. A section's comment -# block must be separated from following comment blocks. Use an empty title -# to switch to the default section. -# -# The :section: directive is sticky, so subsequent methods, aliases, -# attributes, and classes will be contained in this section until the -# section is changed. The :category: directive will override the :section: -# directive. -# -# A :section: comment block may have one or more lines before the :section: -# directive. These will be removed, and any identical lines at the end of -# the block are also removed. This allows you to add visual cues to the -# section. -# -# Example: -# -# # ---------------------------------------- -# # :section: My Section -# # This is the section that I wrote. -# # See it glisten in the noon-day sun. -# # ---------------------------------------- -# -# ## -# # Comment for some_method -# -# def some_method -# # ... -# end -# -# === Other directives -# -# [+:markup:+ _type_] -# Overrides the default markup type for this comment with the specified -# markup type. For Ruby files, if the first comment contains this directive -# it is applied automatically to all comments in the file. -# -# Unless you are converting between markup formats you should use a -# .rdoc_options file to specify the default documentation -# format for your entire project. See RDoc::Options@Saved+Options for -# instructions. -# -# At the top of a file the +:markup:+ directive applies to the entire file: -# -# # coding: UTF-8 -# # :markup: TomDoc -# -# # TomDoc comment here ... -# -# class MyClass -# # ... -# -# For just one comment: -# -# # ... 
-# end -# -# # :markup: RDoc -# # -# # This is a comment in RDoc markup format ... -# -# def some_method -# # ... -# -# See Markup@CONTRIBUTING for instructions on adding a new markup format. -# -# [+:include:+ _filename_] -# Include the contents of the named file at this point. This directive -# must appear alone on one line, possibly preceded by spaces. In this -# position, it can be escaped with a \ in front of the first colon. -# -# The file will be searched for in the directories listed by the +--include+ -# option, or in the current directory by default. The contents of the file -# will be shifted to have the same indentation as the ':' at the start of -# the +:include:+ directive. -# -# [+:title:+ _text_] -# Sets the title for the document. Equivalent to the --title -# command line parameter. (The command line parameter overrides any :title: -# directive in the source). -# -# [+:main:+ _name_] -# Equivalent to the --main command line parameter. -# #-- # Original Author:: Dave Thomas, dave@pragmaticprogrammer.com # License:: Ruby license From 5beb75ce8dac1964f88dc82f4216892bf4256aed Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 8 Aug 2022 00:32:37 +0900 Subject: [PATCH 038/546] [ruby/rdoc] Allow multiple footnotes without in-between blank lines https://github.com/ruby/ruby/commit/e4e054e3ce40 used four footnotes without blank lines. And the ChangeLog generated from that commit resulted in ``undefined method `parts' for nil`` error. For now, let a footnote be terminated by the next footnote mark. Also refined the error message when an undefined footnote is used.
https://github.com/ruby/rdoc/commit/a7f290130b --- lib/rdoc/markdown.rb | 34 ++++++++++++++++++++++++--------- test/rdoc/test_rdoc_markdown.rb | 1 - 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/lib/rdoc/markdown.rb b/lib/rdoc/markdown.rb index 25a7217d3c6cd2..26b32f9b73aff4 100644 --- a/lib/rdoc/markdown.rb +++ b/lib/rdoc/markdown.rb @@ -811,7 +811,7 @@ def parse markdown @note_order.each_with_index do |ref, index| label = index + 1 - note = @footnotes[ref] + note = @footnotes[ref] or raise ParseError, "footnote [^#{ref}] not found" link = "{^#{label}}[rdoc-label:footmark-#{label}:foottext-#{label}] " note.parts.unshift link @@ -15533,7 +15533,7 @@ def _Notes return _tmp end - # RawNoteBlock = @StartList:a (!@BlankLine OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a } + # RawNoteBlock = @StartList:a (!@BlankLine !RawNoteReference OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a } def _RawNoteBlock _save = self.pos @@ -15556,6 +15556,14 @@ def _RawNoteBlock self.pos = _save2 break end + _save4 = self.pos + _tmp = apply(:_RawNoteReference) + _tmp = _tmp ? nil : true + self.pos = _save4 + unless _tmp + self.pos = _save2 + break + end _tmp = apply(:_OptionallyIndentedLine) l = @result unless _tmp @@ -15573,26 +15581,34 @@ def _RawNoteBlock if _tmp while true - _save4 = self.pos + _save5 = self.pos while true # sequence - _save5 = self.pos + _save6 = self.pos _tmp = _BlankLine() _tmp = _tmp ? nil : true - self.pos = _save5 + self.pos = _save6 unless _tmp - self.pos = _save4 + self.pos = _save5 + break + end + _save7 = self.pos + _tmp = apply(:_RawNoteReference) + _tmp = _tmp ? 
nil : true + self.pos = _save7 + unless _tmp + self.pos = _save5 break end _tmp = apply(:_OptionallyIndentedLine) l = @result unless _tmp - self.pos = _save4 + self.pos = _save5 break end @result = begin; a << l ; end _tmp = true unless _tmp - self.pos = _save4 + self.pos = _save5 end break end # end sequence @@ -16656,7 +16672,7 @@ def _DefinitionListDefinition Rules[:_Note] = rule_info("Note", "&{ notes? } @NonindentSpace RawNoteReference:ref \":\" @Sp @StartList:a RawNoteBlock:i { a.concat i } (&Indent RawNoteBlock:i { a.concat i })* { @footnotes[ref] = paragraph a nil }") Rules[:_InlineNote] = rule_info("InlineNote", "&{ notes? } \"^[\" @StartList:a (!\"]\" Inline:l { a << l })+ \"]\" { ref = [:inline, @note_order.length] @footnotes[ref] = paragraph a note_for ref }") Rules[:_Notes] = rule_info("Notes", "(Note | SkipBlock)*") - Rules[:_RawNoteBlock] = rule_info("RawNoteBlock", "@StartList:a (!@BlankLine OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a }") + Rules[:_RawNoteBlock] = rule_info("RawNoteBlock", "@StartList:a (!@BlankLine !RawNoteReference OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a }") Rules[:_CodeFence] = rule_info("CodeFence", "&{ github? } Ticks3 (@Sp StrChunk:format)? Spnl < ((!\"`\" Nonspacechar)+ | !Ticks3 /`+/ | Spacechar | @Newline)+ > Ticks3 @Sp @Newline* { verbatim = RDoc::Markup::Verbatim.new text verbatim.format = format.intern if format.instance_of?(String) verbatim }") Rules[:_Table] = rule_info("Table", "&{ github? 
} TableRow:header TableLine:line TableRow+:body { table = RDoc::Markup::Table.new(header, line, body) }") Rules[:_TableRow] = rule_info("TableRow", "TableItem+:row \"|\" @Newline { row }") diff --git a/test/rdoc/test_rdoc_markdown.rb b/test/rdoc/test_rdoc_markdown.rb index c223c44c1225f3..ca76c34f439650 100644 --- a/test/rdoc/test_rdoc_markdown.rb +++ b/test/rdoc/test_rdoc_markdown.rb @@ -761,7 +761,6 @@ def test_parse_note_multiple and an extra note.[^2] [^1]: With a footnote - [^2]: Which should be numbered correctly MD From 40f2078d48d5072d93c5116f75923a3f5b827e9c Mon Sep 17 00:00:00 2001 From: git Date: Mon, 8 Aug 2022 01:13:54 +0900 Subject: [PATCH 039/546] * 2022-08-08 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index da964b81aa474c..f4418ba47b5b02 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 7 +#define RUBY_RELEASE_DAY 8 #include "ruby/version.h" #include "ruby/internal/abi.h" From 1607c6d2814cc4f71aa65bc273d745f28514e735 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Fri, 5 Aug 2022 13:06:03 -0500 Subject: [PATCH 040/546] [DOC] New doc about Julian/Gregorian (#70) --- doc/date/calendars.rdoc | 63 +++++++++++++++++++++++++++++ ext/date/date_core.c | 87 ++++++++++------------------------------- 2 files changed, 83 insertions(+), 67 deletions(-) create mode 100644 doc/date/calendars.rdoc diff --git a/doc/date/calendars.rdoc b/doc/date/calendars.rdoc new file mode 100644 index 00000000000000..e2118d3e9dad33 --- /dev/null +++ b/doc/date/calendars.rdoc @@ -0,0 +1,63 @@ +== Julian and Gregorian Calendars + +The difference between the +{Julian calendar}[https://en.wikipedia.org/wiki/Julian_calendar] +and the +{Gregorian calendar}[https://en.wikipedia.org/wiki/Gregorian_calendar] +may matter to your program if it uses dates in the interval: + +- October 15, 1582. +- September 14, 1752. 
+ +A date outside that interval (including all dates in modern times) +is the same in both calendars. +However, a date _within_ that interval will be different +in the two calendars. + +=== Different Calendar, Different \Date + +The reason for the difference is this: + +- On October 15, 1582, several countries changed + from the Julian calendar to the Gregorian calendar; + these included Italy, Poland, Portugal, and Spain. + Other countries in the Western world retained the Julian calendar. +- On September 14, 1752, most of the British empire + changed from the Julian calendar to the Gregorian calendar. + +When your code uses a date in this "gap" interval, +it will matter whether it considers the switchover date +to be the earlier date or the later date (or neither). + +=== Argument +start+ + +Certain methods in class \Date handle differences in the +{Julian and Gregorian calendars}[rdoc-ref:calendars.rdoc@Julian+and+Gregorian+Calendars] +by accepting an optional argument +start+, whose value may be: + +- Date::ITALY (the default): the created date is Julian + if before October 15, 1582, Gregorian otherwise: + + d = Date.new(1582, 10, 15) + d.prev_day.julian? # => true + d.julian? # => false + d.gregorian? # => true + +- Date::ENGLAND: the created date is Julian if before September 14, 1752, + Gregorian otherwise: + + d = Date.new(1752, 9, 14, Date::ENGLAND) + d.prev_day.julian? # => true + d.julian? # => false + d.gregorian? # => true + +- Date::JULIAN: the created date is Julian regardless of its value: + + d = Date.new(1582, 10, 15, Date::JULIAN) + d.julian? # => true + +- Date::GREGORIAN: the created date is Gregorian regardless of its value: + + d = Date.new(1752, 9, 14, Date::GREGORIAN) + d.prev_day.gregorian?
# => true + diff --git a/ext/date/date_core.c b/ext/date/date_core.c index cee7b27faf6d08..1c0d1c4920c475 100644 --- a/ext/date/date_core.c +++ b/ext/date/date_core.c @@ -2486,7 +2486,7 @@ date_s__valid_jd_p(int argc, VALUE *argv, VALUE klass) * * Date.valid_jd?(2451944) # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd. */ @@ -2580,7 +2580,7 @@ date_s__valid_civil_p(int argc, VALUE *argv, VALUE klass) * Date.valid_date?(2001, 2, 29) # => false * Date.valid_date?(2001, 2, -1) # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Date.valid_date? is an alias for Date.valid_civil?. * @@ -2670,7 +2670,7 @@ date_s__valid_ordinal_p(int argc, VALUE *argv, VALUE klass) * Date.valid_ordinal?(2001, 34) # => true * Date.valid_ordinal?(2001, 366) # => false * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.ordinal. */ @@ -2760,7 +2760,7 @@ date_s__valid_commercial_p(int argc, VALUE *argv, VALUE klass) * * See Date.commercial. * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.commercial. */ @@ -3342,7 +3342,7 @@ static VALUE d_lite_plus(VALUE, VALUE); * * Date.jd(Date::ITALY - 1).julian? # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.new. */ @@ -3407,7 +3407,7 @@ date_s_jd(int argc, VALUE *argv, VALUE klass) * * Raises an exception if +yday+ is zero or out of range. * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.new. 
*/ @@ -3484,7 +3484,7 @@ date_s_civil(int argc, VALUE *argv, VALUE klass) * where +n+ is the number of days in the month; * when the argument is negative, counts backward from the end of the month. * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Date.civil is an alias for Date.new. * @@ -3592,7 +3592,7 @@ date_initialize(int argc, VALUE *argv, VALUE self) * Date.commercial(2020, 1, 1).to_s # => "2019-12-30" Date.commercial(2020, 1, 7).to_s # => "2020-01-05" * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.new, Date.ordinal. */ @@ -3777,7 +3777,7 @@ static void set_sg(union DateData *, double); * * Date.today.to_s # => "2022-07-06" * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * */ static VALUE @@ -4409,7 +4409,7 @@ date_s__strptime(int argc, VALUE *argv, VALUE klass) * {Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html]. * (Unlike Date.strftime, does not support flags and width.) * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * See also {strptime(3)}[https://man7.org/linux/man-pages/man3/strptime.3.html]. * @@ -4556,7 +4556,7 @@ date_s__parse(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._parse (returns a hash). @@ -4636,7 +4636,7 @@ date_s__iso8601(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. 
* * Related: Date._iso8601 (returns a hash). @@ -4707,7 +4707,7 @@ date_s__rfc3339(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._rfc3339 (returns a hash). @@ -4776,7 +4776,7 @@ date_s__xmlschema(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._xmlschema (returns a hash). @@ -4849,7 +4849,7 @@ date_s__rfc2822(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Date.rfc822 is an alias for Date.rfc2822. @@ -4919,7 +4919,7 @@ date_s__httpdate(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._httpdate (returns a hash). @@ -4991,7 +4991,7 @@ date_s__jisx0301(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._jisx0301 (returns a hash). @@ -5755,7 +5755,7 @@ d_lite_leap_p(VALUE self) * Date.new(2001, 2, 3, Date::GREGORIAN).start # => -Infinity * Date.new(2001, 2, 3, Date::JULIAN).start # => Infinity * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * */ static VALUE @@ -5830,7 +5830,7 @@ dup_obj_with_new_start(VALUE obj, double sg) * d1 = d0.new_start(Date::JULIAN) * d1.julian? 
# => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * */ static VALUE @@ -9522,54 +9522,6 @@ Init_date_core(void) * d += 1 #=> # * d.strftime('%a %d %b %Y') #=> "Sun 04 Feb 2001" * - * === Argument +start+ - * - * Certain calculations and comparisons for a \Date object - * are affected by what the object considers to have been - * the changeover date from the - * {Julian}[https://en.wikipedia.org/wiki/Julian_calendar] to the - * {Gregorian}[https://en.wikipedia.org/wiki/Gregorian_calendar] - * calendar; - * this is set by argument +start+ when the object is created: - * - * - Dates before the changeover are considered to be Julian. - * - Dates after the changeover are considered to be Gregorian. - * - * The value of the +start+ argument may be: - * - * - Date::ITALY (the default) - the changeover date is October 10, 1582: - * - * Date::ITALY # => 2299161 - * Date.jd(Date::ITALY).to_s # => "1582-10-15" - * - * # Julian base date, Julian result date. - * (Date.new(1581, 1, 1, Date::ITALY) + 365).to_s # => "1582-01-01" - * # Gregorian base date, Gregorian result date. - * (Date.new(1583, 1, 1, Date::ITALY) + 365).to_s # => "1584-01-01" - * - * # Julian base date, Gregorian result date. - * (Date.new(1582, 1, 1, Date::ITALY) + 365).to_s # => "1583-01-11" - * # Gregorian base date, Julian result date. - * (Date.new(1583, 1, 1, Date::ITALY) - 365).to_s # => "1581-12-22" - * - * - Date::ENGLAND - the changeover date is September 9, 1752: - * - * Date::ENGLAND # => 2361222 - * Date.jd(Date::ENGLAND).to_s # => "1752-09-14" - * - * # Julian base date, Julian result date. - * (Date.new(1751, 1, 1, Date::ENGLAND) + 365).to_s # => "1752-01-01" - * # Gregorian base date, Gregorian result date. - * (Date.new(1753, 1, 1, Date::ENGLAND) + 365).to_s # => "1754-01-01" - * - * # Julian base date, Gregorian result date. 
- * (Date.new(1752, 1, 1, Date::ENGLAND) + 365).to_s # => "1753-01-11" - * # Gregorian base date, Julian result date. - * (Date.new(1753, 1, 1, Date::ENGLAND) - 365).to_s # => "1751-12-22" - * - * - Date::JULIAN - no changeover date; all dates are Julian. - * - Date::GREGORIAN - no changeover date; all dates are Gregorian. - * * === Argument +limit+ * * Certain singleton methods in \Date that parse string arguments @@ -9584,6 +9536,7 @@ Init_date_core(void) * - Other non-numeric: raises TypeError. * */ + cDate = rb_define_class("Date", rb_cObject); /* Exception for invalid date/time */ From 289d219758336e1e1d8afe36255fe06079d8daeb Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 8 Aug 2022 21:13:52 +0900 Subject: [PATCH 041/546] [ruby/date] [DOC] Fix about calendars difference https://github.com/ruby/date/commit/0ae93e26aa --- doc/date/calendars.rdoc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/date/calendars.rdoc b/doc/date/calendars.rdoc index e2118d3e9dad33..b8690841b1d45c 100644 --- a/doc/date/calendars.rdoc +++ b/doc/date/calendars.rdoc @@ -4,19 +4,16 @@ The difference between the {Julian calendar}[https://en.wikipedia.org/wiki/Julian_calendar] and the {Gregorian calendar}[https://en.wikipedia.org/wiki/Gregorian_calendar] -may matter to your program if it uses dates in the interval: +may matter to your program if it uses dates before the switchovers. - October 15, 1582. - September 14, 1752. -A date outside that interval (including all dates in modern times) -is the same in both calendars. -However, a date _within_ that interval will be different -in the two calendars. +A date will be different in the two calendars, in general. -=== Different Calendar, Different \Date +=== Different switchover dates -The reason for the difference is this: +The reasons for the difference are religious/political histories. 
- On October 15, 1582, several countries changed from the Julian calendar to the Gregorian calendar; @@ -25,10 +22,12 @@ The reason for the difference is this: - On September 14, 1752, most of the British empire changed from the Julian calendar to the Gregorian calendar. -When your code uses a date in this "gap" interval, +When your code uses a date before these switchover dates, it will matter whether it considers the switchover date to be the earlier date or the later date (or neither). +See also {a concrete example here}[rdoc-ref:DateTime@When+should+you+use+DateTime+and+when+should+you+use+Time-3F]. + === Argument +start+ Certain methods in class \Date handle differences in the From 1cb77f23046dbfd14703241e82bed2bcd69bf6a1 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 8 Aug 2022 11:14:05 +0200 Subject: [PATCH 042/546] Update IO::Buffer#get_value benchmark - The method was renamed from `get` to `get_value` - Comparing to `String#unpack` isn't quite equivalent, `unpack1` is closer. - Use frozen_string_literal to avoid allocating a format string every time. - Use `N` format which is equivalent to `:U32` (`uint32_t` big-endian). - Disable experimental warnings to not mess up the output.
--- benchmark/buffer_get.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/benchmark/buffer_get.yml b/benchmark/buffer_get.yml index e375dcf85dc04e..bb9ca7e94aca7f 100644 --- a/benchmark/buffer_get.yml +++ b/benchmark/buffer_get.yml @@ -1,9 +1,10 @@ +prelude: | + # frozen_string_literal: true + Warning[:experimental] = false + buffer = IO::Buffer.new(32, IO::Buffer::MAPPED) + string = "\0" * 32 benchmark: - - name: buffer.get - prelude: buffer = IO::Buffer.new(32, IO::Buffer::MAPPED) - script: buffer.get(:U32, 0) - loop_count: 20000000 - - name: string.unpack - prelude: string = "\0" * 32 - script: string.unpack("C") - loop_count: 20000000 + buffer.get_value: | + buffer.get_value(:U32, 0) + string.unpack1: | + string.unpack1("N") From a24c607e30b5a74ef53e9a2c1e630dea46151a84 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 8 Aug 2022 09:26:07 -0400 Subject: [PATCH 043/546] [DOC] Fix formatting issue in Enumerable --- enum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enum.c b/enum.c index e3dc177ba17729..97215b627e4c93 100644 --- a/enum.c +++ b/enum.c @@ -4808,7 +4808,7 @@ enum_compact(VALUE obj) * - #grep_v: Returns elements selected by a given object * or objects returned by a given block. * - #reduce, #inject: Returns the object formed by combining all elements. - * - #sum: Returns the sum of the elements, using method +++. + * - #sum: Returns the sum of the elements, using method +. * - #zip: Combines each element with elements from other enumerables; * returns the n-tuples or calls the block with each. * - #cycle: Calls the block with each element, cycling repeatedly. 
From e0dfa5967e7063da8b65dc3c062ef4652e246e34 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 8 Aug 2022 23:08:29 +0900 Subject: [PATCH 044/546] [Bug #18946] Use Gregorian dates to test --- spec/ruby/library/datetime/to_time_spec.rb | 4 ++-- spec/ruby/library/time/to_datetime_spec.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/ruby/library/datetime/to_time_spec.rb b/spec/ruby/library/datetime/to_time_spec.rb index a11b6e30e1c62c..a03ab678675703 100644 --- a/spec/ruby/library/datetime/to_time_spec.rb +++ b/spec/ruby/library/datetime/to_time_spec.rb @@ -7,10 +7,10 @@ end it "returns a Time representing the same instant" do - datetime = DateTime.civil(3, 12, 31, 23, 58, 59) + datetime = DateTime.civil(2012, 12, 31, 23, 58, 59) time = datetime.to_time.utc - time.year.should == 3 + time.year.should == 2012 time.month.should == 12 time.day.should == 31 time.hour.should == 23 diff --git a/spec/ruby/library/time/to_datetime_spec.rb b/spec/ruby/library/time/to_datetime_spec.rb index 0e37a61108b9b9..2c1e060f0f4a47 100644 --- a/spec/ruby/library/time/to_datetime_spec.rb +++ b/spec/ruby/library/time/to_datetime_spec.rb @@ -3,9 +3,9 @@ describe "Time#to_datetime" do it "returns a DateTime representing the same instant" do - time = Time.utc(3, 12, 31, 23, 58, 59) + time = Time.utc(2012, 12, 31, 23, 58, 59) datetime = time.to_datetime - datetime.year.should == 3 + datetime.year.should == 2012 datetime.month.should == 12 datetime.day.should == 31 datetime.hour.should == 23 From 43239b23b48a6c3fde6bdc4b9cc568bedac161b2 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 8 Aug 2022 23:26:21 +0900 Subject: [PATCH 045/546] [Bug #18946] New tests for fixed Time/DateTime conversions --- spec/ruby/library/datetime/to_time_spec.rb | 13 +++++++++++++ spec/ruby/library/time/to_datetime_spec.rb | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/spec/ruby/library/datetime/to_time_spec.rb 
b/spec/ruby/library/datetime/to_time_spec.rb index a03ab678675703..88a7aaa7390afe 100644 --- a/spec/ruby/library/datetime/to_time_spec.rb +++ b/spec/ruby/library/datetime/to_time_spec.rb @@ -18,6 +18,19 @@ time.sec.should == 59 end + version_is(Date::VERSION, '3.2.3') do + it "returns a Time representing the same instant before Gregorian" do + datetime = DateTime.civil(1582, 10, 4, 23, 58, 59) + time = datetime.to_time.utc + time.year.should == 1582 + time.month.should == 10 + time.day.should == 14 + time.hour.should == 23 + time.min.should == 58 + time.sec.should == 59 + end + end + it "preserves the same time regardless of local time or zone" do date = DateTime.new(2012, 12, 24, 12, 23, 00, '+03:00') diff --git a/spec/ruby/library/time/to_datetime_spec.rb b/spec/ruby/library/time/to_datetime_spec.rb index 2c1e060f0f4a47..c5561535b288b8 100644 --- a/spec/ruby/library/time/to_datetime_spec.rb +++ b/spec/ruby/library/time/to_datetime_spec.rb @@ -13,6 +13,19 @@ datetime.sec.should == 59 end + version_is(Date::VERSION, '3.2.3') do + it "returns a DateTime representing the same instant before Gregorian" do + time = Time.utc(1582, 10, 14, 23, 58, 59) + datetime = time.to_datetime + datetime.year.should == 1582 + datetime.month.should == 10 + datetime.day.should == 4 + datetime.hour.should == 23 + datetime.min.should == 58 + datetime.sec.should == 59 + end + end + it "roundtrips" do time = Time.utc(3, 12, 31, 23, 58, 59) datetime = time.to_datetime From e07d450deae500422b7693a30c75c5b1567601a2 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 8 Aug 2022 22:43:56 +0900 Subject: [PATCH 046/546] [ruby/date] Fix Time#to_datetime before calendar reform Time is always in the proleptic Gregorian calendar. Also DateTime#to_time should convert to the Gregorian calendar first, before extracting its components. 
https://bugs.ruby-lang.org/issues/18946#change-98527 https://github.com/ruby/date/commit/b2aee75248 --- ext/date/date_core.c | 13 +++++++++---- test/date/test_date_conv.rb | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/ext/date/date_core.c b/ext/date/date_core.c index 1c0d1c4920c475..c68f70e5f88f95 100644 --- a/ext/date/date_core.c +++ b/ext/date/date_core.c @@ -8811,7 +8811,7 @@ time_to_datetime(VALUE self) ret = d_complex_new_internal(cDateTime, nth, 0, 0, sf, - of, DEFAULT_SG, + of, GREGORIAN, ry, m, d, h, min, s, HAVE_CIVIL | HAVE_TIME); @@ -8915,12 +8915,17 @@ date_to_datetime(VALUE self) static VALUE datetime_to_time(VALUE self) { - volatile VALUE dup = dup_obj(self); + get_d1(self); + + if (m_julian_p(dat)) { + self = d_lite_gregorian(self); + get_d1a(self); + dat = adat; + } + { VALUE t; - get_d1(dup); - t = rb_funcall(rb_cTime, rb_intern("new"), 7, diff --git a/test/date/test_date_conv.rb b/test/date/test_date_conv.rb index d41ff45d859a6b..ed478b41bb3c8d 100644 --- a/test/date/test_date_conv.rb +++ b/test/date/test_date_conv.rb @@ -77,6 +77,11 @@ def test_to_time__from_datetime assert_equal([2004, 9, 19, 1, 2, 3, 456789], [t.year, t.mon, t.mday, t.hour, t.min, t.sec, t.usec]) + d = DateTime.new(1582, 10, 3, 1, 2, 3, 0) + 456789.to_r/86400000000 + t = d.to_time.utc + assert_equal([1582, 10, 13, 1, 2, 3, 456789], + [t.year, t.mon, t.mday, t.hour, t.min, t.sec, t.usec]) + if Time.allocate.respond_to?(:nsec) d = DateTime.new(2004, 9, 19, 1, 2, 3, 0) + 456789123.to_r/86400000000000 t = d.to_time.utc @@ -100,6 +105,10 @@ def test_to_date__from_time t = Time.utc(2004, 9, 19, 1, 2, 3, 456789) d = t.to_date assert_equal([2004, 9, 19, 0], [d.year, d.mon, d.mday, d.day_fraction]) + + t = Time.utc(1582, 10, 13, 1, 2, 3, 456789) + d = t.to_date # using ITALY + assert_equal([1582, 10, 3, 0], [d.year, d.mon, d.mday, d.day_fraction]) end def test_to_date__from_date @@ -136,6 +145,14 @@ def test_to_datetime__from_time [d.year, d.mon, 
d.mday, d.hour, d.min, d.sec, d.sec_fraction, d.offset]) + t = Time.utc(1582, 10, 13, 1, 2, 3, 456789) + d = t.to_datetime # using ITALY + assert_equal([1582, 10, 3, 1, 2, 3, + 456789.to_r/1000000, + 0], + [d.year, d.mon, d.mday, d.hour, d.min, d.sec, + d.sec_fraction, d.offset]) + t = Time.now d = t.to_datetime require 'time' From 5c9ce5475736756891238a3e3fe58190b0f84f0c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 8 Aug 2022 23:58:09 +0900 Subject: [PATCH 047/546] [ruby/date] bump up to 3.2.3 https://github.com/ruby/date/commit/dff37b3dd1 --- ext/date/lib/date.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/date/lib/date.rb b/ext/date/lib/date.rb index 88984d7bd295aa..18996114404173 100644 --- a/ext/date/lib/date.rb +++ b/ext/date/lib/date.rb @@ -4,7 +4,7 @@ require 'date_core' class Date - VERSION = '3.2.2' # :nodoc: + VERSION = "3.2.3" # :nodoc: # call-seq: # infinite? -> false From dcf0bb303288f9b7ebe6d625354f2c69500a1767 Mon Sep 17 00:00:00 2001 From: git Date: Mon, 8 Aug 2022 14:59:56 +0000 Subject: [PATCH 048/546] Update default gems list at 5c9ce5475736756891238a3e3fe581 [ci skip] --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d3fee75302d250..7d42ff2f4dbd63 100644 --- a/NEWS.md +++ b/NEWS.md @@ -158,6 +158,7 @@ Note: We're only listing outstanding class updates. 
* bigdecimal 3.1.2 * bundler 2.4.0.dev * cgi 0.3.2 + * date 3.2.3 * etc 1.4.0 * io-console 0.5.11 * io-nonblock 0.1.1 From 1602443c985975cfc0f6f0b83ae6ac661a3c93bc Mon Sep 17 00:00:00 2001 From: git Date: Tue, 9 Aug 2022 00:00:15 +0900 Subject: [PATCH 049/546] * 2022-08-09 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index f4418ba47b5b02..4b9ca6f5ff2cbc 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 8 +#define RUBY_RELEASE_DAY 9 #include "ruby/version.h" #include "ruby/internal/abi.h" From c851bced390204d6624e58c47941f34d0858b352 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Fri, 22 Jul 2022 15:49:05 -0400 Subject: [PATCH 050/546] [ruby/psych] Clarify tests about parsing aliases https://github.com/ruby/psych/commit/0bc30cb4cb --- test/psych/test_safe_load.rb | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/test/psych/test_safe_load.rb b/test/psych/test_safe_load.rb index b52d6048b30b9a..e57dbcb2f56152 100644 --- a/test/psych/test_safe_load.rb +++ b/test/psych/test_safe_load.rb @@ -19,18 +19,31 @@ class Foo; end end end - def test_no_recursion - x = [] - x << x + def test_raises_when_alias_found_if_alias_parsing_not_enabled + yaml_with_aliases = <<~YAML + --- + a: &ABC + k1: v1 + k2: v2 + b: *ABC + YAML + assert_raise(Psych::BadAlias) do - Psych.safe_load Psych.dump(x) + Psych.safe_load(yaml_with_aliases) end end - def test_explicit_recursion - x = [] - x << x - assert_equal(x, Psych.safe_load(Psych.dump(x), permitted_classes: [], permitted_symbols: [], aliases: true)) + def test_aliases_are_parsed_when_alias_parsing_is_enabled + yaml_with_aliases = <<~YAML + --- + a: &ABC + k1: v1 + k2: v2 + b: *ABC + YAML + + result = Psych.safe_load(yaml_with_aliases, aliases: true) + assert_same result.fetch("a"), result.fetch("b") end def test_permitted_symbol 
From 0b7cfdca09651f2eae5cd0e8ae4efed5033493f2 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Fri, 22 Jul 2022 16:45:03 -0400 Subject: [PATCH 051/546] [ruby/psych] Test that recursive refs dump as aliases https://github.com/ruby/psych/commit/d9f7289190 --- test/psych/test_array.rb | 13 +++++++++++++ test/psych/test_hash.rb | 12 ++++++++++++ test/psych/test_object.rb | 12 ++++++++++++ 3 files changed, 37 insertions(+) diff --git a/test/psych/test_array.rb b/test/psych/test_array.rb index 28b76da785e8ef..a6be0baf2fe820 100644 --- a/test/psych/test_array.rb +++ b/test/psych/test_array.rb @@ -57,6 +57,19 @@ def test_self_referential assert_cycle(@list) end + def test_recursive_array_uses_alias + @list << @list + + expected = <<~eoyaml + --- &1 + - :a: b + - foo + - *1 + eoyaml + + assert_equal expected, Psych.dump(@list) + end + def test_cycle assert_cycle(@list) end diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 5374781339209c..43e4b8bf14c3d5 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -112,6 +112,18 @@ def test_ref_append assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) end + def test_recursive_hash_uses_alias + h = { } + h["recursive_reference"] = h + + expected = <<~eoyaml + --- &1 + recursive_reference: *1 + eoyaml + + assert_equal(expected, Psych.dump(h)) + end + def test_key_deduplication unless String.method_defined?(:-@) && (-("a" * 20)).equal?((-("a" * 20))) pend "This Ruby implementation doesn't support string deduplication" diff --git a/test/psych/test_object.rb b/test/psych/test_object.rb index 0faf6b244db2e5..648a3ca6a5b32b 100644 --- a/test/psych/test_object.rb +++ b/test/psych/test_object.rb @@ -41,5 +41,17 @@ def test_cyclic_references assert_instance_of(Foo, loaded) assert_equal loaded, loaded.parent end + + def test_cyclic_reference_uses_alias + foo = Foo.new(nil) + foo.parent = foo + + expected = <<~eoyaml + --- &1 !ruby/object:Psych::Foo + parent: *1 + eoyaml + + 
assert_equal expected, Psych.dump(foo) + end end end From 71f89c287459d5ca313d0b1a16b7a743e0d71b8b Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Wed, 27 Jul 2022 10:19:37 -0400 Subject: [PATCH 052/546] [ruby/psych] Don't hardcode expected alias names https://github.com/ruby/psych/commit/b9ab19094f --- test/psych/test_array.rb | 19 +++++++++++-------- test/psych/test_hash.rb | 18 ++++++++++++------ test/psych/test_object.rb | 13 +++++-------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/test/psych/test_array.rb b/test/psych/test_array.rb index a6be0baf2fe820..6a9931ab2ff939 100644 --- a/test/psych/test_array.rb +++ b/test/psych/test_array.rb @@ -57,17 +57,20 @@ def test_self_referential assert_cycle(@list) end - def test_recursive_array_uses_alias + def test_recursive_array @list << @list - expected = <<~eoyaml - --- &1 - - :a: b - - foo - - *1 - eoyaml + loaded = Psych.load(Psych.dump(@list), aliases: true) + + assert_same loaded, loaded.last + end + + def test_recursive_array_uses_alias + @list << @list - assert_equal expected, Psych.dump(@list) + assert_raise(BadAlias) do + Psych.load(Psych.dump(@list), aliases: false) + end end def test_cycle diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 43e4b8bf14c3d5..0555f6e0346fd2 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -112,16 +112,22 @@ def test_ref_append assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) end - def test_recursive_hash_uses_alias + def test_recursive_hash h = { } h["recursive_reference"] = h - expected = <<~eoyaml - --- &1 - recursive_reference: *1 - eoyaml + loaded = Psych.load(Psych.dump(h), aliases: true) + + assert_same loaded, loaded.fetch("recursive_reference") + end + + def test_recursive_hash_uses_alias + h = { } + h["recursive_reference"] = h - assert_equal(expected, Psych.dump(h)) + assert_raise(BadAlias) do + Psych.load(Psych.dump(h), aliases: false) + end end def test_key_deduplication 
diff --git a/test/psych/test_object.rb b/test/psych/test_object.rb index 648a3ca6a5b32b..227a1d1d53953a 100644 --- a/test/psych/test_object.rb +++ b/test/psych/test_object.rb @@ -36,22 +36,19 @@ def test_tag_round_trip def test_cyclic_references foo = Foo.new(nil) foo.parent = foo - loaded = Psych.unsafe_load Psych.dump foo + loaded = Psych.load(Psych.dump(foo), permitted_classes: [Foo], aliases: true) assert_instance_of(Foo, loaded) - assert_equal loaded, loaded.parent + assert_same loaded, loaded.parent end def test_cyclic_reference_uses_alias foo = Foo.new(nil) foo.parent = foo - expected = <<~eoyaml - --- &1 !ruby/object:Psych::Foo - parent: *1 - eoyaml - - assert_equal expected, Psych.dump(foo) + assert_raise(BadAlias) do + Psych.load(Psych.dump(foo), permitted_classes: [Foo], aliases: false) + end end end end From 54219ae8c46bc431782caf01142883ce7e8b970b Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Thu, 21 Jul 2022 15:07:39 -0400 Subject: [PATCH 053/546] [ruby/psych] Raise specific error when aliases are not enabled https://github.com/ruby/psych/commit/0c11ddcf46 --- ext/psych/lib/psych.rb | 2 +- ext/psych/lib/psych/exception.rb | 7 +++++++ ext/psych/lib/psych/visitors/to_ruby.rb | 2 +- test/psych/helper.rb | 6 +++--- test/psych/test_array.rb | 2 +- test/psych/test_hash.rb | 2 +- test/psych/test_merge_keys.rb | 2 +- test/psych/test_object.rb | 2 +- test/psych/test_safe_load.rb | 2 +- 9 files changed, 17 insertions(+), 10 deletions(-) diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb index 42d79efb832d43..4a2ab58514cbc0 100644 --- a/ext/psych/lib/psych.rb +++ b/ext/psych/lib/psych.rb @@ -307,7 +307,7 @@ def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: fals # A Psych::DisallowedClass exception will be raised if the yaml contains a # class that isn't in the +permitted_classes+ list. 
# - # A Psych::BadAlias exception will be raised if the yaml contains aliases + # A Psych::AliasesNotEnabled exception will be raised if the yaml contains aliases # but the +aliases+ keyword argument is set to false. # # +filename+ will be used in the exception message if any exception is raised diff --git a/ext/psych/lib/psych/exception.rb b/ext/psych/lib/psych/exception.rb index f473b95a3bbb6f..04a9a906a4189c 100644 --- a/ext/psych/lib/psych/exception.rb +++ b/ext/psych/lib/psych/exception.rb @@ -6,6 +6,13 @@ class Exception < RuntimeError class BadAlias < Exception end + # Subclasses `BadAlias` for backwards compatibility + class AliasesNotEnabled < BadAlias + def initialize + super "Alias parsing was not enabled. To enable it, pass `aliases: true` to `Psych::load` or `Psych::safe_load`." + end + end + class DisallowedClass < Exception def initialize action, klass_name super "Tried to #{action} unspecified class: #{klass_name}" diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb index 935bc74f21c347..0bf5198ccc3fd7 100644 --- a/ext/psych/lib/psych/visitors/to_ruby.rb +++ b/ext/psych/lib/psych/visitors/to_ruby.rb @@ -427,7 +427,7 @@ def resolve_class klassname class NoAliasRuby < ToRuby def visit_Psych_Nodes_Alias o - raise BadAlias, "Unknown alias: #{o.anchor}" + raise AliasesNotEnabled end end end diff --git a/test/psych/helper.rb b/test/psych/helper.rb index 0643139d8c0cff..4e82887c6d1c2c 100644 --- a/test/psych/helper.rb +++ b/test/psych/helper.rb @@ -51,7 +51,7 @@ def assert_to_yaml( obj, yaml, loader = :load ) :UseVersion => true, :UseHeader => true, :SortKeys => true ) )) - rescue Psych::DisallowedClass, Psych::BadAlias + rescue Psych::DisallowedClass, Psych::BadAlias, Psych::AliasesNotEnabled assert_to_yaml obj, yaml, :unsafe_load end @@ -61,7 +61,7 @@ def assert_to_yaml( obj, yaml, loader = :load ) def assert_parse_only( obj, yaml ) begin assert_equal obj, Psych::load( yaml ) - rescue Psych::DisallowedClass, 
Psych::BadAlias + rescue Psych::DisallowedClass, Psych::BadAlias, Psych::AliasesNotEnabled assert_equal obj, Psych::unsafe_load( yaml ) end assert_equal obj, Psych::parse( yaml ).transform @@ -79,7 +79,7 @@ def assert_cycle( obj ) assert_equal(obj, Psych.load(v.tree.yaml)) assert_equal(obj, Psych::load(Psych.dump(obj))) assert_equal(obj, Psych::load(obj.to_yaml)) - rescue Psych::DisallowedClass, Psych::BadAlias + rescue Psych::DisallowedClass, Psych::BadAlias, Psych::AliasesNotEnabled assert_equal(obj, Psych.unsafe_load(v.tree.yaml)) assert_equal(obj, Psych::unsafe_load(Psych.dump(obj))) assert_equal(obj, Psych::unsafe_load(obj.to_yaml)) diff --git a/test/psych/test_array.rb b/test/psych/test_array.rb index 6a9931ab2ff939..0dc82439d44c90 100644 --- a/test/psych/test_array.rb +++ b/test/psych/test_array.rb @@ -68,7 +68,7 @@ def test_recursive_array def test_recursive_array_uses_alias @list << @list - assert_raise(BadAlias) do + assert_raise(AliasesNotEnabled) do Psych.load(Psych.dump(@list), aliases: false) end end diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 0555f6e0346fd2..0efa21160f2082 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -125,7 +125,7 @@ def test_recursive_hash_uses_alias h = { } h["recursive_reference"] = h - assert_raise(BadAlias) do + assert_raise(AliasesNotEnabled) do Psych.load(Psych.dump(h), aliases: false) end end diff --git a/test/psych/test_merge_keys.rb b/test/psych/test_merge_keys.rb index dcf4f1fce3ffae..8d2fceabf5112f 100644 --- a/test/psych/test_merge_keys.rb +++ b/test/psych/test_merge_keys.rb @@ -117,7 +117,7 @@ def test_missing_merge_key bar: << : *foo eoyml - exp = assert_raise(Psych::BadAlias) { Psych.load yaml } + exp = assert_raise(Psych::BadAlias) { Psych.load(yaml, aliases: true) } assert_match 'foo', exp.message end diff --git a/test/psych/test_object.rb b/test/psych/test_object.rb index 227a1d1d53953a..21c27794ea5d4e 100644 --- a/test/psych/test_object.rb +++ 
b/test/psych/test_object.rb @@ -46,7 +46,7 @@ def test_cyclic_reference_uses_alias foo = Foo.new(nil) foo.parent = foo - assert_raise(BadAlias) do + assert_raise(AliasesNotEnabled) do Psych.load(Psych.dump(foo), permitted_classes: [Foo], aliases: false) end end diff --git a/test/psych/test_safe_load.rb b/test/psych/test_safe_load.rb index e57dbcb2f56152..a9ed73752816d2 100644 --- a/test/psych/test_safe_load.rb +++ b/test/psych/test_safe_load.rb @@ -28,7 +28,7 @@ def test_raises_when_alias_found_if_alias_parsing_not_enabled b: *ABC YAML - assert_raise(Psych::BadAlias) do + assert_raise(Psych::AliasesNotEnabled) do Psych.safe_load(yaml_with_aliases) end end From ea1efdf32faf2cf35f6db1ec426c277bfa8f0461 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Fri, 22 Jul 2022 16:09:13 -0400 Subject: [PATCH 054/546] [ruby/psych] Add test for anchor reuse The spec calls this a "reuse" of an anchor https://yaml.org/spec/1.2.2/#71-alias-nodes https://github.com/ruby/psych/commit/57e3b70a56 --- test/psych/test_hash.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 0efa21160f2082..6c45da53b7017d 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -112,6 +112,17 @@ def test_ref_append assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) end + def test_anchor_reuse + hash = Psych.unsafe_load(<<~eoyml) + --- + foo: &foo + hello: world + bar: *foo + eoyml + assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) + assert_same(hash.fetch("foo"), hash.fetch("bar")) + end + def test_recursive_hash h = { } h["recursive_reference"] = h From 38ea6b30dc409b43e4ac0758eeb352eecfb0d1da Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Fri, 22 Jul 2022 16:09:26 -0400 Subject: [PATCH 055/546] [ruby/psych] Add test for missing anchor https://github.com/ruby/psych/commit/5f08137ae6 --- test/psych/test_hash.rb | 11 +++++++++++ 1 file changed, 11 
insertions(+) diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 6c45da53b7017d..e5392dc4d95d62 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -123,6 +123,17 @@ def test_anchor_reuse assert_same(hash.fetch("foo"), hash.fetch("bar")) end + def test_raises_if_anchor_not_defined + assert_raise(Psych::BadAlias) do + Psych.unsafe_load(<<~eoyml) + --- + foo: &foo + hello: world + bar: *not_foo + eoyml + end + end + def test_recursive_hash h = { } h["recursive_reference"] = h From 4b9cdf4e1cbb87c4f81edef472c586cd321e924e Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Fri, 22 Jul 2022 16:09:38 -0400 Subject: [PATCH 056/546] [ruby/psych] Update to squiggly heredocs in the file https://github.com/ruby/psych/commit/42b43de997 --- test/psych/test_hash.rb | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index e5392dc4d95d62..88b520ebfc5a15 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -102,13 +102,13 @@ def test_cycles end def test_ref_append - hash = Psych.unsafe_load(<<-eoyml) ---- -foo: &foo - hello: world -bar: - <<: *foo -eoyml + hash = Psych.unsafe_load(<<~eoyml) + --- + foo: &foo + hello: world + bar: + <<: *foo + eoyml assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) end @@ -157,11 +157,11 @@ def test_key_deduplication pend "This Ruby implementation doesn't support string deduplication" end - hashes = Psych.load(<<-eoyml) ---- -- unique_identifier: 1 -- unique_identifier: 2 -eoyml + hashes = Psych.load(<<~eoyml) + --- + - unique_identifier: 1 + - unique_identifier: 2 + eoyml assert_same hashes[0].keys.first, hashes[1].keys.first end From 9c13a6ce5ff1f83489d9df65a5c7745efe16c8a4 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Fri, 22 Jul 2022 16:16:04 -0400 Subject: [PATCH 057/546] [ruby/psych] Raise specific error when an anchor isn't defined 
https://github.com/ruby/psych/commit/98fbd5247a --- ext/psych/lib/psych/exception.rb | 7 +++++++ ext/psych/lib/psych/visitors/to_ruby.rb | 2 +- test/psych/test_hash.rb | 2 +- test/psych/test_merge_keys.rb | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/ext/psych/lib/psych/exception.rb b/ext/psych/lib/psych/exception.rb index 04a9a906a4189c..d7469a4b308a33 100644 --- a/ext/psych/lib/psych/exception.rb +++ b/ext/psych/lib/psych/exception.rb @@ -13,6 +13,13 @@ def initialize end end + # Subclasses `BadAlias` for backwards compatibility + class AnchorNotDefined < BadAlias + def initialize anchor_name + super "An alias referenced an unknown anchor: #{anchor_name}" + end + end + class DisallowedClass < Exception def initialize action, klass_name super "Tried to #{action} unspecified class: #{klass_name}" diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb index 0bf5198ccc3fd7..cce5daf3bbdbfe 100644 --- a/ext/psych/lib/psych/visitors/to_ruby.rb +++ b/ext/psych/lib/psych/visitors/to_ruby.rb @@ -323,7 +323,7 @@ def visit_Psych_Nodes_Stream o end def visit_Psych_Nodes_Alias o - @st.fetch(o.anchor) { raise BadAlias, "Unknown alias: #{o.anchor}" } + @st.fetch(o.anchor) { raise AnchorNotDefined, o.anchor } end private diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 88b520ebfc5a15..31eba8580bb96f 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -124,7 +124,7 @@ def test_anchor_reuse end def test_raises_if_anchor_not_defined - assert_raise(Psych::BadAlias) do + assert_raise(Psych::AnchorNotDefined) do Psych.unsafe_load(<<~eoyml) --- foo: &foo diff --git a/test/psych/test_merge_keys.rb b/test/psych/test_merge_keys.rb index 8d2fceabf5112f..2f55a1ed8ecbf1 100644 --- a/test/psych/test_merge_keys.rb +++ b/test/psych/test_merge_keys.rb @@ -117,7 +117,7 @@ def test_missing_merge_key bar: << : *foo eoyml - exp = assert_raise(Psych::BadAlias) { Psych.load(yaml, aliases: true) } + exp 
= assert_raise(Psych::AnchorNotDefined) { Psych.load(yaml, aliases: true) } assert_match 'foo', exp.message end From 78bc2aae7ff3a66f849de79bdd10e8d9dc205ec8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Aug 2022 15:19:34 +0000 Subject: [PATCH 058/546] [rubygems/rubygems] Bump rb-sys Bumps [rb-sys](https://github.com/oxidize-rb/rb-sys) from 0.9.28 to 0.9.29. - [Release notes](https://github.com/oxidize-rb/rb-sys/releases) - [Commits](https://github.com/oxidize-rb/rb-sys/compare/v0.9.28...v0.9.29) --- updated-dependencies: - dependency-name: rb-sys dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] https://github.com/rubygems/rubygems/commit/d5d96f6bae --- .../rust_ruby_example/Cargo.lock | 8 ++++---- .../rust_ruby_example/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock index 0a95dceca232fc..e3518198483f6e 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.28" +version = "0.9.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7df1d7911fef801edda0b789cca202f3486dff5073eb13dbb85b4715e6f94a" +checksum = "0317cb843cdeef14c5622917c55c0a170cee31348eb600c4a1683fb8c9e87e7a" dependencies = [ "bindgen", "linkify", @@ -164,9 +164,9 @@ dependencies = [ [[package]] name = "rb-sys-build" -version = "0.9.28" +version = "0.9.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa9b908035cb531820f8f3977c538c318308cfaa77da1c6b436577c06db230" +checksum = "c4b8274327aecb7edcff86e290d9cbe7b572b7889c1cfc7476358f4831f78ce5" dependencies = [ 
"regex", "shell-words", diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml index 7cced882e98132..1867db8e66e16c 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.28", features = ["gem"] } +rb-sys = { version = "0.9.29", features = ["gem"] } From 79fdf9712dea4943a15c4ef34348b1a159b62f4a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 9 Aug 2022 03:07:59 +0900 Subject: [PATCH 059/546] Tentatively exclude the test that triggers ruby/psych#572 --- test/excludes/Psych/TestDateTime.rb | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/excludes/Psych/TestDateTime.rb diff --git a/test/excludes/Psych/TestDateTime.rb b/test/excludes/Psych/TestDateTime.rb new file mode 100644 index 00000000000000..1188b4d4a61a69 --- /dev/null +++ b/test/excludes/Psych/TestDateTime.rb @@ -0,0 +1 @@ +exclude(:test_new_datetime, "Psych loses the start date") From 35b9cd1def3360ea0f715eed5a2a702d61e71acc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Aug 2022 16:51:28 +0000 Subject: [PATCH 060/546] [rubygems/rubygems] Bump rb-sys in /test/rubygems/test_gem_ext_cargo_builder/custom_name Bumps [rb-sys](https://github.com/oxidize-rb/rb-sys) from 0.9.28 to 0.9.29. - [Release notes](https://github.com/oxidize-rb/rb-sys/releases) - [Commits](https://github.com/oxidize-rb/rb-sys/compare/v0.9.28...v0.9.29) --- updated-dependencies: - dependency-name: rb-sys dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] https://github.com/rubygems/rubygems/commit/77a945f0e8 --- .../test_gem_ext_cargo_builder/custom_name/Cargo.lock | 8 ++++---- .../test_gem_ext_cargo_builder/custom_name/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock index 86221bf249e295..da09e717f06391 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.28" +version = "0.9.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7df1d7911fef801edda0b789cca202f3486dff5073eb13dbb85b4715e6f94a" +checksum = "0317cb843cdeef14c5622917c55c0a170cee31348eb600c4a1683fb8c9e87e7a" dependencies = [ "bindgen", "linkify", @@ -171,9 +171,9 @@ dependencies = [ [[package]] name = "rb-sys-build" -version = "0.9.28" +version = "0.9.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa9b908035cb531820f8f3977c538c318308cfaa77da1c6b436577c06db230" +checksum = "c4b8274327aecb7edcff86e290d9cbe7b572b7889c1cfc7476358f4831f78ce5" dependencies = [ "regex", "shell-words", diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml index ae32c194b2959f..2a215a55dd6b35 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.28", features = ["gem"] } +rb-sys = { version = "0.9.29", features = ["gem"] } From f8936b3341376948112e31f9e9b0cb3ad6e91e7c Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 8 Aug 2022 19:02:28 -0700 Subject: [PATCH 061/546] 
Remove documentation about dev dev is Shopify's internal tool that doesn't work if you use Intel Homebrew on M1 (or rbenv, btw). Now that we maintain this outside Shopify's repository, we should stop talking about it here. --- doc/yjit/yjit.md | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index fd45096beb128b..f879e227adb9d0 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -347,6 +347,5 @@ While in your i386 shell, install Cargo and Homebrew, then hack away! 1. You must install a version of Homebrew for each architecture 2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install -3. `dev` won't work if you have i386 Homebrew installed on an M1 If you use Fish shell you can [read this link](https://tenderlovemaking.com/2022/01/07/homebrew-rosetta-and-ruby.html) for information on making the dev environment easier. From 44264b4fee1e208e759710c39271186ff9856b40 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 9 Aug 2022 11:16:07 +0900 Subject: [PATCH 062/546] Merge rubygems/bundler HEAD. 
Pick from https://github.com/rubygems/rubygems/commit/dfbb5a38114640e0d8d616861607f3de73ee0199 --- lib/bundler/cli.rb | 2 +- lib/bundler/definition.rb | 8 ++-- lib/bundler/fetcher.rb | 12 ++--- lib/bundler/ruby_dsl.rb | 2 +- lib/rubygems.rb | 14 +++--- lib/rubygems/available_set.rb | 6 +-- lib/rubygems/basic_specification.rb | 4 +- lib/rubygems/command.rb | 12 ++--- lib/rubygems/commands/cert_command.rb | 2 +- lib/rubygems/commands/cleanup_command.rb | 2 +- lib/rubygems/commands/contents_command.rb | 2 +- lib/rubygems/commands/dependency_command.rb | 8 ++-- lib/rubygems/commands/fetch_command.rb | 4 +- .../commands/generate_index_command.rb | 4 +- lib/rubygems/commands/install_command.rb | 14 +++--- lib/rubygems/commands/pristine_command.rb | 10 ++-- lib/rubygems/commands/setup_command.rb | 8 ++-- lib/rubygems/commands/sources_command.rb | 6 +-- .../commands/specification_command.rb | 4 +- lib/rubygems/commands/uninstall_command.rb | 8 ++-- lib/rubygems/commands/update_command.rb | 10 ++-- lib/rubygems/commands/which_command.rb | 2 +- lib/rubygems/config_file.rb | 12 ++--- lib/rubygems/core_ext/kernel_warn.rb | 2 +- lib/rubygems/defaults.rb | 2 +- lib/rubygems/dependency.rb | 10 ++-- lib/rubygems/dependency_installer.rb | 10 ++-- lib/rubygems/dependency_list.rb | 8 ++-- lib/rubygems/doctor.rb | 8 ++-- lib/rubygems/exceptions.rb | 2 +- lib/rubygems/ext/ext_conf_builder.rb | 4 +- lib/rubygems/gemcutter_utilities.rb | 2 +- lib/rubygems/indexer.rb | 6 +-- lib/rubygems/installer.rb | 8 ++-- lib/rubygems/name_tuple.rb | 6 +-- lib/rubygems/package.rb | 6 +-- lib/rubygems/package/tar_header.rb | 34 ++++++------- lib/rubygems/package/tar_writer.rb | 2 +- lib/rubygems/platform.rb | 30 ++++++------ lib/rubygems/query_utils.rb | 12 ++--- lib/rubygems/remote_fetcher.rb | 6 +-- lib/rubygems/request.rb | 2 +- lib/rubygems/request/connection_pools.rb | 4 +- lib/rubygems/request/http_pool.rb | 2 +- lib/rubygems/request_set.rb | 6 +-- .../request_set/gem_dependency_api.rb | 6 +-- 
lib/rubygems/request_set/lockfile/parser.rb | 24 +++++----- .../request_set/lockfile/tokenizer.rb | 4 +- lib/rubygems/requirement.rb | 2 +- lib/rubygems/resolver.rb | 8 ++-- lib/rubygems/resolver/api_specification.rb | 8 ++-- lib/rubygems/resolver/best_set.rb | 6 +-- lib/rubygems/resolver/conflict.rb | 6 +-- lib/rubygems/resolver/git_specification.rb | 6 +-- .../resolver/installed_specification.rb | 4 +- lib/rubygems/resolver/installer_set.rb | 19 +++----- lib/rubygems/resolver/lock_set.rb | 2 +- lib/rubygems/resolver/lock_specification.rb | 2 +- lib/rubygems/resolver/vendor_specification.rb | 6 +-- lib/rubygems/security.rb | 2 +- lib/rubygems/security/policy.rb | 12 ++--- lib/rubygems/security/signer.rb | 2 +- lib/rubygems/source.rb | 2 +- lib/rubygems/source/git.rb | 8 ++-- lib/rubygems/spec_fetcher.rb | 2 +- lib/rubygems/specification.rb | 48 +++++++++---------- lib/rubygems/specification_policy.rb | 8 ++-- lib/rubygems/uninstaller.rb | 8 ++-- lib/rubygems/user_interaction.rb | 4 +- lib/rubygems/version.rb | 2 +- test/rubygems/helper.rb | 10 ++-- .../rubygems/test_gem_dependency_installer.rb | 4 +- .../custom_name/build.rb | 2 +- .../rust_ruby_example/build.rb | 2 +- test/rubygems/test_gem_installer.rb | 4 +- test/rubygems/test_gem_package_tar_reader.rb | 2 +- .../test_gem_resolver_installer_set.rb | 31 ++++++++++++ test/rubygems/test_gem_specification.rb | 4 +- test/rubygems/test_require.rb | 2 +- test/rubygems/utilities.rb | 8 ++-- 80 files changed, 312 insertions(+), 286 deletions(-) diff --git a/lib/bundler/cli.rb b/lib/bundler/cli.rb index 0fa646c8ea823c..5bf0f4fa3aeb82 100644 --- a/lib/bundler/cli.rb +++ b/lib/bundler/cli.rb @@ -238,7 +238,7 @@ def remove(*gems) "Install to the system location ($BUNDLE_PATH or $GEM_HOME) even if the bundle was previously installed somewhere else for this application" method_option "trust-policy", :alias => "P", :type => :string, :banner => "Gem trust policy (like gem install -P). 
Must be one of " + - Bundler.rubygems.security_policy_keys.join("|") + Bundler.rubygems.security_policy_keys.join("|") method_option "without", :type => :array, :banner => "Exclude gems that are part of the specified named group." method_option "with", :type => :array, :banner => diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 38bd01f08f8ea7..21c06e55bab503 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -885,10 +885,10 @@ def additional_base_requirements_for_resolve def remove_ruby_from_platforms_if_necessary!(dependencies) return if Bundler.frozen_bundle? || - Bundler.local_platform == Gem::Platform::RUBY || - !platforms.include?(Gem::Platform::RUBY) || - (@new_platform && platforms.last == Gem::Platform::RUBY) || - !@originally_locked_specs.incomplete_ruby_specs?(dependencies) + Bundler.local_platform == Gem::Platform::RUBY || + !platforms.include?(Gem::Platform::RUBY) || + (@new_platform && platforms.last == Gem::Platform::RUBY) || + !@originally_locked_specs.incomplete_ruby_specs?(dependencies) remove_platform(Gem::Platform::RUBY) add_current_platform diff --git a/lib/bundler/fetcher.rb b/lib/bundler/fetcher.rb index e9d5dd505ca4c4..e399a50cfd98ba 100644 --- a/lib/bundler/fetcher.rb +++ b/lib/bundler/fetcher.rb @@ -240,8 +240,8 @@ def cis def connection @connection ||= begin needs_ssl = remote_uri.scheme == "https" || - Bundler.settings[:ssl_verify_mode] || - Bundler.settings[:ssl_client_cert] + Bundler.settings[:ssl_verify_mode] || + Bundler.settings[:ssl_client_cert] raise SSLError if needs_ssl && !defined?(OpenSSL::SSL) con = PersistentHTTP.new :name => "bundler", :proxy => :ENV @@ -256,8 +256,8 @@ def connection end ssl_client_cert = Bundler.settings[:ssl_client_cert] || - (Gem.configuration.ssl_client_cert if - Gem.configuration.respond_to?(:ssl_client_cert)) + (Gem.configuration.ssl_client_cert if + Gem.configuration.respond_to?(:ssl_client_cert)) if ssl_client_cert pem = File.read(ssl_client_cert) 
con.cert = OpenSSL::X509::Certificate.new(pem) @@ -288,8 +288,8 @@ def gemspec_cached_path(spec_file_name) def bundler_cert_store store = OpenSSL::X509::Store.new ssl_ca_cert = Bundler.settings[:ssl_ca_cert] || - (Gem.configuration.ssl_ca_cert if - Gem.configuration.respond_to?(:ssl_ca_cert)) + (Gem.configuration.ssl_ca_cert if + Gem.configuration.respond_to?(:ssl_ca_cert)) if ssl_ca_cert if File.directory? ssl_ca_cert store.add_path ssl_ca_cert diff --git a/lib/bundler/ruby_dsl.rb b/lib/bundler/ruby_dsl.rb index f6ba220cd55904..3b3a0583a5bc85 100644 --- a/lib/bundler/ruby_dsl.rb +++ b/lib/bundler/ruby_dsl.rb @@ -9,7 +9,7 @@ def ruby(*ruby_version) raise GemfileError, "Please define :engine" if options[:engine_version] && options[:engine].nil? if options[:engine] == "ruby" && options[:engine_version] && - ruby_version != Array(options[:engine_version]) + ruby_version != Array(options[:engine_version]) raise GemfileEvalError, "ruby_version must match the :engine_version for MRI" end @ruby_version = RubyVersion.new(ruby_version, options[:patchlevel], options[:engine], options[:engine_version]) diff --git a/lib/rubygems.rb b/lib/rubygems.rb index b21f00acc74b90..915a899f38edfb 100644 --- a/lib/rubygems.rb +++ b/lib/rubygems.rb @@ -121,7 +121,7 @@ module Gem # When https://bugs.ruby-lang.org/issues/17259 is available, there is no need to override Kernel#warn KERNEL_WARN_IGNORES_INTERNAL_ENTRIES = RUBY_ENGINE == "truffleruby" || - (RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.0") + (RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.0") ## # An Array of Regexps that match windows Ruby platforms. 
@@ -741,8 +741,8 @@ def self.pre_uninstall(&hook) def self.prefix prefix = File.dirname RUBYGEMS_DIR - if prefix != File.expand_path(RbConfig::CONFIG["sitelibdir"]) and - prefix != File.expand_path(RbConfig::CONFIG["libdir"]) and + if prefix != File.expand_path(RbConfig::CONFIG["sitelibdir"]) && + prefix != File.expand_path(RbConfig::CONFIG["libdir"]) && "lib" == File.basename(RUBYGEMS_DIR) prefix end @@ -845,8 +845,8 @@ def self.latest_spec_for(name) # Returns the latest release version of RubyGems. def self.latest_rubygems_version - latest_version_for("rubygems-update") or - raise "Can't find 'rubygems-update' in any repo. Check `gem source list`." + latest_version_for("rubygems-update") || + raise("Can't find 'rubygems-update' in any repo. Check `gem source list`.") end ## @@ -854,7 +854,7 @@ def self.latest_rubygems_version def self.latest_version_for(name) spec = latest_spec_for name - spec and spec.version + spec && spec.version end ## @@ -944,7 +944,7 @@ def self.suffixes ".rb", *%w[DLEXT DLEXT2].map do |key| val = RbConfig::CONFIG[key] - next unless val and not val.empty? + next unless val && !val.empty? ".#{val}" end, ].compact.uniq diff --git a/lib/rubygems/available_set.rb b/lib/rubygems/available_set.rb index 499483d9e94cd4..58b601f6b09dcf 100644 --- a/lib/rubygems/available_set.rb +++ b/lib/rubygems/available_set.rb @@ -26,7 +26,7 @@ def <<(o) s = o.set when Array s = o.map do |sp,so| - if !sp.kind_of?(Gem::Specification) or !so.kind_of?(Gem::Source) + if !sp.kind_of?(Gem::Specification) || !so.kind_of?(Gem::Source) raise TypeError, "Array must be in [[spec, source], ...] form" end @@ -149,8 +149,8 @@ def remove_installed!(dep) @set.reject! do |t| # already locally installed Gem::Specification.any? do |installed_spec| - dep.name == installed_spec.name and - dep.requirement.satisfied_by? 
installed_spec.version + dep.name == installed_spec.name && + dep.requirement.satisfied_by?(installed_spec.version) end end diff --git a/lib/rubygems/basic_specification.rb b/lib/rubygems/basic_specification.rb index 526a5069c24227..dcc64e64096e73 100644 --- a/lib/rubygems/basic_specification.rb +++ b/lib/rubygems/basic_specification.rb @@ -77,7 +77,7 @@ def contains_requirable_file?(file) if Gem::Platform::RUBY == platform || Gem::Platform.local === platform warn "Ignoring #{full_name} because its extensions are not built. " + - "Try: gem pristine #{name} --version #{version}" + "Try: gem pristine #{name} --version #{version}" end return false @@ -131,7 +131,7 @@ def full_gem_path # default Ruby platform. def full_name - if platform == Gem::Platform::RUBY or platform.nil? + if platform == Gem::Platform::RUBY || platform.nil? "#{name}-#{version}".dup.tap(&Gem::UNTAINT) else "#{name}-#{version}-#{platform}".dup.tap(&Gem::UNTAINT) diff --git a/lib/rubygems/command.rb b/lib/rubygems/command.rb index 9fc3360fa13cc8..badc21023af10c 100644 --- a/lib/rubygems/command.rb +++ b/lib/rubygems/command.rb @@ -159,11 +159,11 @@ def show_lookup_failure(gem_name, version, errors, suppress_suggestions = false, gem = "'#{gem_name}' (#{version})" msg = String.new "Could not find a valid gem #{gem}" - if errors and !errors.empty? + if errors && !errors.empty? msg << ", here is why:\n" errors.each {|x| msg << " #{x.wordy}\n" } else - if required_by and gem != required_by + if required_by && gem != required_by msg << " (required by #{required_by}) in any repository" else msg << " in any repository" @@ -186,7 +186,7 @@ def show_lookup_failure(gem_name, version, errors, suppress_suggestions = false, def get_all_gem_names args = options[:args] - if args.nil? or args.empty? + if args.nil? || args.empty? raise Gem::CommandLineError, "Please specify at least one gem name (e.g. 
gem build GEMNAME)" end @@ -216,7 +216,7 @@ def get_all_gem_names_and_versions def get_one_gem_name args = options[:args] - if args.nil? or args.empty? + if args.nil? || args.empty? raise Gem::CommandLineError, "Please specify a gem name on the command line (e.g. gem build GEMNAME)" end @@ -554,7 +554,7 @@ def create_option_parser end def configure_options(header, option_list) - return if option_list.nil? or option_list.empty? + return if option_list.nil? || option_list.empty? header = header.to_s.empty? ? "" : "#{header} " @parser.separator " #{header}Options:" @@ -586,7 +586,7 @@ def wrap(text, width) # :doc: add_common_option("-V", "--[no-]verbose", "Set the verbose level of output") do |value, options| # Set us to "really verbose" so the progress meter works - if Gem.configuration.verbose and value + if Gem.configuration.verbose && value Gem.configuration.verbose = 1 else Gem.configuration.verbose = value diff --git a/lib/rubygems/commands/cert_command.rb b/lib/rubygems/commands/cert_command.rb index 56bf5ce6891b85..17b1d11b1915d1 100644 --- a/lib/rubygems/commands/cert_command.rb +++ b/lib/rubygems/commands/cert_command.rb @@ -152,7 +152,7 @@ def build(email) def build_cert(email, key) # :nodoc: expiration_length_days = options[:expiration_length_days] || - Gem.configuration.cert_expiration_length_days + Gem.configuration.cert_expiration_length_days cert = Gem::Security.create_cert_email( email, diff --git a/lib/rubygems/commands/cleanup_command.rb b/lib/rubygems/commands/cleanup_command.rb index 7f143999eb5006..1ae84924c1410d 100644 --- a/lib/rubygems/commands/cleanup_command.rb +++ b/lib/rubygems/commands/cleanup_command.rb @@ -149,7 +149,7 @@ def get_primary_gems @primary_gems = {} Gem::Specification.each do |spec| - if @primary_gems[spec.name].nil? or + if @primary_gems[spec.name].nil? 
|| @primary_gems[spec.name].version < spec.version @primary_gems[spec.name] = spec end diff --git a/lib/rubygems/commands/contents_command.rb b/lib/rubygems/commands/contents_command.rb index 3dd0b16265fc69..c5fdfca31e5e6e 100644 --- a/lib/rubygems/commands/contents_command.rb +++ b/lib/rubygems/commands/contents_command.rb @@ -77,7 +77,7 @@ def execute gem_contents name end - terminate_interaction 1 unless found or names.length > 1 + terminate_interaction 1 unless found || names.length > 1 end end diff --git a/lib/rubygems/commands/dependency_command.rb b/lib/rubygems/commands/dependency_command.rb index c9ddc9af0a8cab..3f69a95e836895 100644 --- a/lib/rubygems/commands/dependency_command.rb +++ b/lib/rubygems/commands/dependency_command.rb @@ -77,7 +77,7 @@ def fetch_specs(name_pattern, requirement, prerelease) # :nodoc: name_matches = name_pattern ? name_pattern =~ spec.name : true version_matches = requirement.satisfied_by?(spec.version) - name_matches and version_matches + name_matches && version_matches }.map(&:to_spec) end @@ -133,7 +133,7 @@ def execute end def ensure_local_only_reverse_dependencies # :nodoc: - if options[:reverse_dependencies] and remote? and not local? + if options[:reverse_dependencies] && remote? && !local? alert_error "Only reverse dependencies for local gems are supported." terminate_interaction 1 end @@ -182,7 +182,7 @@ def find_reverse_dependencies(spec) # :nodoc: sp.dependencies.each do |dep| dep = Gem::Dependency.new(*dep) unless Gem::Dependency === dep - if spec.name == dep.name and + if spec.name == dep.name && dep.requirement.satisfied_by?(spec.version) result << [sp.full_name, dep] end @@ -197,7 +197,7 @@ def find_reverse_dependencies(spec) # :nodoc: def name_pattern(args) return if args.empty? - if args.length == 1 and args.first =~ /\A(.*)(i)?\z/m + if args.length == 1 && args.first =~ /\A(.*)(i)?\z/m flags = $2 ? 
Regexp::IGNORECASE : nil Regexp.new $1, flags else diff --git a/lib/rubygems/commands/fetch_command.rb b/lib/rubygems/commands/fetch_command.rb index 582563ba8105b2..3a8c435d0e4711 100644 --- a/lib/rubygems/commands/fetch_command.rb +++ b/lib/rubygems/commands/fetch_command.rb @@ -52,8 +52,8 @@ def usage # :nodoc: end def check_version # :nodoc: - if options[:version] != Gem::Requirement.default and - get_all_gem_names.size > 1 + if options[:version] != Gem::Requirement.default && + get_all_gem_names.size > 1 alert_error "Can't use --version with multiple gems. You can specify multiple gems with" \ " version requirements using `gem fetch 'my_gem:1.0.0' 'my_other_gem:~>2.0.0'`" terminate_interaction 1 diff --git a/lib/rubygems/commands/generate_index_command.rb b/lib/rubygems/commands/generate_index_command.rb index 8bb24c9ce3e29d..bc71e60ff0a295 100644 --- a/lib/rubygems/commands/generate_index_command.rb +++ b/lib/rubygems/commands/generate_index_command.rb @@ -68,8 +68,8 @@ def execute # This is always true because it's the only way now. options[:build_modern] = true - if not File.exist?(options[:directory]) or - not File.directory?(options[:directory]) + if !File.exist?(options[:directory]) || + !File.directory?(options[:directory]) alert_error "unknown directory name #{options[:directory]}." 
terminate_interaction 1 else diff --git a/lib/rubygems/commands/install_command.rb b/lib/rubygems/commands/install_command.rb index 690f90c2e42577..724b4fe51dac1c 100644 --- a/lib/rubygems/commands/install_command.rb +++ b/lib/rubygems/commands/install_command.rb @@ -46,8 +46,8 @@ def arguments # :nodoc: def defaults_str # :nodoc: "--both --version '#{Gem::Requirement.default}' --no-force\n" + - "--install-dir #{Gem.dir} --lock\n" + - install_update_defaults_str + "--install-dir #{Gem.dir} --lock\n" + + install_update_defaults_str end def description # :nodoc: @@ -134,15 +134,15 @@ def usage # :nodoc: end def check_install_dir # :nodoc: - if options[:install_dir] and options[:user_install] + if options[:install_dir] && options[:user_install] alert_error "Use --install-dir or --user-install but not both" terminate_interaction 1 end end def check_version # :nodoc: - if options[:version] != Gem::Requirement.default and - get_all_gem_names.size > 1 + if options[:version] != Gem::Requirement.default && + get_all_gem_names.size > 1 alert_error "Can't use --version with multiple gems. You can specify multiple gems with" \ " version requirements using `gem install 'my_gem:1.0.0' 'my_other_gem:~>2.0.0'`" terminate_interaction 1 @@ -191,8 +191,8 @@ def install_from_gemdeps # :nodoc: end def install_gem(name, version) # :nodoc: - return if options[:conservative] and - not Gem::Dependency.new(name, version).matching_specs.empty? + return if options[:conservative] && + !Gem::Dependency.new(name, version).matching_specs.empty? req = Gem::Requirement.create(version) diff --git a/lib/rubygems/commands/pristine_command.rb b/lib/rubygems/commands/pristine_command.rb index d4dadf0736e261..72db53ef378cc3 100644 --- a/lib/rubygems/commands/pristine_command.rb +++ b/lib/rubygems/commands/pristine_command.rb @@ -102,10 +102,10 @@ def execute # `--extensions` must be explicitly given to pristine only gems # with extensions. 
- elsif options[:extensions_set] and - options[:extensions] and options[:args].empty? + elsif options[:extensions_set] && + options[:extensions] && options[:args].empty? Gem::Specification.select do |spec| - spec.extensions and not spec.extensions.empty? + spec.extensions && !spec.extensions.empty? end else get_all_gem_names.sort.map do |gem_name| @@ -135,14 +135,14 @@ def execute end end - unless spec.extensions.empty? or options[:extensions] or options[:only_executables] or options[:only_plugins] + unless spec.extensions.empty? || options[:extensions] || options[:only_executables] || options[:only_plugins] say "Skipped #{spec.full_name}, it needs to compile an extension" next end gem = spec.cache_file - unless File.exist? gem or options[:only_executables] or options[:only_plugins] + unless File.exist?(gem) || options[:only_executables] || options[:only_plugins] require_relative "../remote_fetcher" say "Cached gem for #{spec.full_name} not found, attempting to fetch..." diff --git a/lib/rubygems/commands/setup_command.rb b/lib/rubygems/commands/setup_command.rb index 1ed889a7134be0..c782c3618cf480 100644 --- a/lib/rubygems/commands/setup_command.rb +++ b/lib/rubygems/commands/setup_command.rb @@ -329,9 +329,9 @@ def install_rdoc # ignore end - if File.writable? gem_doc_dir and - (not File.exist? rubygems_doc_dir or - File.writable? 
rubygems_doc_dir) + if File.writable?(gem_doc_dir) && + (!File.exist?(rubygems_doc_dir) || + File.writable?(rubygems_doc_dir)) say "Removing old RubyGems RDoc and ri" if @verbose Dir[File.join(Gem.dir, "doc", "rubygems-[0-9]*")].each do |dir| rm_rf dir @@ -559,7 +559,7 @@ def show_release_notes history_string = "" - until versions.length == 0 or + until versions.length == 0 || versions.shift <= options[:previous_version] do history_string += version_lines.shift + text.shift end diff --git a/lib/rubygems/commands/sources_command.rb b/lib/rubygems/commands/sources_command.rb index a5f2d022c6b20a..5a8f5af9c3da25 100644 --- a/lib/rubygems/commands/sources_command.rb +++ b/lib/rubygems/commands/sources_command.rb @@ -82,7 +82,7 @@ def check_typo_squatting(source) def check_rubygems_https(source_uri) # :nodoc: uri = URI source_uri - if uri.scheme and uri.scheme.downcase == "http" and + if uri.scheme && uri.scheme.downcase == "http" && uri.host.downcase == "rubygems.org" question = <<-QUESTION.chomp https://rubygems.org is recommended for security over #{uri} @@ -215,9 +215,9 @@ def update # :nodoc: def remove_cache_file(desc, path) # :nodoc: FileUtils.rm_rf path - if not File.exist?(path) + if !File.exist?(path) say "*** Removed #{desc} source cache ***" - elsif not File.writable?(path) + elsif !File.writable?(path) say "*** Unable to remove #{desc} source cache (write protected) ***" else say "*** Unable to remove #{desc} source cache ***" diff --git a/lib/rubygems/commands/specification_command.rb b/lib/rubygems/commands/specification_command.rb index 6457a755ae4629..12004a6d5696c4 100644 --- a/lib/rubygems/commands/specification_command.rb +++ b/lib/rubygems/commands/specification_command.rb @@ -88,7 +88,7 @@ def execute raise Gem::CommandLineError, "Unsupported version type: '#{v}'" end - if !req.none? and options[:all] + if !req.none? 
&& options[:all] alert_error "Specify --all or -v, not both" terminate_interaction 1 end @@ -102,7 +102,7 @@ def execute field = get_one_optional_argument raise Gem::CommandLineError, "--ruby and FIELD are mutually exclusive" if - field and options[:format] == :ruby + field && options[:format] == :ruby if local? if File.exist? gem diff --git a/lib/rubygems/commands/uninstall_command.rb b/lib/rubygems/commands/uninstall_command.rb index d03a96bf871891..3c520826e5bf97 100644 --- a/lib/rubygems/commands/uninstall_command.rb +++ b/lib/rubygems/commands/uninstall_command.rb @@ -96,7 +96,7 @@ def arguments # :nodoc: def defaults_str # :nodoc: "--version '#{Gem::Requirement.default}' --no-force " + - "--user-install" + "--user-install" end def description # :nodoc: @@ -114,8 +114,8 @@ def usage # :nodoc: end def check_version # :nodoc: - if options[:version] != Gem::Requirement.default and - get_all_gem_names.size > 1 + if options[:version] != Gem::Requirement.default && + get_all_gem_names.size > 1 alert_error "Can't use --version with multiple gems. You can specify multiple gems with" \ " version requirements using `gem uninstall 'my_gem:1.0.0' 'my_other_gem:~>2.0.0'`" terminate_interaction 1 @@ -125,7 +125,7 @@ def check_version # :nodoc: def execute check_version - if options[:all] and not options[:args].empty? + if options[:all] && !options[:args].empty? 
uninstall_specific elsif options[:all] uninstall_all diff --git a/lib/rubygems/commands/update_command.rb b/lib/rubygems/commands/update_command.rb index a0796426699f0b..7c24fedcded3f3 100644 --- a/lib/rubygems/commands/update_command.rb +++ b/lib/rubygems/commands/update_command.rb @@ -56,7 +56,7 @@ def arguments # :nodoc: def defaults_str # :nodoc: "--no-force --install-dir #{Gem.dir}\n" + - install_update_defaults_str + install_update_defaults_str end def description # :nodoc: @@ -155,7 +155,7 @@ def highest_installed_gems # :nodoc: Gem::Specification.dirs = Gem.user_dir if options[:user_install] Gem::Specification.each do |spec| - if hig[spec.name].nil? or hig[spec.name].version < spec.version + if hig[spec.name].nil? || hig[spec.name].version < spec.version hig[spec.name] = spec end end @@ -292,8 +292,8 @@ def update_rubygems_arguments # :nodoc: args << "--no-document" unless options[:document].include?("rdoc") || options[:document].include?("ri") args << "--no-format-executable" if options[:no_format_executable] args << "--previous-version" << Gem::VERSION if - options[:system] == true or - Gem::Version.new(options[:system]) >= Gem::Version.new(2) + options[:system] == true || + Gem::Version.new(options[:system]) >= Gem::Version.new(2) args end @@ -301,7 +301,7 @@ def which_to_update(highest_installed_gems, gem_names) result = [] highest_installed_gems.each do |l_name, l_spec| - next if not gem_names.empty? and + next if !gem_names.empty? && gem_names.none? {|name| name == l_spec.name } highest_remote_tup = highest_remote_name_tuple l_spec diff --git a/lib/rubygems/commands/which_command.rb b/lib/rubygems/commands/which_command.rb index 78493e9380ab1b..5b9a79b734be9b 100644 --- a/lib/rubygems/commands/which_command.rb +++ b/lib/rubygems/commands/which_command.rb @@ -71,7 +71,7 @@ def find_paths(package_name, dirs) dirs.each do |dir| Gem.suffixes.each do |ext| full_path = File.join dir, "#{package_name}#{ext}" - if File.exist? full_path and not File.directory? 
full_path + if File.exist?(full_path) && !File.directory?(full_path) result << full_path return result unless options[:show_all] end diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index 8d64b58cb964cd..c53e209ae8ace7 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -368,7 +368,7 @@ def load_file(filename) # True if the backtrace option has been specified, or debug is on. def backtrace - @backtrace or $DEBUG + @backtrace || $DEBUG end # The name of the configuration file. @@ -477,11 +477,11 @@ def []=(key, value) end def ==(other) # :nodoc: - self.class === other and - @backtrace == other.backtrace and - @bulk_threshold == other.bulk_threshold and - @verbose == other.verbose and - @update_sources == other.update_sources and + self.class === other && + @backtrace == other.backtrace && + @bulk_threshold == other.bulk_threshold && + @verbose == other.verbose && + @update_sources == other.update_sources && @hash == other.hash end diff --git a/lib/rubygems/core_ext/kernel_warn.rb b/lib/rubygems/core_ext/kernel_warn.rb index 7df6c48b8f34bf..8f43e00456b81d 100644 --- a/lib/rubygems/core_ext/kernel_warn.rb +++ b/lib/rubygems/core_ext/kernel_warn.rb @@ -39,7 +39,7 @@ class << self start += 1 if path = loc.path - unless path.start_with?(rubygems_path) or path.start_with?("(other) def ==(other) case other when self.class - @name == other.name and - @version == other.version and + @name == other.name && + @version == other.version && @platform == other.platform when Array to_a == other diff --git a/lib/rubygems/package.rb b/lib/rubygems/package.rb index 77f9f282d8fb79..084dc5d2d9523a 100644 --- a/lib/rubygems/package.rb +++ b/lib/rubygems/package.rb @@ -68,14 +68,14 @@ def initialize(message, source = nil) class PathError < Error def initialize(destination, destination_dir) super "installing into parent path %s of %s is not allowed" % - [destination, destination_dir] + [destination, destination_dir] end end class SymlinkError 
< Error def initialize(name, destination, destination_dir) super "installing symlink '%s' pointing to parent path %s of %s is not allowed" % - [name, destination, destination_dir] + [name, destination, destination_dir] end end @@ -687,7 +687,7 @@ def verify_files(gem) "package content (data.tar.gz) is missing", @gem end - if duplicates = @files.group_by {|f| f }.select {|k,v| v.size > 1 }.map(&:first) and duplicates.any? + if (duplicates = @files.group_by {|f| f }.select {|k,v| v.size > 1 }.map(&:first)) && duplicates.any? raise Gem::Security::Exception, "duplicate files in the package: (#{duplicates.map(&:inspect).join(', ')})" end end diff --git a/lib/rubygems/package/tar_header.rb b/lib/rubygems/package/tar_header.rb index fb70765dde2ff4..ee515a9e050f5a 100644 --- a/lib/rubygems/package/tar_header.rb +++ b/lib/rubygems/package/tar_header.rb @@ -173,23 +173,23 @@ def empty? end def ==(other) # :nodoc: - self.class === other and - @checksum == other.checksum and - @devmajor == other.devmajor and - @devminor == other.devminor and - @gid == other.gid and - @gname == other.gname and - @linkname == other.linkname and - @magic == other.magic and - @mode == other.mode and - @mtime == other.mtime and - @name == other.name and - @prefix == other.prefix and - @size == other.size and - @typeflag == other.typeflag and - @uid == other.uid and - @uname == other.uname and - @version == other.version + self.class === other && + @checksum == other.checksum && + @devmajor == other.devmajor && + @devminor == other.devminor && + @gid == other.gid && + @gname == other.gname && + @linkname == other.linkname && + @magic == other.magic && + @mode == other.mode && + @mtime == other.mtime && + @name == other.name && + @prefix == other.prefix && + @size == other.size && + @typeflag == other.typeflag && + @uid == other.uid && + @uname == other.uname && + @version == other.version end def to_s # :nodoc: diff --git a/lib/rubygems/package/tar_writer.rb b/lib/rubygems/package/tar_writer.rb 
index 6f068f50c22b34..db5242c5e4a46e 100644 --- a/lib/rubygems/package/tar_writer.rb +++ b/lib/rubygems/package/tar_writer.rb @@ -314,7 +314,7 @@ def split_name(name) # :nodoc: prefix = parts.join("/") end - if name.bytesize > 100 or prefix.empty? + if name.bytesize > 100 || prefix.empty? raise Gem::Package::TooLongFileName.new("File \"#{prefix}/#{name}\" has a too long name (should be 100 or less)") end diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 607e3906d6ed0e..ed3571dbff9316 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -23,9 +23,9 @@ def self.match(platform) def self.match_platforms?(platform, platforms) platforms.any? do |local_platform| - platform.nil? or - local_platform == platform or - (local_platform != Gem::Platform::RUBY and local_platform =~ platform) + platform.nil? || + local_platform == platform || + (local_platform != Gem::Platform::RUBY && local_platform =~ platform) end end private_class_method :match_platforms? @@ -70,7 +70,7 @@ def initialize(arch) when String then arch = arch.split "-" - if arch.length > 2 and arch.last !~ /\d/ # reassemble x86-linux-gnu + if arch.length > 2 && arch.last !~ (/\d/) # reassemble x86-linux-gnu extra = arch.pop arch.last << "-#{extra}" end @@ -82,7 +82,7 @@ def initialize(arch) else cpu end - if arch.length == 2 and arch.last =~ /^\d+(\.\d+)?$/ # for command-line + if arch.length == 2 && arch.last =~ /^\d+(\.\d+)?$/ # for command-line @os, @version = arch return end @@ -107,7 +107,7 @@ def initialize(arch) when /mingw-?(\w+)?/ then [ "mingw", $1 ] when /(mswin\d+)(\_(\d+))?/ then os, version = $1, $3 - @cpu = "x86" if @cpu.nil? and os =~ /32$/ + @cpu = "x86" if @cpu.nil? && os =~ /32$/ [os, version] when /netbsdelf/ then [ "netbsdelf", nil ] when /openbsd(\d+\.\d+)?/ then [ "openbsd", $1 ] @@ -139,7 +139,7 @@ def to_s # the same CPU, OS and version. 
def ==(other) - self.class === other and to_a == other.to_a + self.class === other && to_a == other.to_a end alias :eql? :== @@ -160,18 +160,18 @@ def ===(other) return nil unless Gem::Platform === other # universal-mingw32 matches x64-mingw-ucrt - return true if (@cpu == "universal" or other.cpu == "universal") and - @os.start_with?("mingw") and other.os.start_with?("mingw") + return true if (@cpu == "universal" || other.cpu == "universal") && + @os.start_with?("mingw") && other.os.start_with?("mingw") # cpu - ([nil,"universal"].include?(@cpu) or [nil, "universal"].include?(other.cpu) or @cpu == other.cpu or - (@cpu == "arm" and other.cpu.start_with?("arm"))) and + ([nil,"universal"].include?(@cpu) || [nil, "universal"].include?(other.cpu) || @cpu == other.cpu || + (@cpu == "arm" && other.cpu.start_with?("arm"))) && - # os - @os == other.os and + # os + @os == other.os && - # version - (@version.nil? or other.version.nil? or @version == other.version) + # version + (@version.nil? || other.version.nil? || @version == other.version) end ## diff --git a/lib/rubygems/query_utils.rb b/lib/rubygems/query_utils.rb index 4601d9374ca93d..a502717f94c96a 100644 --- a/lib/rubygems/query_utils.rb +++ b/lib/rubygems/query_utils.rb @@ -112,7 +112,7 @@ def args end def display_header(type) - if (ui.outs.tty? and Gem.configuration.verbose) or both? + if (ui.outs.tty? && Gem.configuration.verbose) || both? say say "*** #{type} GEMS ***" say @@ -132,7 +132,7 @@ def show_local_gems(name, req = Gem::Requirement.default) name_matches = name ? s.name =~ name : true version_matches = show_prereleases? || !s.version.prerelease? - name_matches and version_matches + name_matches && version_matches end spec_tuples = specs.map do |spec| @@ -176,7 +176,7 @@ def specs_type # Check if gem +name+ version +version+ is installed. def installed?(name, req = Gem::Requirement.default) - Gem::Specification.any? {|s| s.name =~ name and req =~ s.version } + Gem::Specification.any? 
{|s| s.name =~ name && req =~ s.version } end def output_query_results(spec_tuples) @@ -242,7 +242,7 @@ def entry_versions(entry, name_tuples, platforms, specs) return unless options[:versions] list = - if platforms.empty? or options[:details] + if platforms.empty? || options[:details] name_tuples.map {|n| n.version }.uniq else platforms.sort.reverse.map do |version, pls| @@ -289,13 +289,13 @@ def spec_authors(entry, spec) end def spec_homepage(entry, spec) - return if spec.homepage.nil? or spec.homepage.empty? + return if spec.homepage.nil? || spec.homepage.empty? entry << "\n" << format_text("Homepage: #{spec.homepage}", 68, 4) end def spec_license(entry, spec) - return if spec.license.nil? or spec.license.empty? + return if spec.license.nil? || spec.license.empty? licenses = "License#{spec.licenses.length > 1 ? 's' : ''}: ".dup licenses << spec.licenses.join(", ") diff --git a/lib/rubygems/remote_fetcher.rb b/lib/rubygems/remote_fetcher.rb index d0287398614c7b..0ac6eaa130e9be 100644 --- a/lib/rubygems/remote_fetcher.rb +++ b/lib/rubygems/remote_fetcher.rb @@ -114,7 +114,7 @@ def download(spec, source_uri, install_dir = Gem.dir) cache_dir = if Dir.pwd == install_dir # see fetch_command install_dir - elsif File.writable?(install_cache_dir) || (File.writable?(install_dir) && (not File.exist?(install_cache_dir))) + elsif File.writable?(install_cache_dir) || (File.writable?(install_dir) && (!File.exist?(install_cache_dir))) install_cache_dir else File.join Gem.user_dir, "cache" @@ -247,7 +247,7 @@ def fetch_path(uri, mtime = nil, head = false) data = send "fetch_#{uri.scheme}", uri, mtime, head - if data and !head and uri.to_s.end_with?(".gz") + if data && !head && uri.to_s.end_with?(".gz") begin data = Gem::Util.gunzip data rescue Zlib::GzipFile::Error @@ -288,7 +288,7 @@ def cache_update_path(uri, path = nil, update = true) return Gem.read_binary(path) end - if update and path + if update && path Gem.write_binary(path, data) end diff --git a/lib/rubygems/request.rb 
b/lib/rubygems/request.rb index d15ba91209fb7c..c3ea46e0ebf344 100644 --- a/lib/rubygems/request.rb +++ b/lib/rubygems/request.rb @@ -173,7 +173,7 @@ def self.get_proxy_from_env(scheme = "http") require "uri" uri = URI(Gem::UriFormatter.new(env_proxy).normalize) - if uri and uri.user.nil? and uri.password.nil? + if uri && uri.user.nil? && uri.password.nil? user = ENV["#{_scheme}_proxy_user"] || ENV["#{_SCHEME}_PROXY_USER"] password = ENV["#{_scheme}_proxy_pass"] || ENV["#{_SCHEME}_PROXY_PASS"] diff --git a/lib/rubygems/request/connection_pools.rb b/lib/rubygems/request/connection_pools.rb index a283267674ac93..44280489fbeb68 100644 --- a/lib/rubygems/request/connection_pools.rb +++ b/lib/rubygems/request/connection_pools.rb @@ -39,7 +39,7 @@ def close_all def get_no_proxy_from_env env_no_proxy = ENV["no_proxy"] || ENV["NO_PROXY"] - return [] if env_no_proxy.nil? or env_no_proxy.empty? + return [] if env_no_proxy.nil? || env_no_proxy.empty? env_no_proxy.split(/\s*,\s*/) end @@ -78,7 +78,7 @@ def net_http_args(uri, proxy_uri) no_proxy = get_no_proxy_from_env - if proxy_uri and not no_proxy?(hostname, no_proxy) + if proxy_uri && !no_proxy?(hostname, no_proxy) proxy_hostname = proxy_uri.respond_to?(:hostname) ? proxy_uri.hostname : proxy_uri.host net_http_args + [ proxy_hostname, diff --git a/lib/rubygems/request/http_pool.rb b/lib/rubygems/request/http_pool.rb index f028516db8c407..7b309eedd36f1a 100644 --- a/lib/rubygems/request/http_pool.rb +++ b/lib/rubygems/request/http_pool.rb @@ -26,7 +26,7 @@ def checkin(connection) def close_all until @queue.empty? - if connection = @queue.pop(true) and connection.started? + if (connection = @queue.pop(true)) && connection.started? 
connection.finish end end diff --git a/lib/rubygems/request_set.rb b/lib/rubygems/request_set.rb index df215e4af38ad9..64701a821407c7 100644 --- a/lib/rubygems/request_set.rb +++ b/lib/rubygems/request_set.rb @@ -443,14 +443,14 @@ def tsort_each_node(&block) # :nodoc: def tsort_each_child(node) # :nodoc: node.spec.dependencies.each do |dep| - next if dep.type == :development and not @development + next if dep.type == :development && !@development match = @requests.find do |r| - dep.match? r.spec.name, r.spec.version, @prerelease + dep.match? r.spec.name, r.spec.version, r.spec.is_a?(Gem::Resolver::InstalledSpecification) || @prerelease end unless match - next if dep.type == :development and @development_shallow + next if dep.type == :development && @development_shallow next if @soft_missing raise Gem::DependencyError, "Unresolved dependency found during sorting - #{dep} (requested by #{node.spec.full_name})" diff --git a/lib/rubygems/request_set/gem_dependency_api.rb b/lib/rubygems/request_set/gem_dependency_api.rb index 568d9f952ff8aa..fe75ac5208de5c 100644 --- a/lib/rubygems/request_set/gem_dependency_api.rb +++ b/lib/rubygems/request_set/gem_dependency_api.rb @@ -371,7 +371,7 @@ def gem(name, *requirements) duplicate = @dependencies.include? name @dependencies[name] = - if requirements.empty? and not source_set + if requirements.empty? 
&& !source_set Gem::Requirement.default elsif source_set Gem::Requirement.source_set @@ -789,7 +789,7 @@ def ruby(version, options = {}) raise ArgumentError, "You must specify engine_version along with the Ruby engine" if - engine and not engine_version + engine && !engine_version return true if @installing @@ -800,7 +800,7 @@ def ruby(version, options = {}) raise Gem::RubyVersionMismatch, message end - if engine and engine != Gem.ruby_engine + if engine && engine != Gem.ruby_engine message = "Your Ruby engine is #{Gem.ruby_engine}, " + "but your #{gem_deps_file} requires #{engine}" diff --git a/lib/rubygems/request_set/lockfile/parser.rb b/lib/rubygems/request_set/lockfile/parser.rb index 376d37f9e2d8ae..8446f9df8eb5f1 100644 --- a/lib/rubygems/request_set/lockfile/parser.rb +++ b/lib/rubygems/request_set/lockfile/parser.rb @@ -30,7 +30,7 @@ def parse when "PLATFORMS" then parse_PLATFORMS else - token = get until @tokens.empty? or peek.first == :section + token = get until @tokens.empty? || peek.first == :section end else raise "BUG: unhandled token #{token.type} (#{token.value.inspect}) at line #{token.line} column #{token.column}" @@ -44,7 +44,7 @@ def parse def get(expected_types = nil, expected_value = nil) # :nodoc: token = @tokens.shift - if expected_types and not Array(expected_types).include? 
token.type + if expected_types && !Array(expected_types).include?(token.type) unget token message = "unexpected token [#{token.type.inspect}, #{token.value.inspect}], " + @@ -53,7 +53,7 @@ def get(expected_types = nil, expected_value = nil) # :nodoc: raise Gem::RequestSet::Lockfile::ParseError.new message, token.column, token.line, @filename end - if expected_value and expected_value != token.value + if expected_value && expected_value != token.value unget token message = "unexpected token [#{token.type.inspect}, #{token.value.inspect}], " + @@ -67,7 +67,7 @@ def get(expected_types = nil, expected_value = nil) # :nodoc: end def parse_DEPENDENCIES # :nodoc: - while not @tokens.empty? and :text == peek.type do + while !@tokens.empty? && :text == peek.type do token = get :text requirements = [] @@ -127,7 +127,7 @@ def parse_GEM # :nodoc: set = Gem::Resolver::LockSet.new sources last_specs = nil - while not @tokens.empty? and :text == peek.type do + while !@tokens.empty? && :text == peek.type do token = get :text name = token.value column = token.column @@ -144,7 +144,7 @@ def parse_GEM # :nodoc: type = token.type data = token.value - if type == :text and column == 4 + if type == :text && column == 4 version, platform = data.split "-", 2 platform = @@ -183,7 +183,7 @@ def parse_GIT # :nodoc: type = peek.type value = peek.value - if type == :entry and %w[branch ref tag].include? value + if type == :entry && %w[branch ref tag].include?(value) get get :text @@ -199,7 +199,7 @@ def parse_GIT # :nodoc: last_spec = nil - while not @tokens.empty? and :text == peek.type do + while !@tokens.empty? 
&& :text == peek.type do token = get :text name = token.value column = token.column @@ -214,7 +214,7 @@ def parse_GIT # :nodoc: type = token.type data = token.value - if type == :text and column == 4 + if type == :text && column == 4 last_spec = set.add_git_spec name, data, repository, revision, true else dependency = parse_dependency name, data @@ -246,7 +246,7 @@ def parse_PATH # :nodoc: set = Gem::Resolver::VendorSet.new last_spec = nil - while not @tokens.empty? and :text == peek.first do + while !@tokens.empty? && :text == peek.first do token = get :text name = token.value column = token.column @@ -261,7 +261,7 @@ def parse_PATH # :nodoc: type = token.type data = token.value - if type == :text and column == 4 + if type == :text && column == 4 last_spec = set.add_vendor_gem name, directory else dependency = parse_dependency name, data @@ -281,7 +281,7 @@ def parse_PATH # :nodoc: end def parse_PLATFORMS # :nodoc: - while not @tokens.empty? and :text == peek.first do + while !@tokens.empty? && :text == peek.first do name = get(:text).value @platforms << name diff --git a/lib/rubygems/request_set/lockfile/tokenizer.rb b/lib/rubygems/request_set/lockfile/tokenizer.rb index 79c573a02d9fb3..4476a041c426dd 100644 --- a/lib/rubygems/request_set/lockfile/tokenizer.rb +++ b/lib/rubygems/request_set/lockfile/tokenizer.rb @@ -1,4 +1,4 @@ -# frozen_string_literal: true +# frozen_string_literal: true require_relative "parser" class Gem::RequestSet::Lockfile::Tokenizer @@ -26,7 +26,7 @@ def to_a end def skip(type) - @tokens.shift while not @tokens.empty? and peek.type == type + @tokens.shift while !@tokens.empty? && peek.type == type end ## diff --git a/lib/rubygems/requirement.rb b/lib/rubygems/requirement.rb index 12bf371f4ed17e..4f19b8c5b0362d 100644 --- a/lib/rubygems/requirement.rb +++ b/lib/rubygems/requirement.rb @@ -253,7 +253,7 @@ def satisfied_by?(version) def specific? return true if @requirements.length > 1 # GIGO, > 1, > 2 is silly - not %w[> >=].include?
@requirements.first.first # grab the operator + !%w[> >=].include? @requirements.first.first # grab the operator end def to_s # :nodoc: diff --git a/lib/rubygems/resolver.rb b/lib/rubygems/resolver.rb index 097e8243eedb3a..bf7d6d943b8fb7 100644 --- a/lib/rubygems/resolver.rb +++ b/lib/rubygems/resolver.rb @@ -153,10 +153,10 @@ def requests(s, act, reqs=[]) # :nodoc: s.fetch_development_dependencies if @development s.dependencies.reverse_each do |d| - next if d.type == :development and not @development - next if d.type == :development and @development_shallow and + next if d.type == :development && !@development + next if d.type == :development && @development_shallow && act.development? - next if d.type == :development and @development_shallow and + next if d.type == :development && @development_shallow && act.parent reqs << Gem::Resolver::DependencyRequest.new(d, act) @@ -192,7 +192,7 @@ def resolve conflict = e.conflicts.values.first raise Gem::DependencyResolutionError, Conflict.new(conflict.requirement_trees.first.first, conflict.existing, conflict.requirement) ensure - @output.close if defined?(@output) and !debug? + @output.close if defined?(@output) && !debug? 
end ## diff --git a/lib/rubygems/resolver/api_specification.rb b/lib/rubygems/resolver/api_specification.rb index 7af4d9cff369ec..1e65d5e5a9dc2e 100644 --- a/lib/rubygems/resolver/api_specification.rb +++ b/lib/rubygems/resolver/api_specification.rb @@ -40,10 +40,10 @@ def initialize(set, api_data) end def ==(other) # :nodoc: - self.class === other and - @set == other.set and - @name == other.name and - @version == other.version and + self.class === other && + @set == other.set && + @name == other.name && + @version == other.version && @platform == other.platform end diff --git a/lib/rubygems/resolver/best_set.rb b/lib/rubygems/resolver/best_set.rb index ab91ebca087e41..075ee1ef5c94ba 100644 --- a/lib/rubygems/resolver/best_set.rb +++ b/lib/rubygems/resolver/best_set.rb @@ -25,7 +25,7 @@ def pick_sets # :nodoc: end def find_all(req) # :nodoc: - pick_sets if @remote and @sets.empty? + pick_sets if @remote && @sets.empty? super rescue Gem::RemoteFetcher::FetchError => e @@ -35,7 +35,7 @@ def find_all(req) # :nodoc: end def prefetch(reqs) # :nodoc: - pick_sets if @remote and @sets.empty? + pick_sets if @remote && @sets.empty? super end @@ -63,7 +63,7 @@ def replace_failed_api_set(error) # :nodoc: uri = uri + "." 
raise error unless api_set = @sets.find do |set| - Gem::Resolver::APISet === set and set.dep_uri == uri + Gem::Resolver::APISet === set && set.dep_uri == uri end index_set = Gem::Resolver::IndexSet.new api_set.source diff --git a/lib/rubygems/resolver/conflict.rb b/lib/rubygems/resolver/conflict.rb index 54a7ca4641a21b..aba6d73ea730b2 100644 --- a/lib/rubygems/resolver/conflict.rb +++ b/lib/rubygems/resolver/conflict.rb @@ -27,9 +27,9 @@ def initialize(dependency, activated, failed_dep=dependency) end def ==(other) # :nodoc: - self.class === other and - @dependency == other.dependency and - @activated == other.activated and + self.class === other && + @dependency == other.dependency && + @activated == other.activated && @failed_dep == other.failed_dep end diff --git a/lib/rubygems/resolver/git_specification.rb b/lib/rubygems/resolver/git_specification.rb index d1e04737da02d3..6a178ea82e2db1 100644 --- a/lib/rubygems/resolver/git_specification.rb +++ b/lib/rubygems/resolver/git_specification.rb @@ -6,9 +6,9 @@ class Gem::Resolver::GitSpecification < Gem::Resolver::SpecSpecification def ==(other) # :nodoc: - self.class === other and - @set == other.set and - @spec == other.spec and + self.class === other && + @set == other.set && + @spec == other.spec && @source == other.source end diff --git a/lib/rubygems/resolver/installed_specification.rb b/lib/rubygems/resolver/installed_specification.rb index 7c7ad8d85b9bc8..8932e068be8640 100644 --- a/lib/rubygems/resolver/installed_specification.rb +++ b/lib/rubygems/resolver/installed_specification.rb @@ -5,8 +5,8 @@ class Gem::Resolver::InstalledSpecification < Gem::Resolver::SpecSpecification def ==(other) # :nodoc: - self.class === other and - @set == other.set and + self.class === other && + @set == other.set && @spec == other.spec end diff --git a/lib/rubygems/resolver/installer_set.rb b/lib/rubygems/resolver/installer_set.rb index 15580d7095e27f..f663ce4ad58157 100644 --- a/lib/rubygems/resolver/installer_set.rb +++ 
b/lib/rubygems/resolver/installer_set.rb @@ -61,13 +61,12 @@ def add_always_install(dependency) found = find_all request found.delete_if do |s| - s.version.prerelease? and not s.local? + s.version.prerelease? && !s.local? end unless dependency.prerelease? found = found.select do |s| - Gem::Source::SpecificFile === s.source or - Gem::Platform::RUBY == s.platform or - Gem::Platform.local === s.platform + Gem::Source::SpecificFile === s.source || + Gem::Platform.match(s.platform) end found = found.sort_by do |s| @@ -111,14 +110,14 @@ def add_local(dep_name, spec, source) # Should local gems should be considered? def consider_local? # :nodoc: - @domain == :both or @domain == :local + @domain == :both || @domain == :local end ## # Should remote gems should be considered? def consider_remote? # :nodoc: - @domain == :both or @domain == :remote + @domain == :both || @domain == :remote end ## @@ -137,8 +136,8 @@ def find_all(req) dep = req.dependency - return res if @ignore_dependencies and - @always_install.none? {|spec| dep.match? spec } + return res if @ignore_dependencies && + @always_install.none? {|spec| dep.match? spec } name = dep.name @@ -168,10 +167,6 @@ def find_all(req) end end - res.delete_if do |spec| - spec.version.prerelease? and not dep.prerelease? - end - res.concat @remote_set.find_all req if consider_remote? res diff --git a/lib/rubygems/resolver/lock_set.rb b/lib/rubygems/resolver/lock_set.rb index ff6c6c912f99a6..b1a5433cb54307 100644 --- a/lib/rubygems/resolver/lock_set.rb +++ b/lib/rubygems/resolver/lock_set.rb @@ -54,7 +54,7 @@ def load_spec(name, version, platform, source) # :nodoc: dep = Gem::Dependency.new name, version found = @specs.find do |spec| - dep.matches_spec? 
spec and spec.platform == platform + dep.matches_spec?(spec) && spec.platform == platform end tuple = Gem::NameTuple.new found.name, found.version, found.platform diff --git a/lib/rubygems/resolver/lock_specification.rb b/lib/rubygems/resolver/lock_specification.rb index 4a30dcf8490212..7de2a146589de2 100644 --- a/lib/rubygems/resolver/lock_specification.rb +++ b/lib/rubygems/resolver/lock_specification.rb @@ -71,7 +71,7 @@ def pretty_print(q) # :nodoc: def spec @spec ||= Gem::Specification.find do |spec| - spec.name == @name and spec.version == @version + spec.name == @name && spec.version == @version end @spec ||= Gem::Specification.new do |s| diff --git a/lib/rubygems/resolver/vendor_specification.rb b/lib/rubygems/resolver/vendor_specification.rb index 8dfe5940f2a954..600a98a2bfd17b 100644 --- a/lib/rubygems/resolver/vendor_specification.rb +++ b/lib/rubygems/resolver/vendor_specification.rb @@ -6,9 +6,9 @@ class Gem::Resolver::VendorSpecification < Gem::Resolver::SpecSpecification def ==(other) # :nodoc: - self.class === other and - @set == other.set and - @spec == other.spec and + self.class === other && + @set == other.set && + @spec == other.spec && @source == other.source end diff --git a/lib/rubygems/security.rb b/lib/rubygems/security.rb index 4eb402305546a4..dd16283a982296 100644 --- a/lib/rubygems/security.rb +++ b/lib/rubygems/security.rb @@ -618,7 +618,7 @@ def self.write(pemmable, path, permissions = 0600, passphrase = nil, cipher = KE path = File.expand_path path File.open path, "wb", permissions do |io| - if passphrase and cipher + if passphrase && cipher io.write pemmable.to_pem cipher, passphrase else io.write pemmable.to_pem diff --git a/lib/rubygems/security/policy.rb b/lib/rubygems/security/policy.rb index 43588fd7f10a78..959880ddc1e653 100644 --- a/lib/rubygems/security/policy.rb +++ b/lib/rubygems/security/policy.rb @@ -88,16 +88,16 @@ def check_cert(signer, issuer, time) message = "certificate #{signer.subject}" - if not_before = 
signer.not_before and not_before > time + if (not_before = signer.not_before) && not_before > time raise Gem::Security::Exception, "#{message} not valid before #{not_before}" end - if not_after = signer.not_after and not_after < time + if (not_after = signer.not_after) && not_after < time raise Gem::Security::Exception, "#{message} not valid after #{not_after}" end - if issuer and not signer.verify issuer.public_key + if issuer && !signer.verify(issuer.public_key) raise Gem::Security::Exception, "#{message} was not issued by #{issuer.subject}" end @@ -109,7 +109,7 @@ def check_cert(signer, issuer, time) # Ensures the public key of +key+ matches the public key in +signer+ def check_key(signer, key) - unless signer and key + unless signer && key return true unless @only_signed raise Gem::Security::Exception, "missing key or signature" @@ -231,7 +231,7 @@ def verify(chain, key = nil, digests = {}, signatures = {}, if @verify_data raise Gem::Security::Exception, "no digests provided (probable bug)" if - signer_digests.nil? or signer_digests.empty? + signer_digests.nil? || signer_digests.empty? else signer_digests = {} end @@ -248,7 +248,7 @@ def verify(chain, key = nil, digests = {}, signatures = {}, if @only_trusted check_trust chain, digester, trust_dir - elsif signatures.empty? and digests.empty? + elsif signatures.empty? && digests.empty? # trust is irrelevant if there's no signatures to verify else alert_warning "#{subject signer} is not trusted for #{full_name}" diff --git a/lib/rubygems/security/signer.rb b/lib/rubygems/security/signer.rb index b1308c4e4212a6..cca82f1cf85a88 100644 --- a/lib/rubygems/security/signer.rb +++ b/lib/rubygems/security/signer.rb @@ -141,7 +141,7 @@ def sign(data) raise Gem::Security::Exception, "no certs provided" if @cert_chain.empty? 
- if @cert_chain.length == 1 and @cert_chain.last.not_after < Time.now + if @cert_chain.length == 1 && @cert_chain.last.not_after < Time.now alert("Your certificate has expired, trying to re-sign it...") re_sign_key( diff --git a/lib/rubygems/source.rb b/lib/rubygems/source.rb index 7c3b6786458d6a..fc72a1038aa68a 100644 --- a/lib/rubygems/source.rb +++ b/lib/rubygems/source.rb @@ -62,7 +62,7 @@ def <=>(other) end def ==(other) # :nodoc: - self.class === other and @uri == other.uri + self.class === other && @uri == other.uri end alias_method :eql?, :== # :nodoc: diff --git a/lib/rubygems/source/git.rb b/lib/rubygems/source/git.rb index 22355adcfa90e8..2609a309e818cb 100644 --- a/lib/rubygems/source/git.rb +++ b/lib/rubygems/source/git.rb @@ -76,10 +76,10 @@ def <=>(other) end def ==(other) # :nodoc: - super and - @name == other.name and - @repository == other.repository and - @reference == other.reference and + super && + @name == other.name && + @repository == other.repository && + @reference == other.reference && @need_submodules == other.need_submodules end diff --git a/lib/rubygems/spec_fetcher.rb b/lib/rubygems/spec_fetcher.rb index 43e7e05b63d693..0d06d1f144d523 100644 --- a/lib/rubygems/spec_fetcher.rb +++ b/lib/rubygems/spec_fetcher.rb @@ -98,7 +98,7 @@ def search_for_dependency(dependency, matching_platform=true) found[source] = specs.select do |tup| if dependency.match?(tup) - if matching_platform and !Gem::Platform.match_gem?(tup.platform, tup.name) + if matching_platform && !Gem::Platform.match_gem?(tup.platform, tup.name) pm = ( rejected_specs[dependency] ||= \ Gem::PlatformMismatch.new(tup.name, tup.version)) diff --git a/lib/rubygems/specification.rb b/lib/rubygems/specification.rb index 28ad176b535578..af07cd36e25e81 100644 --- a/lib/rubygems/specification.rb +++ b/lib/rubygems/specification.rb @@ -473,7 +473,7 @@ def author=(o) # spec.platform = Gem::Platform.local def platform=(platform) - if @original_platform.nil? or + if @original_platform.nil? 
|| @original_platform == Gem::Platform::RUBY @original_platform = platform end @@ -1046,7 +1046,7 @@ def self.find_inactive_by_path(path) def self.find_active_stub_by_path(path) stub = @@active_stub_with_requirable_file[path] ||= (stubs.find do |s| - s.activated? and s.contains_requirable_file? path + s.activated? && s.contains_requirable_file?(path) end || NOT_FOUND) stub.this end @@ -1234,7 +1234,7 @@ def self.outdated_and_latest_version latest_remote = remotes.sort.last yield [local_spec, latest_remote] if - latest_remote and local_spec.version < latest_remote + latest_remote && local_spec.version < latest_remote end nil @@ -1556,7 +1556,7 @@ def add_self_to_load_path # Singular reader for #authors. Returns the first author in the list def author - val = authors and val.first + (val = authors) && val.first end ## @@ -1668,7 +1668,7 @@ def conflicts conflicts = {} self.runtime_dependencies.each do |dep| spec = Gem.loaded_specs[dep.name] - if spec and not spec.satisfies_requirement? dep + if spec && !spec.satisfies_requirement?(dep) (conflicts[spec] ||= []) << dep end end @@ -1695,7 +1695,7 @@ def has_conflicts? self.dependencies.any? do |dep| if dep.runtime? spec = Gem.loaded_specs[dep.name] - spec and not spec.satisfies_requirement? dep + spec && !spec.satisfies_requirement?(dep) else false end @@ -1716,7 +1716,7 @@ def date DateLike = Object.new # :nodoc: def DateLike.===(obj) # :nodoc: - defined?(::Date) and Date === obj + defined?(::Date) && Date === obj end DateTimeFormat = # :nodoc: @@ -1756,9 +1756,9 @@ def date=(date) # executable now. See Gem.bin_path. 
def default_executable # :nodoc: - if defined?(@default_executable) and @default_executable + if defined?(@default_executable) && @default_executable result = @default_executable - elsif @executables and @executables.size == 1 + elsif @executables && @executables.size == 1 result = Array(@executables).first else result = nil @@ -1875,7 +1875,7 @@ def eql?(other) # :nodoc: # Singular accessor for #executables def executable - val = executables and val.first + (val = executables) && val.first end ## @@ -1987,7 +1987,7 @@ def has_rdoc=(ignored) # :nodoc: # True if this gem has files in test_files def has_unit_tests? # :nodoc: - not test_files.empty? + !test_files.empty? end # :stopdoc: @@ -2040,7 +2040,7 @@ def initialize(name = nil, version = nil) self.name = name if name self.version = version if version - if platform = Gem.platforms.last and platform != Gem::Platform::RUBY and platform != Gem::Platform.local + if (platform = Gem.platforms.last) && platform != Gem::Platform::RUBY && platform != Gem::Platform.local self.platform = platform end @@ -2155,8 +2155,8 @@ def method_missing(sym, *a, &b) # :nodoc: return end - if @specification_version > CURRENT_SPECIFICATION_VERSION and - sym.to_s.end_with?("=") + if @specification_version > CURRENT_SPECIFICATION_VERSION && + sym.to_s.end_with?("=") warn "ignoring #{sym} loading #{full_name}" if $DEBUG else super @@ -2182,7 +2182,7 @@ def missing_extensions? # file list. def normalize - if defined?(@extra_rdoc_files) and @extra_rdoc_files + if defined?(@extra_rdoc_files) && @extra_rdoc_files @extra_rdoc_files.uniq! @files ||= [] @files.concat(@extra_rdoc_files) @@ -2207,7 +2207,7 @@ def name_tuple # platform. For use with legacy gems. def original_name # :nodoc: - if platform == Gem::Platform::RUBY or platform.nil? + if platform == Gem::Platform::RUBY || platform.nil? 
"#{@name}-#{@version}" else "#{@name}-#{@version}-#{@original_platform}" @@ -2240,8 +2240,8 @@ def pretty_print(q) # :nodoc: attributes.each do |attr_name| current_value = self.send attr_name current_value = current_value.sort if %i[files test_files].include? attr_name - if current_value != default_value(attr_name) or - self.class.required_attribute? attr_name + if current_value != default_value(attr_name) || + self.class.required_attribute?(attr_name) q.text "s.#{attr_name} = " @@ -2299,7 +2299,7 @@ def rdoc_options=(options) # Singular accessor for #require_paths def require_path - val = require_paths and val.first + (val = require_paths) && val.first end ## @@ -2374,7 +2374,7 @@ def same_attributes?(spec) def satisfies_requirement?(dependency) return @name == dependency.name && - dependency.requirement.satisfied_by?(@version) + dependency.requirement.satisfied_by?(@version) end ## @@ -2428,7 +2428,7 @@ def summary=(str) # Singular accessor for #test_files def test_file # :nodoc: - val = test_files and val.first + (val = test_files) && val.first end ## @@ -2450,7 +2450,7 @@ def test_files # :nodoc: @test_files = [@test_suite_file].flatten @test_suite_file = nil end - if defined?(@test_files) and @test_files + if defined?(@test_files) && @test_files @test_files else @test_files = [] @@ -2474,13 +2474,13 @@ def to_ruby result << " s.name = #{ruby_code name}" result << " s.version = #{ruby_code version}" - unless platform.nil? or platform == Gem::Platform::RUBY + unless platform.nil? || platform == Gem::Platform::RUBY result << " s.platform = #{ruby_code original_platform}" end result << "" result << " s.required_rubygems_version = #{ruby_code required_rubygems_version} if s.respond_to? :required_rubygems_version=" - if metadata and !metadata.empty? + if metadata && !metadata.empty? result << " s.metadata = #{ruby_code metadata} if s.respond_to? 
:metadata=" end result << " s.require_paths = #{ruby_code raw_require_paths}" diff --git a/lib/rubygems/specification_policy.rb b/lib/rubygems/specification_policy.rb index 332189ae9f4c30..44b31211e536ca 100644 --- a/lib/rubygems/specification_policy.rb +++ b/lib/rubygems/specification_policy.rb @@ -188,7 +188,7 @@ def validate_dependencies # :nodoc: prerelease_dep && !@specification.version.prerelease? open_ended = dep.requirement.requirements.all? do |op, version| - not version.prerelease? and (op == ">" or op == ">=") + !version.prerelease? && (op == ">" || op == ">=") end if open_ended @@ -203,7 +203,7 @@ def validate_dependencies # :nodoc: else bugfix = if op == ">" ", '> #{dep_version}'" - elsif op == ">=" and base != segments + elsif op == ">=" && base != segments ", '>= #{dep_version}'" end @@ -338,7 +338,7 @@ def validate_array_attribute(field) String end - unless Array === val and val.all? {|x| x.kind_of?(klass) } + unless Array === val && val.all? {|x| x.kind_of?(klass) } error "#{field} must be an Array of #{klass}" end end @@ -404,7 +404,7 @@ def validate_lazy_metadata homepage = @specification.homepage # Make sure a homepage is valid HTTP/HTTPS URI - if homepage and not homepage.empty? + if homepage && !homepage.empty? require "uri" begin homepage_uri = URI.parse(homepage) diff --git a/lib/rubygems/uninstaller.rb b/lib/rubygems/uninstaller.rb index 1ae301a44de850..5883ed1c41d82d 100644 --- a/lib/rubygems/uninstaller.rb +++ b/lib/rubygems/uninstaller.rb @@ -105,8 +105,8 @@ def uninstall @default_specs_matching_uninstall_params = default_specs list, other_repo_specs = list.partition do |spec| - @gem_home == spec.base_dir or - (@user_install and spec.base_dir == Gem.user_dir) + @gem_home == spec.base_dir || + (@user_install && spec.base_dir == Gem.user_dir) end list.sort! 
@@ -239,8 +239,8 @@ def remove_all(list) # spec:: the spec of the gem to be uninstalled def remove(spec) - unless path_ok?(@gem_home, spec) or - (@user_install and path_ok?(Gem.user_dir, spec)) + unless path_ok?(@gem_home, spec) || + (@user_install && path_ok?(Gem.user_dir, spec)) e = Gem::GemNotInHomeException.new \ "Gem '#{spec.full_name}' is not installed in directory #{@gem_home}" e.spec = spec diff --git a/lib/rubygems/user_interaction.rb b/lib/rubygems/user_interaction.rb index 4b0a7c60bb6b61..2fa505423becd2 100644 --- a/lib/rubygems/user_interaction.rb +++ b/lib/rubygems/user_interaction.rb @@ -284,7 +284,7 @@ def ask_yes_no(question, default=nil) # Ask a question. Returns an answer if connected to a tty, nil otherwise. def ask(question) - return nil if not tty? + return nil if !tty? @outs.print(question + " ") @outs.flush @@ -298,7 +298,7 @@ def ask(question) # Ask for a password. Does not echo response to terminal. def ask_for_password(question) - return nil if not tty? + return nil if !tty? @outs.print(question, " ") @outs.flush diff --git a/lib/rubygems/version.rb b/lib/rubygems/version.rb index bb41374ffc3d02..f67889ef1a315b 100644 --- a/lib/rubygems/version.rb +++ b/lib/rubygems/version.rb @@ -252,7 +252,7 @@ def bump # same precision. Version "1.0" is not the same as version "1". def eql?(other) - self.class === other and @version == other._version + self.class === other && @version == other._version end def hash # :nodoc: diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 46eefbb48e4ce0..cb0177adb264cb 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -599,7 +599,7 @@ def have_git? end def in_path?(executable) # :nodoc: - return true if %r{\A([A-Z]:|/)} =~ executable and File.exist? executable + return true if %r{\A([A-Z]:|/)} =~ executable && File.exist?(executable) ENV["PATH"].split(File::PATH_SEPARATOR).any? do |directory| File.exist? 
File.join directory, executable @@ -849,7 +849,7 @@ def new_default_spec(name, version, deps = nil, *files) # or a +block+ can be given for full customization of the specification. def util_spec(name, version = 2, deps = nil, *files) # :yields: specification - raise "deps or block, not both" if deps and block_given? + raise "deps or block, not both" if deps && block_given? spec = Gem::Specification.new do |s| s.platform = Gem::Platform::RUBY @@ -1279,10 +1279,10 @@ def self.rubybin rubyexe = "#{ruby}.exe" 3.times do - if File.exist? ruby and File.executable? ruby and !File.directory? ruby + if File.exist?(ruby) && File.executable?(ruby) && !File.directory?(ruby) return File.expand_path(ruby) end - if File.exist? rubyexe and File.executable? rubyexe + if File.exist?(rubyexe) && File.executable?(rubyexe) return File.expand_path(rubyexe) end ruby = File.join("..", ruby) @@ -1592,7 +1592,7 @@ def stub(name, val_or_callable, *block_args) metaclass = class << self; self; end - if respond_to? name and not methods.map(&:to_s).include? name.to_s + if respond_to?(name) && !methods.map(&:to_s).include?(name.to_s) metaclass.send :define_method, name do |*args| super(*args) end diff --git a/test/rubygems/test_gem_dependency_installer.rb b/test/rubygems/test_gem_dependency_installer.rb index 9db904ba537482..2b0b874b2d6727 100644 --- a/test/rubygems/test_gem_dependency_installer.rb +++ b/test/rubygems/test_gem_dependency_installer.rb @@ -1051,8 +1051,8 @@ def test_find_gems_with_sources_prerelease releases = set.all_specs - assert releases.any? {|s| s.name == "a" and s.version.to_s == "1" } - refute releases.any? {|s| s.name == "a" and s.version.to_s == "1.a" } + assert releases.any? {|s| s.name == "a" && s.version.to_s == "1" } + refute releases.any? 
{|s| s.name == "a" && s.version.to_s == "1.a" } dependency.prerelease = true diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb b/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb index 21c4fd1c8dec29..0e04f0de5e1444 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb @@ -1,4 +1,4 @@ -if ENV["RUBYOPT"] or defined? Gem +if ENV["RUBYOPT"] || defined? Gem ENV.delete "RUBYOPT" require "rbconfig" diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb index 81b12f99ec0255..f404aa34688c41 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb @@ -1,4 +1,4 @@ -if ENV["RUBYOPT"] or defined? Gem +if ENV["RUBYOPT"] || defined? Gem ENV.delete "RUBYOPT" require "rbconfig" diff --git a/test/rubygems/test_gem_installer.rb b/test/rubygems/test_gem_installer.rb index 55f0a074b8776b..0d0746ec84332b 100644 --- a/test/rubygems/test_gem_installer.rb +++ b/test/rubygems/test_gem_installer.rb @@ -473,7 +473,7 @@ def test_generate_bin_script_no_perms end end ensure - FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG or win_platform?) + FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG || win_platform?) end def test_generate_bin_script_no_shebang @@ -577,7 +577,7 @@ def test_generate_bin_symlink_no_perms end end ensure - FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG or win_platform?) + FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG || win_platform?) 
end def test_generate_bin_symlink_update_newer diff --git a/test/rubygems/test_gem_package_tar_reader.rb b/test/rubygems/test_gem_package_tar_reader.rb index 86ffff4fe52e2e..19860eb7e82090 100644 --- a/test/rubygems/test_gem_package_tar_reader.rb +++ b/test/rubygems/test_gem_package_tar_reader.rb @@ -29,7 +29,7 @@ def test_rewind str = tar_file_header("lib/foo", "", 010644, content.size, Time.now) + - content + "\0" * (512 - content.size) + content + "\0" * (512 - content.size) str << "\0" * 1024 io = TempIO.new(str) diff --git a/test/rubygems/test_gem_resolver_installer_set.rb b/test/rubygems/test_gem_resolver_installer_set.rb index ffa6b13ea4d599..7617919e2ced8c 100644 --- a/test/rubygems/test_gem_resolver_installer_set.rb +++ b/test/rubygems/test_gem_resolver_installer_set.rb @@ -51,6 +51,25 @@ def test_add_always_install_platform assert_equal %w[a-1], set.always_install.map {|s| s.full_name } end + def test_add_always_install_platform_if_gem_platforms_modified_by_platform_flag + freebsd = Gem::Platform.new "x86-freebsd-9" + + spec_fetcher do |fetcher| + fetcher.download "a", 1 + fetcher.download "a", 1 do |s| + s.platform = freebsd + end + end + + # equivalent to --platform=x86-freebsd-9 + Gem.platforms << freebsd + set = Gem::Resolver::InstallerSet.new :both + + set.add_always_install dep("a") + + assert_equal %w[a-1-x86-freebsd-9], set.always_install.map {|s| s.full_name } + end + def test_add_always_install_index_spec_platform _, a_1_local_gem = util_gem "a", 1 do |s| s.platform = Gem::Platform.local @@ -200,6 +219,18 @@ def test_find_all_prerelease set.find_all(req).map {|spec| spec.full_name }.sort end + def test_find_all_prerelease_dependencies_with_add_local + activesupport_7_1_0_alpha = util_spec "activesupport", "7.1.0.alpha" + + install_gem activesupport_7_1_0_alpha + + set = Gem::Resolver::InstallerSet.new :both + + req = Gem::Resolver::DependencyRequest.new dep("activesupport", ">= 4.2.0"), nil + + assert_equal %w[activesupport-7.1.0.alpha], 
set.find_all(req).map {|spec| spec.full_name } + end + def test_load_spec specs = spec_fetcher do |fetcher| fetcher.spec "a", 2 diff --git a/test/rubygems/test_gem_specification.rb b/test/rubygems/test_gem_specification.rb index cf0dba4331fc3c..8ce8293f33a742 100644 --- a/test/rubygems/test_gem_specification.rb +++ b/test/rubygems/test_gem_specification.rb @@ -796,7 +796,7 @@ def test_self_load_relative assert_equal File.join(@tempdir, "a-2.gemspec"), spec.loaded_from end - if RUBY_ENGINE == "ruby" and RUBY_VERSION < "2.7" + if RUBY_ENGINE == "ruby" && RUBY_VERSION < "2.7" def test_self_load_tainted full_path = @a2.spec_file write_file full_path do |io| @@ -1450,7 +1450,7 @@ def test_build_extensions_extensions_dir_unwritable @ext.build_extensions assert_path_not_exist @ext.extension_dir ensure - unless ($DEBUG or win_platform? or Process.uid.zero? or Gem.java_platform?) + unless ($DEBUG || win_platform? || Process.uid.zero? || Gem.java_platform?) FileUtils.chmod 0755, File.join(@ext.base_dir, "extensions") FileUtils.chmod 0755, @ext.base_dir end diff --git a/test/rubygems/test_require.rb b/test/rubygems/test_require.rb index f933bbb5d5c036..6135acea924a03 100644 --- a/test/rubygems/test_require.rb +++ b/test/rubygems/test_require.rb @@ -269,7 +269,7 @@ def test_activate_via_require_respects_loaded_files assert_includes $LOAD_PATH, rubylibdir message = proc { "this test relies on the b-2 gem lib/ to be before stdlib to make sense\n" + - $LOAD_PATH.pretty_inspect + $LOAD_PATH.pretty_inspect } assert_operator $LOAD_PATH.index(b2.load_paths[0]), :<, $LOAD_PATH.index(rubylibdir), message diff --git a/test/rubygems/utilities.rb b/test/rubygems/utilities.rb index 5f8f763cb56bd0..c01f7acd481b45 100644 --- a/test/rubygems/utilities.rb +++ b/test/rubygems/utilities.rb @@ -39,9 +39,9 @@ def initialize end def find_data(path) - return Gem.read_binary path.path if URI === path and "file" == path.scheme + return Gem.read_binary path.path if URI === path && "file" == path.scheme 
- if URI === path and "URI::#{path.scheme.upcase}" != path.class.name + if URI === path && "URI::#{path.scheme.upcase}" != path.class.name raise ArgumentError, "mismatch for scheme #{path.scheme} and class #{path.class}" end @@ -67,7 +67,7 @@ def fetch_path(path, mtime = nil, head = false) if data.respond_to?(:call) data.call else - if path.to_s.end_with?(".gz") and not data.nil? and not data.empty? + if path.to_s.end_with?(".gz") && !data.nil? && !data.empty? data = Gem::Util.gunzip data end data @@ -76,7 +76,7 @@ def fetch_path(path, mtime = nil, head = false) def cache_update_path(uri, path = nil, update = true) if data = fetch_path(uri) - File.open(path, "wb") {|io| io.write data } if path and update + File.open(path, "wb") {|io| io.write data } if path && update data else Gem.read_binary(path) if path From ff07e5c264c82f73b0368dd0bc2ae39f78678519 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 9 Aug 2022 02:15:10 +0900 Subject: [PATCH 063/546] Skip poisoned regions Poisoned regions cannot be accessed without unpoisoning outside gc.c. Specifically, debug.gem is terminated by AddressSanitizer. 
``` SUMMARY: AddressSanitizer: use-after-poison iseq_collector.c:39 in iseq_i ``` --- gc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gc.c b/gc.c index df4c99b15b2cce..ecb4aa7e2085f5 100644 --- a/gc.c +++ b/gc.c @@ -3904,7 +3904,8 @@ objspace_each_objects_try(VALUE arg) uintptr_t pstart = (uintptr_t)page->start; uintptr_t pend = pstart + (page->total_slots * size_pool->slot_size); - if ((*data->callback)((void *)pstart, (void *)pend, size_pool->slot_size, data->data)) { + if (!__asan_region_is_poisoned((void *)pstart, pend - pstart) && + (*data->callback)((void *)pstart, (void *)pend, size_pool->slot_size, data->data)) { break; } From 357352af5ee37bf31ba4468ee242ea5a10158461 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Tue, 19 Oct 2021 12:59:39 -0700 Subject: [PATCH 064/546] Do not enable RUBY_DEVEL by RUBY_PATCHLEVEL This makes RUBY_DEVEL not enabled automatically. It still can be enabled manually. Test manually using RUBY_DEVEL in CI. Implements [Feature #17468] --- .github/workflows/compilers.yml | 1 + configure.ac | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/compilers.yml b/.github/workflows/compilers.yml index ab29a7acc556a3..bd082039b316b1 100644 --- a/.github/workflows/compilers.yml +++ b/.github/workflows/compilers.yml @@ -154,6 +154,7 @@ jobs: - { name: NDEBUG, env: { cppflags: '-DNDEBUG' } } - { name: RUBY_DEBUG, env: { cppflags: '-DRUBY_DEBUG' } } + - { name: RUBY_DEVEL, env: { cppflags: '-DRUBY_DEVEL' } } # - { name: ARRAY_DEBUG, env: { cppflags: '-DARRAY_DEBUG' } } # - { name: BIGNUM_DEBUG, env: { cppflags: '-DBIGNUM_DEBUG' } } # - { name: CCAN_LIST_DEBUG, env: { cppflags: '-DCCAN_LIST_DEBUG' } } diff --git a/configure.ac b/configure.ac index 9ed0c1ef9e2ca9..575bdf663155fd 100644 --- a/configure.ac +++ b/configure.ac @@ -622,8 +622,7 @@ AS_IF([test "$fdeclspec" = yes], [ RUBY_APPEND_OPTIONS(CXXFLAGS, -fdeclspec) ]) -AS_CASE([$RUBY_PATCHLEVEL], [-*], - [RUBY_DEVEL=yes], 
[RUBY_DEVEL=no]) +AS_IF([test "x$RUBY_DEVEL" != xyes], [RUBY_DEVEL=no]) particular_werror_flags=$RUBY_DEVEL AC_ARG_ENABLE(werror, AS_HELP_STRING([--disable-werror], From 3569d1309501a5e8ae4656d90d521c7eaa3330c6 Mon Sep 17 00:00:00 2001 From: git Date: Wed, 10 Aug 2022 14:13:45 +0900 Subject: [PATCH 065/546] * 2022-08-10 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 4b9ca6f5ff2cbc..22ed2490953f41 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 9 +#define RUBY_RELEASE_DAY 10 #include "ruby/version.h" #include "ruby/internal/abi.h" From 9f8abd28babf1ab84093fe0cc97f8d42cf62286c Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 20 Jul 2022 12:16:24 -0700 Subject: [PATCH 066/546] Add peephole optimizer for newarray(2)/expandarray(2, 0) -> swap An optimization for multiple assignment in the popped case to avoid array allocation was lost in my fix to make multiple assignment follow left-to-right evaluation (50c54d40a81bb2a4794a6be5f1861152900b4fed). Before, in the two element case, swap was used. Afterward, newarray(2) and expandarray(2, 0) were used, which is the same as swap, with the addition of an unnecessary allocation. Because this issue is not specific to multiple assignment, and the multiple assignment code is complex enough as it is, this updates the peephole optimizer to do the newarray(2)/expandarray(2, 0) -> swap conversion. A more general optimization pass for newarray(X)/expandarray(X, 0) -> reverse(X) will follow, but that requires readding the reverse instruction. 
--- compile.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/compile.c b/compile.c index 6a9ed2a5d09453..f38a320b765b36 100644 --- a/compile.c +++ b/compile.c @@ -3332,6 +3332,25 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal } } + if (IS_INSN_ID(iobj, newarray)) { + LINK_ELEMENT *next = iobj->link.next; + if (IS_INSN(next) && IS_INSN_ID(next, expandarray) && + OPERAND_AT(iobj, 0) == OPERAND_AT(next, 0) && + OPERAND_AT(next, 1) == INT2FIX(0)) { + /* + * newarray 2 + * expandarray 2, 0 + * => + * swap + */ + if (OPERAND_AT(iobj, 0) == INT2FIX(2)) { + ELEM_REMOVE(next); + INSN_OF(iobj) = BIN(swap); + iobj->operand_size = 0; + } + } + } + if (IS_INSN_ID(iobj, anytostring)) { LINK_ELEMENT *next = iobj->link.next; /* From d9167491db220756df159048348f006619df28c1 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 20 Jul 2022 12:24:50 -0700 Subject: [PATCH 067/546] Revert "Remove reverse VM instruction" This reverts commit 5512353d97250e85c13bf10b9b32e750478cf474. --- insns.def | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/insns.def b/insns.def index 06ca31a85010db..ebdbed62375322 100644 --- a/insns.def +++ b/insns.def @@ -597,6 +597,25 @@ swap /* none */ } +/* reverse stack top N order. */ +DEFINE_INSN +reverse +(rb_num_t n) +(...) +(...) +// attr rb_snum_t sp_inc = 0; +{ + rb_num_t i; + VALUE *sp = STACK_ADDR_FROM_TOP(n); + + for (i=0; i Date: Wed, 20 Jul 2022 12:28:48 -0700 Subject: [PATCH 068/546] Add peephole optimizer for newarray(X)/expandarray(X, 0) -> opt_reverse(X) This renames the reverse instruction to opt_reverse, since now it is only added by the optimizer. Then it uses as a more general form of swap. This optimizes multiple assignment in the popped case with more than two elements. 
--- compile.c | 11 ++++++++++- insns.def | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/compile.c b/compile.c index f38a320b765b36..9716f04374705d 100644 --- a/compile.c +++ b/compile.c @@ -3337,6 +3337,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal if (IS_INSN(next) && IS_INSN_ID(next, expandarray) && OPERAND_AT(iobj, 0) == OPERAND_AT(next, 0) && OPERAND_AT(next, 1) == INT2FIX(0)) { + ELEM_REMOVE(next); /* * newarray 2 * expandarray 2, 0 @@ -3344,10 +3345,18 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal * swap */ if (OPERAND_AT(iobj, 0) == INT2FIX(2)) { - ELEM_REMOVE(next); INSN_OF(iobj) = BIN(swap); iobj->operand_size = 0; } + /* + * newarray X + * expandarray X, 0 + * => + * opt_reverse X + */ + else { + INSN_OF(iobj) = BIN(opt_reverse); + } } } diff --git a/insns.def b/insns.def index ebdbed62375322..15c4734b8b7257 100644 --- a/insns.def +++ b/insns.def @@ -599,7 +599,7 @@ swap /* reverse stack top N order. */ DEFINE_INSN -reverse +opt_reverse (rb_num_t n) (...) (...) From 7922fd65e30fb9f011b939dead38cda94a7e2721 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 20 Jul 2022 13:13:47 -0700 Subject: [PATCH 069/546] Update multiple assignment benchmarks to include non-literal array cases This allows them to show the effect of the previous newarray/expandarray to swap/opt_reverse optimization. This shows an 35-83% performance improvement in the four multiple assignment benchmarks that use this optimization. 
--- benchmark/masgn.yml | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/benchmark/masgn.yml b/benchmark/masgn.yml index 4be9333e232c1f..31cb8ee4a301de 100644 --- a/benchmark/masgn.yml +++ b/benchmark/masgn.yml @@ -1,7 +1,7 @@ prelude: | a = [nil] * 3 b = Class.new{attr_writer :a, :b, :c}.new - c, d, e, f = nil, nil, nil, nil + c = d = e = f = g = h = i = nil benchmark: array2_2: "c = (a[0], a[1] = 1, 2)" array2_3: "c = (a[0], a[1] = 1, 2, 3)" @@ -27,3 +27,27 @@ benchmark: lvar2_3p: "(d, e = 1, 2, 3; nil)" lvar3_2p: "(d, e, f = 1, 2; nil)" lvar3_3p: "(d, e, f = 1, 2, 3; nil)" + array2_2lv: "c = (a[0], a[1] = g, h)" + array2_ilv: "c = (a[0], a[1] = g, h, i)" + arrayi_2lv: "c = (a[0], a[1], a[2] = g, h)" + arrayi_ilv: "c = (a[0], a[1], a[2] = g, h, i)" + attr2_2lv: "c = (b.a, b.b = g, h)" + attr2_ilv: "c = (b.a, b.b = g, h, i)" + attri_2lv: "c = (b.a, b.b, b.c = g, h)" + attri_ilv: "c = (b.a, b.b, b.c = g, h, i)" + lvar2_2lv: "c = (d, e = g, h)" + lvar2_ilv: "c = (d, e = g, h, i)" + lvari_2lv: "c = (d, e, f = g, h)" + lvari_ilv: "c = (d, e, f = g, h, i)" + array2_2plv: "(a[0], a[1] = g, h; nil)" + array2_iplv: "(a[0], a[1] = g, h, i; nil)" + arrayi_2plv: "(a[0], a[1], a[2] = g, h; nil)" + arrayi_iplv: "(a[0], a[1], a[2] = g, h, i; nil)" + attr2_2plv: "(b.a, b.b = g, h; nil)" + attr2_iplv: "(b.a, b.b = g, h, i; nil)" + attri_2plv: "(b.a, b.b, b.c = g, h; nil)" + attri_iplv: "(b.a, b.b, b.c = g, h, i; nil)" + lvar2_2plv: "(d, e = g, h; nil)" + lvar2_iplv: "(d, e = g, h, i; nil)" + lvari_2plv: "(d, e, f = g, h; nil)" + lvari_iplv: "(d, e, f = g, h, i; nil)" From fc4b4f2e8db3d68b80b9c7580c40a0165736006c Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 20 Jul 2022 14:06:28 -0700 Subject: [PATCH 070/546] Expand newarray/expandarray optimization for unequal operands This optimizes unbalanced multiple assignment cases such as: ```ruby a.b, c.d = e, f, g a.b, c.d, e.f = g, h ``` Previously, this would use: ``` newarray(3) 
expandarray(2, 0) newarray(2) expandarray(3, 0) ``` These would both allocate arrays. This switches to opt_reverse with either pop or putnil: ``` pop opt_reverse(2) putnil opt_reverse(3) ``` This avoids an unnecessary array allocation, and results in a 35-76% performance increase in these types of unbalanced cases (tested with benchmark/masgn.yml). --- compile.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/compile.c b/compile.c index 9716f04374705d..8879e661fe08f2 100644 --- a/compile.c +++ b/compile.c @@ -3335,16 +3335,20 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal if (IS_INSN_ID(iobj, newarray)) { LINK_ELEMENT *next = iobj->link.next; if (IS_INSN(next) && IS_INSN_ID(next, expandarray) && - OPERAND_AT(iobj, 0) == OPERAND_AT(next, 0) && OPERAND_AT(next, 1) == INT2FIX(0)) { - ELEM_REMOVE(next); + VALUE op1, op2; + op1 = OPERAND_AT(iobj, 0); + op2 = OPERAND_AT(next, 0); + ELEM_REMOVE(next); + + if (op1 == op2) { /* * newarray 2 * expandarray 2, 0 * => * swap */ - if (OPERAND_AT(iobj, 0) == INT2FIX(2)) { + if (op1 == INT2FIX(2)) { INSN_OF(iobj) = BIN(swap); iobj->operand_size = 0; } @@ -3357,6 +3361,38 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal else { INSN_OF(iobj) = BIN(opt_reverse); } + } + else { + NODE dummy_line_node = generate_dummy_line_node(iobj->insn_info.line_no, iobj->insn_info.node_id); + long diff = FIX2LONG(op1) - FIX2LONG(op2); + INSN_OF(iobj) = BIN(opt_reverse); + OPERAND_AT(iobj, 0) = OPERAND_AT(next, 0); + + if (op1 > op2) { + /* X > Y + * newarray X + * expandarray Y, 0 + * => + * pop * (Y-X) + * opt_reverse Y + */ + for (; diff > 0; diff--) { + INSERT_BEFORE_INSN(iobj, &dummy_line_node, pop); + } + } + else { /* (op1 < op2) */ + /* X < Y + * newarray X + * expandarray Y, 0 + * => + * putnil * (Y-X) + * opt_reverse Y + */ + for (; diff < 0; diff++) { + INSERT_BEFORE_INSN(iobj, &dummy_line_node, putnil); + 
} + } + } } } From 9363b0423a0269272eff2e243d4b55bc8d135430 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 20 Jul 2022 15:09:20 -0700 Subject: [PATCH 071/546] Optimize duparray/expandarray -> putobject/expandarray There's no point in making a copy of an array just to expand it. Saves an unnecessary array allocation in the multiple assignment case, with a 35-84% improvement in affected cases in benchmark/masgn.yml. --- compile.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/compile.c b/compile.c index 8879e661fe08f2..484399abc6d7d6 100644 --- a/compile.c +++ b/compile.c @@ -3396,6 +3396,20 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal } } + if (IS_INSN_ID(iobj, duparray)) { + LINK_ELEMENT *next = iobj->link.next; + /* + * duparray obj + * expandarray X, 0 + * => + * putobject obj + * expandarray X, 0 + */ + if (IS_INSN(next) && IS_INSN_ID(next, expandarray)) { + INSN_OF(iobj) = BIN(putobject); + } + } + if (IS_INSN_ID(iobj, anytostring)) { LINK_ELEMENT *next = iobj->link.next; /* From b7e492fa9e0444ceb9e56eda3d30fe7a13f0c6b1 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 20 Jul 2022 18:24:06 -0700 Subject: [PATCH 072/546] Regen YJIT bindings --- yjit/src/cruby_bindings.inc.rs | 332 +++++++++++++++++---------------- 1 file changed, 167 insertions(+), 165 deletions(-) diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index fed132588cf769..31f09ef98d4bd5 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -779,171 +779,173 @@ pub const YARVINSN_pop: ruby_vminsn_type = 34; pub const YARVINSN_dup: ruby_vminsn_type = 35; pub const YARVINSN_dupn: ruby_vminsn_type = 36; pub const YARVINSN_swap: ruby_vminsn_type = 37; -pub const YARVINSN_topn: ruby_vminsn_type = 38; -pub const YARVINSN_setn: ruby_vminsn_type = 39; -pub const YARVINSN_adjuststack: ruby_vminsn_type = 40; -pub const YARVINSN_defined: ruby_vminsn_type = 41; -pub const 
YARVINSN_checkmatch: ruby_vminsn_type = 42; -pub const YARVINSN_checkkeyword: ruby_vminsn_type = 43; -pub const YARVINSN_checktype: ruby_vminsn_type = 44; -pub const YARVINSN_defineclass: ruby_vminsn_type = 45; -pub const YARVINSN_definemethod: ruby_vminsn_type = 46; -pub const YARVINSN_definesmethod: ruby_vminsn_type = 47; -pub const YARVINSN_send: ruby_vminsn_type = 48; -pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 49; -pub const YARVINSN_objtostring: ruby_vminsn_type = 50; -pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 51; -pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 52; -pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 53; -pub const YARVINSN_opt_newarray_max: ruby_vminsn_type = 54; -pub const YARVINSN_opt_newarray_min: ruby_vminsn_type = 55; -pub const YARVINSN_invokesuper: ruby_vminsn_type = 56; -pub const YARVINSN_invokeblock: ruby_vminsn_type = 57; -pub const YARVINSN_leave: ruby_vminsn_type = 58; -pub const YARVINSN_throw: ruby_vminsn_type = 59; -pub const YARVINSN_jump: ruby_vminsn_type = 60; -pub const YARVINSN_branchif: ruby_vminsn_type = 61; -pub const YARVINSN_branchunless: ruby_vminsn_type = 62; -pub const YARVINSN_branchnil: ruby_vminsn_type = 63; -pub const YARVINSN_opt_getinlinecache: ruby_vminsn_type = 64; -pub const YARVINSN_opt_setinlinecache: ruby_vminsn_type = 65; -pub const YARVINSN_once: ruby_vminsn_type = 66; -pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 67; -pub const YARVINSN_opt_plus: ruby_vminsn_type = 68; -pub const YARVINSN_opt_minus: ruby_vminsn_type = 69; -pub const YARVINSN_opt_mult: ruby_vminsn_type = 70; -pub const YARVINSN_opt_div: ruby_vminsn_type = 71; -pub const YARVINSN_opt_mod: ruby_vminsn_type = 72; -pub const YARVINSN_opt_eq: ruby_vminsn_type = 73; -pub const YARVINSN_opt_neq: ruby_vminsn_type = 74; -pub const YARVINSN_opt_lt: ruby_vminsn_type = 75; -pub const YARVINSN_opt_le: ruby_vminsn_type = 76; -pub const YARVINSN_opt_gt: ruby_vminsn_type = 77; -pub const 
YARVINSN_opt_ge: ruby_vminsn_type = 78; -pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 79; -pub const YARVINSN_opt_and: ruby_vminsn_type = 80; -pub const YARVINSN_opt_or: ruby_vminsn_type = 81; -pub const YARVINSN_opt_aref: ruby_vminsn_type = 82; -pub const YARVINSN_opt_aset: ruby_vminsn_type = 83; -pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 84; -pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 85; -pub const YARVINSN_opt_length: ruby_vminsn_type = 86; -pub const YARVINSN_opt_size: ruby_vminsn_type = 87; -pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 88; -pub const YARVINSN_opt_succ: ruby_vminsn_type = 89; -pub const YARVINSN_opt_not: ruby_vminsn_type = 90; -pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 91; -pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 92; -pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 93; -pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 94; -pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 95; -pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 96; -pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 97; -pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 98; -pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 99; -pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 100; -pub const YARVINSN_trace_nop: ruby_vminsn_type = 101; -pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 102; -pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 103; -pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 104; -pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 105; -pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 106; -pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 107; -pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 108; -pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 109; -pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 110; -pub const 
YARVINSN_trace_getclassvariable: ruby_vminsn_type = 111; -pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 112; -pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 113; -pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 114; -pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 115; -pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 116; -pub const YARVINSN_trace_putnil: ruby_vminsn_type = 117; -pub const YARVINSN_trace_putself: ruby_vminsn_type = 118; -pub const YARVINSN_trace_putobject: ruby_vminsn_type = 119; -pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 120; -pub const YARVINSN_trace_putstring: ruby_vminsn_type = 121; -pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 122; -pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 123; -pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 124; -pub const YARVINSN_trace_intern: ruby_vminsn_type = 125; -pub const YARVINSN_trace_newarray: ruby_vminsn_type = 126; -pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 127; -pub const YARVINSN_trace_duparray: ruby_vminsn_type = 128; -pub const YARVINSN_trace_duphash: ruby_vminsn_type = 129; -pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 130; -pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 131; -pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 132; -pub const YARVINSN_trace_newhash: ruby_vminsn_type = 133; -pub const YARVINSN_trace_newrange: ruby_vminsn_type = 134; -pub const YARVINSN_trace_pop: ruby_vminsn_type = 135; -pub const YARVINSN_trace_dup: ruby_vminsn_type = 136; -pub const YARVINSN_trace_dupn: ruby_vminsn_type = 137; -pub const YARVINSN_trace_swap: ruby_vminsn_type = 138; -pub const YARVINSN_trace_topn: ruby_vminsn_type = 139; -pub const YARVINSN_trace_setn: ruby_vminsn_type = 140; -pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 141; -pub const YARVINSN_trace_defined: ruby_vminsn_type = 142; -pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 143; 
-pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 144; -pub const YARVINSN_trace_checktype: ruby_vminsn_type = 145; -pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 146; -pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 147; -pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 148; -pub const YARVINSN_trace_send: ruby_vminsn_type = 149; -pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 150; -pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 151; -pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 152; -pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 153; -pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 154; -pub const YARVINSN_trace_opt_newarray_max: ruby_vminsn_type = 155; -pub const YARVINSN_trace_opt_newarray_min: ruby_vminsn_type = 156; -pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 157; -pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 158; -pub const YARVINSN_trace_leave: ruby_vminsn_type = 159; -pub const YARVINSN_trace_throw: ruby_vminsn_type = 160; -pub const YARVINSN_trace_jump: ruby_vminsn_type = 161; -pub const YARVINSN_trace_branchif: ruby_vminsn_type = 162; -pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 163; -pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 164; -pub const YARVINSN_trace_opt_getinlinecache: ruby_vminsn_type = 165; -pub const YARVINSN_trace_opt_setinlinecache: ruby_vminsn_type = 166; -pub const YARVINSN_trace_once: ruby_vminsn_type = 167; -pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 168; -pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 169; -pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 170; -pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 171; -pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 172; -pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 173; -pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 174; -pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 
175; -pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 176; -pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 177; -pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 178; -pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 179; -pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 180; -pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 181; -pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 182; -pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 183; -pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 184; -pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 185; -pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 186; -pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 187; -pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 188; -pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 189; -pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 190; -pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 191; -pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 192; -pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 193; -pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 194; -pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 195; -pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 196; -pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 197; -pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 198; -pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 199; -pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 200; -pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 201; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 202; +pub const YARVINSN_opt_reverse: ruby_vminsn_type = 38; +pub const YARVINSN_topn: ruby_vminsn_type = 39; +pub const YARVINSN_setn: ruby_vminsn_type = 40; +pub const YARVINSN_adjuststack: ruby_vminsn_type = 41; +pub const YARVINSN_defined: ruby_vminsn_type = 42; +pub const 
YARVINSN_checkmatch: ruby_vminsn_type = 43; +pub const YARVINSN_checkkeyword: ruby_vminsn_type = 44; +pub const YARVINSN_checktype: ruby_vminsn_type = 45; +pub const YARVINSN_defineclass: ruby_vminsn_type = 46; +pub const YARVINSN_definemethod: ruby_vminsn_type = 47; +pub const YARVINSN_definesmethod: ruby_vminsn_type = 48; +pub const YARVINSN_send: ruby_vminsn_type = 49; +pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 50; +pub const YARVINSN_objtostring: ruby_vminsn_type = 51; +pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 52; +pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 53; +pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 54; +pub const YARVINSN_opt_newarray_max: ruby_vminsn_type = 55; +pub const YARVINSN_opt_newarray_min: ruby_vminsn_type = 56; +pub const YARVINSN_invokesuper: ruby_vminsn_type = 57; +pub const YARVINSN_invokeblock: ruby_vminsn_type = 58; +pub const YARVINSN_leave: ruby_vminsn_type = 59; +pub const YARVINSN_throw: ruby_vminsn_type = 60; +pub const YARVINSN_jump: ruby_vminsn_type = 61; +pub const YARVINSN_branchif: ruby_vminsn_type = 62; +pub const YARVINSN_branchunless: ruby_vminsn_type = 63; +pub const YARVINSN_branchnil: ruby_vminsn_type = 64; +pub const YARVINSN_opt_getinlinecache: ruby_vminsn_type = 65; +pub const YARVINSN_opt_setinlinecache: ruby_vminsn_type = 66; +pub const YARVINSN_once: ruby_vminsn_type = 67; +pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 68; +pub const YARVINSN_opt_plus: ruby_vminsn_type = 69; +pub const YARVINSN_opt_minus: ruby_vminsn_type = 70; +pub const YARVINSN_opt_mult: ruby_vminsn_type = 71; +pub const YARVINSN_opt_div: ruby_vminsn_type = 72; +pub const YARVINSN_opt_mod: ruby_vminsn_type = 73; +pub const YARVINSN_opt_eq: ruby_vminsn_type = 74; +pub const YARVINSN_opt_neq: ruby_vminsn_type = 75; +pub const YARVINSN_opt_lt: ruby_vminsn_type = 76; +pub const YARVINSN_opt_le: ruby_vminsn_type = 77; +pub const YARVINSN_opt_gt: ruby_vminsn_type = 78; +pub const 
YARVINSN_opt_ge: ruby_vminsn_type = 79; +pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 80; +pub const YARVINSN_opt_and: ruby_vminsn_type = 81; +pub const YARVINSN_opt_or: ruby_vminsn_type = 82; +pub const YARVINSN_opt_aref: ruby_vminsn_type = 83; +pub const YARVINSN_opt_aset: ruby_vminsn_type = 84; +pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 85; +pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 86; +pub const YARVINSN_opt_length: ruby_vminsn_type = 87; +pub const YARVINSN_opt_size: ruby_vminsn_type = 88; +pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 89; +pub const YARVINSN_opt_succ: ruby_vminsn_type = 90; +pub const YARVINSN_opt_not: ruby_vminsn_type = 91; +pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 92; +pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 93; +pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 94; +pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 95; +pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 96; +pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 97; +pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 98; +pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 99; +pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 100; +pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 101; +pub const YARVINSN_trace_nop: ruby_vminsn_type = 102; +pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 103; +pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 104; +pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 105; +pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 106; +pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 107; +pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 108; +pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 109; +pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 110; +pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 111; +pub const 
YARVINSN_trace_getclassvariable: ruby_vminsn_type = 112; +pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 113; +pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 114; +pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 115; +pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 116; +pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 117; +pub const YARVINSN_trace_putnil: ruby_vminsn_type = 118; +pub const YARVINSN_trace_putself: ruby_vminsn_type = 119; +pub const YARVINSN_trace_putobject: ruby_vminsn_type = 120; +pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 121; +pub const YARVINSN_trace_putstring: ruby_vminsn_type = 122; +pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 123; +pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 124; +pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 125; +pub const YARVINSN_trace_intern: ruby_vminsn_type = 126; +pub const YARVINSN_trace_newarray: ruby_vminsn_type = 127; +pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 128; +pub const YARVINSN_trace_duparray: ruby_vminsn_type = 129; +pub const YARVINSN_trace_duphash: ruby_vminsn_type = 130; +pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 131; +pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 132; +pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 133; +pub const YARVINSN_trace_newhash: ruby_vminsn_type = 134; +pub const YARVINSN_trace_newrange: ruby_vminsn_type = 135; +pub const YARVINSN_trace_pop: ruby_vminsn_type = 136; +pub const YARVINSN_trace_dup: ruby_vminsn_type = 137; +pub const YARVINSN_trace_dupn: ruby_vminsn_type = 138; +pub const YARVINSN_trace_swap: ruby_vminsn_type = 139; +pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 140; +pub const YARVINSN_trace_topn: ruby_vminsn_type = 141; +pub const YARVINSN_trace_setn: ruby_vminsn_type = 142; +pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 143; +pub const YARVINSN_trace_defined: ruby_vminsn_type = 144; 
+pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 145; +pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 146; +pub const YARVINSN_trace_checktype: ruby_vminsn_type = 147; +pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 148; +pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 149; +pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 150; +pub const YARVINSN_trace_send: ruby_vminsn_type = 151; +pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 152; +pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 153; +pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 154; +pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 155; +pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 156; +pub const YARVINSN_trace_opt_newarray_max: ruby_vminsn_type = 157; +pub const YARVINSN_trace_opt_newarray_min: ruby_vminsn_type = 158; +pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 159; +pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 160; +pub const YARVINSN_trace_leave: ruby_vminsn_type = 161; +pub const YARVINSN_trace_throw: ruby_vminsn_type = 162; +pub const YARVINSN_trace_jump: ruby_vminsn_type = 163; +pub const YARVINSN_trace_branchif: ruby_vminsn_type = 164; +pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 165; +pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 166; +pub const YARVINSN_trace_opt_getinlinecache: ruby_vminsn_type = 167; +pub const YARVINSN_trace_opt_setinlinecache: ruby_vminsn_type = 168; +pub const YARVINSN_trace_once: ruby_vminsn_type = 169; +pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 170; +pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 171; +pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 172; +pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 173; +pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 174; +pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 175; +pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 
176; +pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 177; +pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 178; +pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 179; +pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 180; +pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 181; +pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 182; +pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 183; +pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 184; +pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 185; +pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 186; +pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 187; +pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 188; +pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 189; +pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 190; +pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 191; +pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 192; +pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 193; +pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 194; +pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 195; +pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 196; +pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 197; +pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 198; +pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 199; +pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 200; +pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 201; +pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 202; +pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 203; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 204; pub type ruby_vminsn_type = u32; extern "C" { pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; From 1139bc8c20d243b17f159d6c6518df17fcf887fe Mon Sep 17 00:00:00 2001 From: 
git Date: Wed, 10 Aug 2022 07:05:11 +0000 Subject: [PATCH 073/546] Update bundled gems list at 2022-08-10 --- NEWS.md | 2 +- gems/bundled_gems | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7d42ff2f4dbd63..4dd5eb112ac21e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -179,7 +179,7 @@ Note: We're only listing outstanding class updates. * net-imap 0.2.3 * rbs 2.6.0 * typeprof 0.21.3 - * debug 1.6.1 + * debug 1.6.2 * The following default gems are now bundled gems. ## Compatibility issues diff --git a/gems/bundled_gems b/gems/bundled_gems index 4bf063abff3b34..2699ecda8b2299 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -13,4 +13,4 @@ matrix 0.4.2 https://github.com/ruby/matrix prime 0.1.2 https://github.com/ruby/prime rbs 2.6.0 https://github.com/ruby/rbs 14abbbae8885a09a2ed82de2ef31d67a9c0a108d typeprof 0.21.3 https://github.com/ruby/typeprof -debug 1.6.1 https://github.com/ruby/debug +debug 1.6.2 https://github.com/ruby/debug From 99e7fa5b3718d61d61427c67d244ec50b9eb8578 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Wed, 10 Aug 2022 18:36:59 +0900 Subject: [PATCH 074/546] [ruby/error_highlight] Make ErrorHighlight.spot accept Exception (https://github.com/ruby/error_highlight/pull/25) ... and move things from core_ext.rb to base.rb. This will confine CRuby-dependent things to ErrorHighlight.spot. 
https://github.com/ruby/error_highlight/commit/22d1dd7824 --- lib/error_highlight/base.rb | 58 +++++++++++++++++--- lib/error_highlight/core_ext.rb | 33 +---------- test/error_highlight/test_error_highlight.rb | 2 +- 3 files changed, 55 insertions(+), 38 deletions(-) diff --git a/lib/error_highlight/base.rb b/lib/error_highlight/base.rb index 8392979e245784..51f1ce369d21e5 100644 --- a/lib/error_highlight/base.rb +++ b/lib/error_highlight/base.rb @@ -1,12 +1,17 @@ require_relative "version" module ErrorHighlight - # Identify the code fragment that seems associated with a given error + # Identify the code fragment at that a given exception occurred. # - # Arguments: - # node: RubyVM::AbstractSyntaxTree::Node (script_lines should be enabled) - # point_type: :name | :args - # name: The name associated with the NameError/NoMethodError + # Options: + # + # point_type: :name | :args + # :name (default) points the method/variable name that the exception occurred. + # :args points the arguments of the method call that the exception occurred. + # + # backtrace_location: Thread::Backtrace::Location + # It locates the code fragment of the given backtrace_location. + # By default, it uses the first frame of backtrace_locations of the given exception. # # Returns: # { @@ -15,9 +20,47 @@ module ErrorHighlight # last_lineno: Integer, # last_column: Integer, # snippet: String, + # script_lines: [String], # } | nil - def self.spot(...) 
- Spotter.new(...).spot + def self.spot(obj, **opts) + case obj + when Exception + exc = obj + opts = { point_type: opts.fetch(:point_type, :name) } + + loc = opts[:backtrace_location] + unless loc + case exc + when TypeError, ArgumentError + opts[:point_type] = :args + end + + locs = exc.backtrace_locations + return nil unless locs + + loc = locs.first + return nil unless loc + + opts[:name] = exc.name if NameError === obj + end + + node = RubyVM::AbstractSyntaxTree.of(loc, keep_script_lines: true) + + Spotter.new(node, **opts).spot + + when RubyVM::AbstractSyntaxTree::Node + # Just for compatibility + Spotter.new(node, **opts).spot + + else + raise TypeError, "Exception is expected" + end + + rescue SyntaxError, + SystemCallError, # file not found or something + ArgumentError # eval'ed code + + return nil end class Spotter @@ -122,6 +165,7 @@ def spot last_lineno: @end_lineno, last_column: @end_column, snippet: @snippet, + script_lines: @node.script_lines, } else return nil diff --git a/lib/error_highlight/core_ext.rb b/lib/error_highlight/core_ext.rb index 53e409dd8f6f8c..130f9ef832e478 100644 --- a/lib/error_highlight/core_ext.rb +++ b/lib/error_highlight/core_ext.rb @@ -3,36 +3,9 @@ module ErrorHighlight module CoreExt private def generate_snippet - locs = backtrace_locations - return "" unless locs - - loc = locs.first - return "" unless loc - - begin - node = RubyVM::AbstractSyntaxTree.of(loc, keep_script_lines: true) - opts = {} - - case self - when NoMethodError, NameError - opts[:point_type] = :name - opts[:name] = name - when TypeError, ArgumentError - opts[:point_type] = :args - end - - spot = ErrorHighlight.spot(node, **opts) - - rescue SyntaxError - rescue SystemCallError # file not found or something - rescue ArgumentError # eval'ed code - end - - if spot - return ErrorHighlight.formatter.message_for(spot) - end - - "" + spot = ErrorHighlight.spot(self) + return "" unless spot + return ErrorHighlight.formatter.message_for(spot) end if 
Exception.method_defined?(:detailed_message) diff --git a/test/error_highlight/test_error_highlight.rb b/test/error_highlight/test_error_highlight.rb index 5b7c05e5f49607..89c930f5e62bd9 100644 --- a/test/error_highlight/test_error_highlight.rb +++ b/test/error_highlight/test_error_highlight.rb @@ -1150,7 +1150,7 @@ def v.foo; 1; end def test_custom_formatter custom_formatter = Object.new def custom_formatter.message_for(spot) - "\n\n" + spot.inspect + "\n\n" + spot.except(:script_lines).inspect end original_formatter, ErrorHighlight.formatter = ErrorHighlight.formatter, custom_formatter From 3a58009066049f9af09ea3171cecab88ba0d9f1c Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Wed, 10 Aug 2022 21:17:04 +0900 Subject: [PATCH 075/546] [ruby/error_highlight] Make backtrace_location keyword work We had to keep backtrace_location before opts is overwritten. https://github.com/ruby/error_highlight/commit/2735e4681a --- lib/error_highlight/base.rb | 4 ++- test/error_highlight/test_error_highlight.rb | 26 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/error_highlight/base.rb b/lib/error_highlight/base.rb index 51f1ce369d21e5..dbd173a5cd4768 100644 --- a/lib/error_highlight/base.rb +++ b/lib/error_highlight/base.rb @@ -26,9 +26,9 @@ def self.spot(obj, **opts) case obj when Exception exc = obj + loc = opts[:backtrace_location] opts = { point_type: opts.fetch(:point_type, :name) } - loc = opts[:backtrace_location] unless loc case exc when TypeError, ArgumentError @@ -44,6 +44,8 @@ def self.spot(obj, **opts) opts[:name] = exc.name if NameError === obj end + return nil unless Thread::Backtrace::Location === loc + node = RubyVM::AbstractSyntaxTree.of(loc, keep_script_lines: true) Spotter.new(node, **opts).spot diff --git a/test/error_highlight/test_error_highlight.rb b/test/error_highlight/test_error_highlight.rb index 89c930f5e62bd9..c4a998092b5686 100644 --- a/test/error_highlight/test_error_highlight.rb +++ 
b/test/error_highlight/test_error_highlight.rb @@ -1231,4 +1231,30 @@ def test_spoofed_filename end end end + + def raise_name_error + 1.time + end + + def test_spot_with_backtrace_location + lineno = __LINE__ + begin + raise_name_error + rescue NameError => exc + end + + spot = ErrorHighlight.spot(exc).except(:script_lines) + assert_equal(lineno - 4, spot[:first_lineno]) + assert_equal(lineno - 4, spot[:last_lineno]) + assert_equal(5, spot[:first_column]) + assert_equal(10, spot[:last_column]) + assert_equal(" 1.time\n", spot[:snippet]) + + spot = ErrorHighlight.spot(exc, backtrace_location: exc.backtrace_locations[1]).except(:script_lines) + assert_equal(lineno + 2, spot[:first_lineno]) + assert_equal(lineno + 2, spot[:last_lineno]) + assert_equal(6, spot[:first_column]) + assert_equal(22, spot[:last_column]) + assert_equal(" raise_name_error\n", spot[:snippet]) + end end From 1b32a4c7bb2e1ceb456b191ca88289d3c3d73dd7 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Wed, 10 Aug 2022 21:50:48 +0900 Subject: [PATCH 076/546] [ruby/error_highlight] Bump version https://github.com/ruby/error_highlight/commit/6edf0a0a5d --- lib/error_highlight/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/error_highlight/version.rb b/lib/error_highlight/version.rb index 49a34502cb923a..4279b6d05fc9ff 100644 --- a/lib/error_highlight/version.rb +++ b/lib/error_highlight/version.rb @@ -1,3 +1,3 @@ module ErrorHighlight - VERSION = "0.3.0" + VERSION = "0.4.0" end From 2ebb428fd9cde6c21a3f7578acc996b6ca9c8efc Mon Sep 17 00:00:00 2001 From: git Date: Wed, 10 Aug 2022 12:52:38 +0000 Subject: [PATCH 077/546] Update default gems list at 1b32a4c7bb2e1ceb456b191ca88289 [ci skip] --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 4dd5eb112ac21e..b391702821b654 100644 --- a/NEWS.md +++ b/NEWS.md @@ -159,6 +159,7 @@ Note: We're only listing outstanding class updates. 
* bundler 2.4.0.dev * cgi 0.3.2 * date 3.2.3 + * error_highlight 0.4.0 * etc 1.4.0 * io-console 0.5.11 * io-nonblock 0.1.1 From a661aac9a76cbe93f445507c90dcbab5f3f3196d Mon Sep 17 00:00:00 2001 From: Soutaro Matsumoto Date: Wed, 10 Aug 2022 23:01:49 +0900 Subject: [PATCH 078/546] Bundle unreleased RBS (#6228) --- gems/bundled_gems | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gems/bundled_gems b/gems/bundled_gems index 2699ecda8b2299..a053812f0a8146 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -11,6 +11,6 @@ net-pop 0.1.1 https://github.com/ruby/net-pop net-smtp 0.3.1 https://github.com/ruby/net-smtp matrix 0.4.2 https://github.com/ruby/matrix prime 0.1.2 https://github.com/ruby/prime -rbs 2.6.0 https://github.com/ruby/rbs 14abbbae8885a09a2ed82de2ef31d67a9c0a108d +rbs 2.6.0 https://github.com/ruby/rbs 5202d4eeed3257448f19004b4baac4bcf4127717 typeprof 0.21.3 https://github.com/ruby/typeprof debug 1.6.2 https://github.com/ruby/debug From 26bed71959f74a5aa7fea5608d09cc3a708b4068 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Wed, 10 Aug 2022 13:40:50 -0500 Subject: [PATCH 079/546] [DOC] Adding a few standards-based formats (#6227) --- doc/strftime_formatting.rdoc | 172 ++++++++++++++++++++++++----------- 1 file changed, 118 insertions(+), 54 deletions(-) diff --git a/doc/strftime_formatting.rdoc b/doc/strftime_formatting.rdoc index 6c27fa6a2379ec..30a629bf683083 100644 --- a/doc/strftime_formatting.rdoc +++ b/doc/strftime_formatting.rdoc @@ -294,6 +294,124 @@ longhand specifier. DateTime.now.strftime('%a %b %e %H:%M:%S %Z %Y') # => "Wed Jun 29 08:32:18 -05:00 2022" +=== Flags + +Flags may affect certain formatting specifications. + +Multiple flags may be given with a single conversion specified; +order does not matter. 
+ +==== Padding Flags + +- 0 - Pad with zeroes: + + Time.new(10).strftime('%0Y') # => "0010" + +- _ - Pad with blanks: + + Time.new(10).strftime('%_Y') # => " 10" + +- - - Don't pad: + + Time.new(10).strftime('%-Y') # => "10" + +==== Casing Flags + +- ^ - Upcase result: + + Time.new(2022, 1).strftime('%B') # => "January" # No casing flag. + Time.new(2022, 1).strftime('%^B') # => "JANUARY" + +- # - Swapcase result: + + Time.now.strftime('%p') # => "AM" + Time.now.strftime('%^p') # => "AM" + Time.now.strftime('%#p') # => "am" + +==== Timezone Flags + +- : - Put timezone as colon-separated hours and minutes: + + Time.now.strftime('%:z') # => "-05:00" + +- :: - Put timezone as colon-separated hours, minutes, and seconds: + + Time.now.strftime('%::z') # => "-05:00:00" + +=== Width Specifiers + +The integer width specifier gives a minimum width for the returned string: + + Time.new(2002).strftime('%Y') # => "2002" # No width specifier. + Time.new(2002).strftime('%10Y') # => "0000002002" + Time.new(2002, 12).strftime('%B') # => "December" # No width specifier. + Time.new(2002, 12).strftime('%10B') # => " December" + Time.new(2002, 12).strftime('%3B') # => "December" # Ignored if too small. + +== Specialized Format Strings + +Here are a few specialized format strings, +each based on an external standard. + +=== HTTP Format + +The HTTP date format is based on +{RFC 2616}[https://datatracker.ietf.org/doc/html/rfc2616], +and treats dates in the format '%a, %d %b %Y %T GMT': + + d = Date.new(2001, 2, 3) # => # + # Return HTTP-formatted string. + httpdate = d.httpdate # => "Sat, 03 Feb 2001 00:00:00 GMT" + # Return new date parsed from HTTP-formatted string. + Date.httpdate(httpdate) # => # + # Return hash parsed from HTTP-formatted string. 
+ Date._httpdate(httpdate) + # => {:wday=>6, :mday=>3, :mon=>2, :year=>2001, :hour=>0, :min=>0, :sec=>0, :zone=>"GMT", :offset=>0} + +=== RFC 3339 Format + +The RFC 3339 date format is based on +{RFC 3339}[https://datatracker.ietf.org/doc/html/rfc3339]: + + d = Date.new(2001, 2, 3) # => # + # Return 3339-formatted string. + rfc3339 = d.rfc3339 # => "2001-02-03T00:00:00+00:00" + # Return new date parsed from 3339-formatted string. + Date.rfc3339(rfc3339) # => # + # Return hash parsed from 3339-formatted string. + Date._rfc3339(rfc3339) + # => {:year=>2001, :mon=>2, :mday=>3, :hour=>0, :min=>0, :sec=>0, :zone=>"+00:00", :offset=>0} + +=== RFC 2822 Format + +The RFC 2822 date format is based on +{RFC 2822}[https://datatracker.ietf.org/doc/html/rfc2822], +and treats dates in the format '%a, %-d %b %Y %T %z']: + + d = Date.new(2001, 2, 3) # => # + # Return 2822-formatted string. + rfc2822 = d.rfc2822 # => "Sat, 3 Feb 2001 00:00:00 +0000" + # Return new date parsed from 2822-formatted string. + Date.rfc2822(rfc2822) # => # + # Return hash parsed from 2822-formatted string. + Date._rfc2822(rfc2822) + # => {:wday=>6, :mday=>3, :mon=>2, :year=>2001, :hour=>0, :min=>0, :sec=>0, :zone=>"+0000", :offset=>0} + +=== JIS X 0301 Format + +The JIS X 0301 format includes the +{Japanese era name}[https://en.wikipedia.org/wiki/Japanese_era_name], +and treats dates in the format '%Y-%m-%d' +with the first letter of the romanized era name prefixed: + + d = Date.new(2001, 2, 3) # => # + # Return 0301-formatted string. + jisx0301 = d.jisx0301 # => "H13.02.03" + # Return new date parsed from 0301-formatted string. + Date.jisx0301(jisx0301) # => # + # Return hash parsed from 0301-formatted string. + Date._jisx0301(jisx0301) # => {:year=>2001, :mon=>2, :mday=>3} + === ISO 8601 Format Specifications This section shows format specifications that are compatible with @@ -407,57 +525,3 @@ separated by the letter +T+. 
For the relevant +strftime+ formats, see {Dates}[rdoc-ref:strftime_formatting.rdoc@Dates] and {Times}[rdoc-ref:strftime_formatting.rdoc@Times] above. - -=== Flags - -Flags may affect certain formatting specifications. - -Multiple flags may be given with a single conversion specified; -order does not matter. - -==== Padding Flags - -- 0 - Pad with zeroes: - - Time.new(10).strftime('%0Y') # => "0010" - -- _ - Pad with blanks: - - Time.new(10).strftime('%_Y') # => " 10" - -- - - Don't pad: - - Time.new(10).strftime('%-Y') # => "10" - -==== Casing Flags - -- ^ - Upcase result: - - Time.new(2022, 1).strftime('%B') # => "January" # No casing flag. - Time.new(2022, 1).strftime('%^B') # => "JANUARY" - -- # - Swapcase result: - - Time.now.strftime('%p') # => "AM" - Time.now.strftime('%^p') # => "AM" - Time.now.strftime('%#p') # => "am" - -==== Timezone Flags - -- : - Put timezone as colon-separated hours and minutes: - - Time.now.strftime('%:z') # => "-05:00" - -- :: - Put timezone as colon-separated hours, minutes, and seconds: - - Time.now.strftime('%::z') # => "-05:00:00" - -=== Width Specifiers - -The integer width specifier gives a minimum width for the returned string: - - Time.new(2002).strftime('%Y') # => "2002" # No width specifier. - Time.new(2002).strftime('%10Y') # => "0000002002" - Time.new(2002, 12).strftime('%B') # => "December" # No width specifier. - Time.new(2002, 12).strftime('%10B') # => " December" - Time.new(2002, 12).strftime('%3B') # => "December" # Ignored if too small. 
From d115a06037e900e1ba29d2293e1d9e4964499ff2 Mon Sep 17 00:00:00 2001 From: git Date: Thu, 11 Aug 2022 03:41:04 +0900 Subject: [PATCH 080/546] * 2022-08-11 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 22ed2490953f41..780a93e8dcc0de 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 10 +#define RUBY_RELEASE_DAY 11 #include "ruby/version.h" #include "ruby/internal/abi.h" From bfa6a8ddc84fffe0aef5a0f91b417167e124dbbf Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 10 Aug 2022 13:02:19 -0700 Subject: [PATCH 081/546] Only allow procs created by Symbol#to_proc to call public methods Fixes [Bug #18826] Co-authored-by: Nobuyoshi Nakada --- .../core/kernel/fixtures/warn_core_method.rb | 2 +- spec/ruby/core/symbol/to_proc_spec.rb | 27 ++++++++++++++ vm_insnhelper.c | 36 +++++++++++++++---- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/spec/ruby/core/kernel/fixtures/warn_core_method.rb b/spec/ruby/core/kernel/fixtures/warn_core_method.rb index f5dee6b668aaed..fd825624047afa 100644 --- a/spec/ruby/core/kernel/fixtures/warn_core_method.rb +++ b/spec/ruby/core/kernel/fixtures/warn_core_method.rb @@ -1,6 +1,6 @@ raise 'should be run without RubyGems' if defined?(Gem) -def deprecated(n=1) +public def deprecated(n=1) # puts nil, caller(0), nil warn "use X instead", uplevel: n end diff --git a/spec/ruby/core/symbol/to_proc_spec.rb b/spec/ruby/core/symbol/to_proc_spec.rb index 47f2a939ab2d75..81939e0046c540 100644 --- a/spec/ruby/core/symbol/to_proc_spec.rb +++ b/spec/ruby/core/symbol/to_proc_spec.rb @@ -46,6 +46,33 @@ end end + ruby_version_is "3.2" do + it "only calls public methods" do + body = proc do + public def pub; @a << :pub end + protected def pro; @a << :pro end + private def pri; @a << :pri end + attr_reader :a + end + + @a = [] + singleton_class.class_eval(&body) + tap(&:pub) + 
proc{tap(&:pro)}.should raise_error(NoMethodError) + proc{tap(&:pri)}.should raise_error(NoMethodError) + @a.should == [:pub] + + @a = [] + c = Class.new(&body) + o = c.new + o.instance_variable_set(:@a, []) + o.tap(&:pub) + proc{tap(&:pro)}.should raise_error(NoMethodError) + proc{o.tap(&:pri)}.should raise_error(NoMethodError) + o.a.should == [:pub] + end + end + it "raises an ArgumentError when calling #call on the Proc without receiver" do -> { :object_id.to_proc.call diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 2c0a369a439001..1812f7ce71aead 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3182,9 +3182,11 @@ ci_missing_reason(const struct rb_callinfo *ci) return stat; } +static VALUE vm_call_method_missing(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling); + static VALUE vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, - struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE symbol) + struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE symbol, int flags) { ASSUME(calling->argc >= 0); /* Also assumes CALLER_SETUP_ARG is already done. */ @@ -3194,9 +3196,7 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, VALUE recv = calling->recv; VALUE klass = CLASS_OF(recv); ID mid = rb_check_id(&symbol); - int flags = VM_CALL_FCALL | - VM_CALL_OPT_SEND | - (calling->kw_splat ? VM_CALL_KW_SPLAT : 0); + flags |= VM_CALL_OPT_SEND | (calling->kw_splat ? VM_CALL_KW_SPLAT : 0); if (UNLIKELY(! 
mid)) { mid = idMethodMissing; @@ -3243,7 +3243,29 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, { .method_missing_reason = missing_reason }, rb_callable_method_entry_with_refinements(klass, mid, NULL)); - return vm_call_method(ec, reg_cfp, calling); + if (flags & VM_CALL_FCALL) { + return vm_call_method(ec, reg_cfp, calling); + } + + const struct rb_callcache *cc = calling->cc; + VM_ASSERT(callable_method_entry_p(vm_cc_cme(cc))); + + if (vm_cc_cme(cc) != NULL) { + switch (METHOD_ENTRY_VISI(vm_cc_cme(cc))) { + case METHOD_VISI_PUBLIC: /* likely */ + return vm_call_method_each_type(ec, reg_cfp, calling); + case METHOD_VISI_PRIVATE: + vm_cc_method_missing_reason_set(cc, MISSING_PRIVATE); + case METHOD_VISI_PROTECTED: + vm_cc_method_missing_reason_set(cc, MISSING_PROTECTED); + break; + default: + VM_UNREACHABLE(vm_call_method); + } + return vm_call_method_missing(ec, reg_cfp, calling); + } + + return vm_call_method_nome(ec, reg_cfp, calling); } static VALUE @@ -3283,7 +3305,7 @@ vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct calling->argc -= 1; DEC_SP(1); - return vm_call_symbol(ec, reg_cfp, calling, calling->ci, sym); + return vm_call_symbol(ec, reg_cfp, calling, calling->ci, sym, VM_CALL_FCALL); } } @@ -4097,7 +4119,7 @@ vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, VALUE symbol = VM_BH_TO_SYMBOL(block_handler); CALLER_SETUP_ARG(reg_cfp, calling, ci); calling->recv = TOPN(--calling->argc); - return vm_call_symbol(ec, reg_cfp, calling, ci, symbol); + return vm_call_symbol(ec, reg_cfp, calling, ci, symbol, 0); } } From ff42e2359bdbf37e1721a82b4cfd95b31f494f3f Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 3 Jun 2022 18:15:56 -0700 Subject: [PATCH 082/546] Revert "Add {Method,UnboundMethod}#{public?,private?,protected?}" This reverts commit 27278150685e738f84105d09843d3ba371146c7a and 58dc8bf8f15df9a33d191074e8a5d4946a3d59d5. 
Visibility is an attribute of the method entry in a class, not an attribute of the Method object. Fixes [#18729] Fixes [#18751] Fixes [#18435] --- proc.c | 60 +--------------------------------------- test/ruby/test_method.rb | 53 ++--------------------------------- 2 files changed, 3 insertions(+), 110 deletions(-) diff --git a/proc.c b/proc.c index a525562230b3c8..c234ed3f9365b7 100644 --- a/proc.c +++ b/proc.c @@ -40,7 +40,6 @@ struct METHOD { const VALUE iclass; const rb_method_entry_t * const me; /* for bound methods, `me' should be rb_callable_method_entry_t * */ - rb_method_visibility_t visibility; }; VALUE rb_cUnboundMethod; @@ -1664,7 +1663,6 @@ mnew_missing(VALUE klass, VALUE obj, ID id, VALUE mclass) me = rb_method_entry_create(id, klass, METHOD_VISI_UNDEF, def); RB_OBJ_WRITE(method, &data->me, me); - data->visibility = METHOD_ENTRY_VISI(me); return method; } @@ -1722,7 +1720,6 @@ mnew_internal(const rb_method_entry_t *me, VALUE klass, VALUE iclass, RB_OBJ_WRITE(method, &data->klass, klass); RB_OBJ_WRITE(method, &data->iclass, iclass); RB_OBJ_WRITE(method, &data->me, me); - data->visibility = visi; return method; } @@ -1820,7 +1817,6 @@ method_eq(VALUE method, VALUE other) if (!rb_method_entry_eq(m1->me, m2->me) || klass1 != klass2 || - m1->visibility != m2->visibility || m1->klass != m2->klass || m1->recv != m2->recv) { return Qfalse; @@ -1874,7 +1870,6 @@ method_unbind(VALUE obj) RB_OBJ_WRITE(method, &data->klass, orig->klass); RB_OBJ_WRITE(method, &data->iclass, orig->iclass); RB_OBJ_WRITE(method, &data->me, rb_method_entry_clone(orig->me)); - data->visibility = orig->visibility; return method; } @@ -2390,7 +2385,6 @@ method_clone(VALUE self) RB_OBJ_WRITE(clone, &data->klass, orig->klass); RB_OBJ_WRITE(clone, &data->iclass, orig->iclass); RB_OBJ_WRITE(clone, &data->me, rb_method_entry_clone(orig->me)); - data->visibility = orig->visibility; return clone; } @@ -2641,7 +2635,6 @@ umethod_bind(VALUE method, VALUE recv) RB_OBJ_WRITE(method, &bound->klass, 
klass); RB_OBJ_WRITE(method, &bound->iclass, iclass); RB_OBJ_WRITE(method, &bound->me, me); - bound->visibility = data->visibility; return method; } @@ -2677,7 +2670,7 @@ umethod_bind_call(int argc, VALUE *argv, VALUE method) VALUE methclass, klass, iclass; const rb_method_entry_t *me; convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me); - struct METHOD bound = { recv, klass, 0, me, METHOD_ENTRY_VISI(me) }; + struct METHOD bound = { recv, klass, 0, me }; return call_method_data(ec, &bound, argc, argv, passed_procval, RB_PASS_CALLED_KEYWORDS); } @@ -3354,51 +3347,6 @@ method_super_method(VALUE method) return mnew_internal(me, me->owner, iclass, data->recv, mid, rb_obj_class(method), FALSE, FALSE); } -/* - * call-seq: - * meth.public? -> true or false - * - * Returns whether the method is public. - */ - -static VALUE -method_public_p(VALUE method) -{ - const struct METHOD *data; - TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - return RBOOL(data->visibility == METHOD_VISI_PUBLIC); -} - -/* - * call-seq: - * meth.protected? -> true or false - * - * Returns whether the method is protected. - */ - -static VALUE -method_protected_p(VALUE method) -{ - const struct METHOD *data; - TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - return RBOOL(data->visibility == METHOD_VISI_PROTECTED); -} - -/* - * call-seq: - * meth.private? -> true or false - * - * Returns whether the method is private. 
- */ - -static VALUE -method_private_p(VALUE method) -{ - const struct METHOD *data; - TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - return RBOOL(data->visibility == METHOD_VISI_PRIVATE); -} - /* * call-seq: * local_jump_error.exit_value -> obj @@ -4339,9 +4287,6 @@ Init_Proc(void) rb_define_method(rb_cMethod, "source_location", rb_method_location, 0); rb_define_method(rb_cMethod, "parameters", rb_method_parameters, 0); rb_define_method(rb_cMethod, "super_method", method_super_method, 0); - rb_define_method(rb_cMethod, "public?", method_public_p, 0); - rb_define_method(rb_cMethod, "protected?", method_protected_p, 0); - rb_define_method(rb_cMethod, "private?", method_private_p, 0); rb_define_method(rb_mKernel, "method", rb_obj_method, 1); rb_define_method(rb_mKernel, "public_method", rb_obj_public_method, 1); rb_define_method(rb_mKernel, "singleton_method", rb_obj_singleton_method, 1); @@ -4365,9 +4310,6 @@ Init_Proc(void) rb_define_method(rb_cUnboundMethod, "source_location", rb_method_location, 0); rb_define_method(rb_cUnboundMethod, "parameters", rb_method_parameters, 0); rb_define_method(rb_cUnboundMethod, "super_method", method_super_method, 0); - rb_define_method(rb_cUnboundMethod, "public?", method_public_p, 0); - rb_define_method(rb_cUnboundMethod, "protected?", method_protected_p, 0); - rb_define_method(rb_cUnboundMethod, "private?", method_private_p, 0); /* Module#*_method */ rb_define_method(rb_cModule, "instance_method", rb_mod_instance_method, 1); diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb index 83e499913a4bb4..56e94493d9860c 100644 --- a/test/ruby/test_method.rb +++ b/test/ruby/test_method.rb @@ -199,11 +199,6 @@ def m.foo; end assert_equal(o.method(:foo), o.method(:foo)) assert_equal(o.method(:foo), o.method(:bar)) assert_not_equal(o.method(:foo), o.method(:baz)) - - class << o - private :bar - end - assert_not_equal(o.method(:foo), o.method(:bar)) end def test_hash @@ -330,8 +325,8 @@ class << 
PUBLIC_SINGLETON_TEST def PUBLIC_SINGLETON_TEST.def; end end def test_define_singleton_method_public - assert_equal(true, PUBLIC_SINGLETON_TEST.method(:dsm).public?) - assert_equal(true, PUBLIC_SINGLETON_TEST.method(:def).public?) + assert_nil(PUBLIC_SINGLETON_TEST.dsm) + assert_nil(PUBLIC_SINGLETON_TEST.def) end def test_define_singleton_method_no_proc @@ -1197,50 +1192,6 @@ def foo assert_nil(super_method) end - def test_method_visibility_predicates - v = Visibility.new - assert_equal(true, v.method(:mv1).public?) - assert_equal(true, v.method(:mv2).private?) - assert_equal(true, v.method(:mv3).protected?) - assert_equal(false, v.method(:mv2).public?) - assert_equal(false, v.method(:mv3).private?) - assert_equal(false, v.method(:mv1).protected?) - end - - def test_unbound_method_visibility_predicates - assert_equal(true, Visibility.instance_method(:mv1).public?) - assert_equal(true, Visibility.instance_method(:mv2).private?) - assert_equal(true, Visibility.instance_method(:mv3).protected?) - assert_equal(false, Visibility.instance_method(:mv2).public?) - assert_equal(false, Visibility.instance_method(:mv3).private?) - assert_equal(false, Visibility.instance_method(:mv1).protected?) - end - - class VisibilitySub < Visibility - protected :mv1 - public :mv2 - private :mv3 - end - - def test_method_visibility_predicates_with_subclass_visbility_change - v = VisibilitySub.new - assert_equal(false, v.method(:mv1).public?) - assert_equal(false, v.method(:mv2).private?) - assert_equal(false, v.method(:mv3).protected?) - assert_equal(true, v.method(:mv2).public?) - assert_equal(true, v.method(:mv3).private?) - assert_equal(true, v.method(:mv1).protected?) - end - - def test_unbound_method_visibility_predicates_with_subclass_visbility_change - assert_equal(false, VisibilitySub.instance_method(:mv1).public?) - assert_equal(false, VisibilitySub.instance_method(:mv2).private?) - assert_equal(false, VisibilitySub.instance_method(:mv3).protected?) 
- assert_equal(true, VisibilitySub.instance_method(:mv2).public?) - assert_equal(true, VisibilitySub.instance_method(:mv3).private?) - assert_equal(true, VisibilitySub.instance_method(:mv1).protected?) - end - def rest_parameter(*rest) rest end From 9fc401b689e64dde5fc7cc56c734d5cddd6aa6e1 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Wed, 10 Aug 2022 16:18:27 -0500 Subject: [PATCH 083/546] [ruby/rdoc] Treat text markup (italic, bold, monofont) as blocks (https://github.com/ruby/rdoc/pull/911) https://github.com/ruby/rdoc/commit/dc88f1b425 --- doc/rdoc/markup_reference.rb | 367 ++++++++++++++++++++++++----------- 1 file changed, 253 insertions(+), 114 deletions(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index c59f12cfe3c59e..b3d1d97f4f61be 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -51,23 +51,37 @@ # # === Blocks # -# It's convenient to think of markup input as a sequence of _blocks_, -# such as: -# -# - {Paragraphs}[rdoc-ref:RDoc::MarkupReference@Paragraphs]. -# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. -# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. -# - {Block quotes}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]. -# - {Bullet lists}[rdoc-ref:RDoc::MarkupReference@Bullet+Lists]. -# - {Numbered lists}[rdoc-ref:RDoc::MarkupReference@Numbered+Lists]. -# - {Lettered lists}[rdoc-ref:RDoc::MarkupReference@Lettered+Lists]. -# - {Labeled lists}[rdoc-ref:RDoc::MarkupReference@Labeled+Lists]. -# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. -# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. -# - {Directives}[rdoc-ref:RDoc::MarkupReference@Directives]. -# -# All of these except paragraph blocks are distinguished by indentation, -# or by unusual initial or embedded characters. 
+# It's convenient to think of \RDoc markup input as a sequence of _blocks_ +# of various types (details at the links): +# +# - {Paragraph}[rdoc-ref:RDoc::MarkupReference@Paragraphs]: +# an ordinary paragraph. +# - {Verbatim text block}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]: +# a block of text to be rendered literally. +# - {Code block}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]: +# a verbatim text block containing Ruby code, +# to be rendered with code highlighting. +# - {Block quote}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]: +# a longish quoted passage, to be rendered with indentation +# instead of quote marks. +# - {List}[rdoc-ref:RDoc::MarkupReference@Lists]: items for +# a bullet list, numbered list, lettered list, or labeled list. +# - {Heading}[rdoc-ref:RDoc::MarkupReference@Headings]: +# a section heading. +# - {Horizontal rule}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]: +# a line across the rendered page. +# - {Directive}[rdoc-ref:RDoc::MarkupReference@Directives]: +# various special directions for the rendering. +# - {Text Markup}[rdoc-ref:RDoc:MarkupReference@Text+Markup]: +# text to be rendered in italic, bold, or monofont. +# +# About the blocks: +# +# - Except for a paragraph, a block is distinguished by its indentation, +# or by unusual initial or embedded characters. +# - Any block may appear independently +# (that is, not nested in another block); +# some blocks may be nested, as detailed below. # # ==== Paragraphs # @@ -98,12 +112,13 @@ # # A paragraph may contain nested blocks, including: # -# - Verbatim text blocks. -# - Code blocks. -# - Block quotes. -# - Lists of any type. -# - Headings. -# - Horizontal rules. +# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. +# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. +# - {Block quotes}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]. +# - {Lists}[rdoc-ref:RDoc::MarkupReference@Lists]. 
+# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. +# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. +# - {Text Markup}[rdoc-ref:RDoc:MarkupReference@Text+Markup]. # # ==== Verbatim Text Blocks # @@ -140,6 +155,9 @@ # # This is not verbatim text. # +# A verbatim text block may not contain nested blocks of any kind +# -- it's verbatim. +# # ==== Code Blocks # # A special case of verbatim text is the code block, @@ -173,6 +191,9 @@ # Pro tip: If your indented Ruby code does not get highlighted, # it may contain a syntax error. # +# A code block may not contain nested blocks of any kind +# -- it's verbatim. +# # ==== Block Quotes # # You can use the characters >>> (unindented), @@ -181,6 +202,7 @@ # # Example input: # +# Here's a block quote: # >>> # Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer # commodo quam iaculis massa posuere, dictum fringilla justo pulvinar. @@ -194,27 +216,30 @@ # Rendered HTML: # # >>> -# Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer -# commodo quam iaculis massa posuere, dictum fringilla justo pulvinar. -# Quisque turpis erat, pharetra eu dui at, sollicitudin accumsan nulla. +# Here's a block quote: +# >>> +# Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer +# commodo quam iaculis massa posuere, dictum fringilla justo pulvinar. +# Quisque turpis erat, pharetra eu dui at, sollicitudin accumsan nulla. # -# Aenean congue ligula eu ligula molestie, eu pellentesque purus -# faucibus. In id leo non ligula condimentum lobortis. Duis vestibulum, -# diam in pellentesque aliquet, mi tellus placerat sapien, id euismod -# purus magna ut tortor. +# Aenean congue ligula eu ligula molestie, eu pellentesque purus +# faucibus. In id leo non ligula condimentum lobortis. Duis vestibulum, +# diam in pellentesque aliquet, mi tellus placerat sapien, id euismod +# purus magna ut tortor. 
+# +# Note that, unlike verbatim text, single newlines are not honored, +# but that a double newline begins a new paragraph in the block quote. # # A block quote may contain nested blocks, including: # # - Other block quotes. -# - Paragraphs. -# - Verbatim text blocks. -# - Code blocks. -# - Lists of any type. -# - Headings. -# - Horizontal rules. -# -# Note that, unlike verbatim text, single newlines are not honored, -# but that a double newline begins a new paragraph in the block quote. +# - {Paragraphs}[rdoc-ref:RDoc::MarkupReference@Paragraphs]. +# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. +# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. +# - {Lists}[rdoc-ref:RDoc::MarkupReference@Lists]. +# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. +# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. +# - {Text Markup}[rdoc-ref:RDoc:MarkupReference@Text+Markup]. # # ==== Lists # @@ -240,12 +265,13 @@ # A list item may contain nested blocks, including: # # - Other lists of any type. -# - Paragraphs. -# - Verbatim text blocks. -# - Code blocks. -# - Block quotes. -# - Headings. -# - Horizontal rules. +# - {Paragraphs}[rdoc-ref:RDoc::MarkupReference@Paragraphs]. +# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. +# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. +# - {Block quotes}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]. +# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. +# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. +# - {Text Markup}[rdoc-ref:RDoc:MarkupReference@Text+Markup]. # # ===== Bullet Lists # @@ -370,30 +396,29 @@ # # ============Still a Heading (Level 6) # # \== Not a Heading # +# A heading may contain only one type of nested block: +# +# - {Text Markup}[rdoc-ref:RDoc:MarkupReference@Text+Markup]. +# # ==== Horizontal Rules # -# A horizontal rule begins with three or more hyphens. 
+# A horizontal rule consists of a line with three or more hyphens +# and nothing more. # # Example input: # -# # ------ -# # Stuff between. -# # -# # \--- Not a horizontal rule. -# # -# # -- Also not a horizontal rule. -# # -# # --- +# --- +# --- Not a horizontal rule. +# +# -- Also not a horizontal rule. +# --- # # Rendered HTML: # >>> -# ------ -# Stuff between. -# -# \--- Not a horizontal rule. +# --- +# --- Not a horizontal rule. # # -- Also not a horizontal rule. -# # --- # # ==== Directives @@ -584,83 +609,197 @@ # # For C code, the directive may appear in a stand-alone comment # -# === Text Markup +# ==== Text Markup # -# Text in a paragraph, list item (any type), or heading -# may have markup formatting. +# Text markup is metatext that marks text as: # -# ==== Italic +# - Italic. +# - Bold. +# - Monofont. # -# A single word may be italicized by prefixed and suffixed underscores. +# Text markup may contain only one type of nested block: # -# Examples: +# - More text markup: +# italic, bold, monofont. # -# # _Word_ in paragraph. -# # - _Word_ in bullet list item. -# # 1. _Word_ in numbered list item. -# # a. _Word_ in lettered list item. -# # [_word_] _Word_ in labeled list item. -# # ====== _Word_ in heading +# ===== Italic # -# Any text may be italicized via HTML tag +i+ or +em+. +# Text may be marked as italic via HTML tag or . # -# Examples: +# Example input: # -# # Two words in paragraph. -# # - Two words in bullet list item. -# # 1. Two words in numbered list item. -# # a. Two words in lettered list item. -# # [Two words] Two words in labeled list item. -# # ====== Two words in heading +# Two words in a paragraph. # -# ==== Bold +# >>> +# Two words in a block quote. # -# A single word may be made bold by prefixed and suffixed asterisks. +# - Two words in a bullet list item. # -# Examples: +# ====== Two words in a Heading # -# # *Word* in paragraph. -# # - *Word* in bullet list item. -# # 1. *Word* in numbered list item. -# # a. 
*Word* in lettered list item. -# # [*word*] *Word* in labeled list item. -# # ====== *Word* in heading +# Italicized passage containing *bold* and +monofont+. # -# Any text may be made bold via HTML tag +b+. +# Rendered HTML: +# >>> +# Two words in a paragraph. # -# Examples: +# >>> +# Two words in a block quote. # -# # Two words in paragraph. -# # - Two words in bullet list item. -# # 1. Two words in numbered list item. -# # a. Two words in lettered list item. -# # [Two words] Two words in labeled list item. -# # ====== Two words in heading +# - Two words in a bullet list item. # -# ==== Monofont +# ====== Two words in a Heading # -# A single word may be made monofont -- sometimes called "typewriter font" -- -# by prefixed and suffixed plus-signs. +# Italicized passage containing *bold* and +monofont+. # -# Examples: +# A single word may be italicized via a shorthand: +# prefixed and suffixed underscores. # -# # +Word+ in paragraph. -# # - +Word+ in bullet list item. -# # 1. +Word+ in numbered list item. -# # a. +Word+ in lettered list item. -# # [+word+] +Word+ in labeled list item. -# # ====== +Word+ in heading +# Example input: # -# Any text may be made monofont via HTML tag +tt+ or +code+. +# _Word_ in a paragraph. # -# Examples: +# >>> +# _Word_ in a block quote. +# +# - _Word_ in a bullet list item. +# +# [_word_] _Word_ in a labeled list item. +# +# ====== _Word_ in a Heading +# +# Rendered HTML: +# >>> +# _Word_ in a paragraph. +# +# >>> +# _Word_ in a block quote. +# +# - _Word_ in a bullet list item. +# +# [_word_] _Word_ in a labeled list item. +# +# ====== _Word_ in a Heading +# +# ===== Bold +# +# Text may be marked as bold via HTML tag . +# +# Example input: +# +# Two words in a paragraph. +# +# >>> +# Two words in a block quote. +# +# - Two words in a bullet list item. +# +# ====== Two words in a Heading +# +# Bold passage containing _italics_ and +monofont+. +# +# Rendered HTML: +# +# >>> +# Two words in a paragraph. 
+# +# >>> +# Two words in a block quote. +# +# - Two words in a bullet list item. +# +# ====== Two words in a Heading +# +# Bold passage containing _italics_ and +monofont+. +# +# A single word may be made bold via a shorthand: +# prefixed and suffixed asterisks. +# +# Example input: +# +# *Word* in a paragraph. +# +# >>> +# *Word* in a block quote. +# +# - *Word* in a bullet list item. +# +# [*word*] *Word* in a labeled list item. +# +# ===== *Word* in a Heading +# +# Rendered HTML: +# +# >>> +# *Word* in a paragraph. +# +# >>> +# *Word* in a block quote. +# +# - *Word* in a bullet list item. +# +# [*word*] *Word* in a labeled list item. +# +# ===== *Word* in a Heading +# +# ===== Monofont +# +# Text may be marked as monofont +# -- sometimes called 'typewriter font' -- +# via HTML tag or . +# +# Example input: +# +# Two words in a paragraph. +# +# >>> +# Two words in a block quote. +# +# - Two words in a bullet list item. +# +# ====== Two words in heading +# +# Monofont passage containing _italics_ and *bold*. +# +# Rendered HTML: +# +# >>> +# Two words in a paragraph. +# +# >>> +# Two words in a block quote. +# +# - Two words in a bullet list item. +# +# ====== Two words in heading +# +# Monofont passage containing _italics_ and *bold*. +# +# A single word may be made monofont by a shorthand: +# prefixed and suffixed plus-signs. +# +# Example input: +# +# +Word+ in a paragraph. +# +# >>> +# +Word+ in a block quote. +# +# - +Word+ in a bullet list item. +# +# ====== +Word+ in a Heading +# +# Rendered HTML: +# +# >>> +# +Word+ in a paragraph. +# +# >>> +# +Word+ in a block quote. +# +# - +Word+ in a bullet list item. # -# # Two words in paragraph. -# # - Two words in bullet list item. -# # 1. Two words in numbered list item. -# # a. Two words in lettered list item. -# # [Two words] Two words in labeled list item. 
-# # ====== Two words in heading +# ====== +Word+ in a Heading # # ==== Escaping Text Markup # From 74d95744bdb9cf47b8fa07b9a9938c323658d9b9 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 11 Aug 2022 11:02:28 +0900 Subject: [PATCH 084/546] Add `--enable-devel` configure option Since `RUBY_DEVEL` in cppflags has no effect in the configure script and makefiles. --- .github/workflows/compilers.yml | 2 +- configure.ac | 10 +++++++--- template/Makefile.in | 4 +++- win32/configure.bat | 12 ++++++++++++ win32/setup.mak | 4 +++- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/.github/workflows/compilers.yml b/.github/workflows/compilers.yml index bd082039b316b1..8ef04f1ef233d2 100644 --- a/.github/workflows/compilers.yml +++ b/.github/workflows/compilers.yml @@ -147,6 +147,7 @@ jobs: - { name: disable-dln, env: { append_configure: '--disable-dln' } } - { name: enable-mkmf-verbose, env: { append_configure: '--enable-mkmf-verbose' } } - { name: disable-rubygems, env: { append_configure: '--disable-rubygems' } } + - { name: RUBY_DEVEL, env: { append_configure: '--enable-devel' } } - { name: OPT_THREADED_CODE=1, env: { cppflags: '-DOPT_THREADED_CODE=1' } } - { name: OPT_THREADED_CODE=2, env: { cppflags: '-DOPT_THREADED_CODE=2' } } @@ -154,7 +155,6 @@ jobs: - { name: NDEBUG, env: { cppflags: '-DNDEBUG' } } - { name: RUBY_DEBUG, env: { cppflags: '-DRUBY_DEBUG' } } - - { name: RUBY_DEVEL, env: { cppflags: '-DRUBY_DEVEL' } } # - { name: ARRAY_DEBUG, env: { cppflags: '-DARRAY_DEBUG' } } # - { name: BIGNUM_DEBUG, env: { cppflags: '-DBIGNUM_DEBUG' } } # - { name: CCAN_LIST_DEBUG, env: { cppflags: '-DCCAN_LIST_DEBUG' } } diff --git a/configure.ac b/configure.ac index 575bdf663155fd..bc21c262922161 100644 --- a/configure.ac +++ b/configure.ac @@ -622,8 +622,13 @@ AS_IF([test "$fdeclspec" = yes], [ RUBY_APPEND_OPTIONS(CXXFLAGS, -fdeclspec) ]) -AS_IF([test "x$RUBY_DEVEL" != xyes], [RUBY_DEVEL=no]) -particular_werror_flags=$RUBY_DEVEL +AC_ARG_ENABLE(devel, + 
AS_HELP_STRING([--enable-devel], [enable development build]), + [RUBY_DEVEL=$enableval], + [AS_IF([test "x${RUBY_DEVEL-no}" != xyes], [RUBY_DEVEL=])] +)dnl +AC_SUBST(RUBY_DEVEL) +particular_werror_flags=${RUBY_DEVEL:-no} AC_ARG_ENABLE(werror, AS_HELP_STRING([--disable-werror], [don't make warnings into errors @@ -867,7 +872,6 @@ AS_IF([test "$GCC" = yes], [ test "${debugflags+set}" || {RUBY_TRY_CFLAGS(-g3, [debugflags=-g3])} ]) test $ac_cv_prog_cc_g = yes && : ${debugflags=-g} -AS_IF([test "x$RUBY_DEVEL" = xyes], [RUBY_APPEND_OPTION(XCFLAGS, -DRUBY_DEVEL=1)]) AS_IF([test "$GCC" = ""], [ AS_CASE(["$target_os"],[aix*],[warnflags="$warnflags -qinfo=por" rb_cv_warnflags="$rb_cv_warnflags -qinfo=por"]) diff --git a/template/Makefile.in b/template/Makefile.in index a8581260b99147..756af363e4ba8e 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -89,7 +89,9 @@ optflags = @optflags@ debugflags = @debugflags@ warnflags = @warnflags@ @strict_warnflags@ cppflags = @cppflags@ -XCFLAGS = @XCFLAGS@ $(INCFLAGS) +RUBY_DEVEL = @RUBY_DEVEL@ # "yes" or empty +_RUBY_DEVEL_enabled = $(RUBY_DEVEL:no=) +XCFLAGS = @XCFLAGS@ $(INCFLAGS) $(_RUBY_DEVEL_enabled:yes=-DRUBY_DEVEL=1) USE_RUBYGEMS = @USE_RUBYGEMS@ USE_RUBYGEMS_ = $(USE_RUBYGEMS:yes=) CPPFLAGS = @CPPFLAGS@ $(USE_RUBYGEMS_:no=-DDISABLE_RUBYGEMS=1) diff --git a/win32/configure.bat b/win32/configure.bat index 573f8bf0e581c3..4602b41ec599a2 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -34,6 +34,8 @@ if "%1" == "--enable-install-static-library" goto :enable-lib if "%1" == "--disable-install-static-library" goto :disable-lib if "%1" == "--enable-debug-env" goto :enable-debug-env if "%1" == "--disable-debug-env" goto :disable-debug-env +if "%1" == "--enable-devel" goto :enable-devel +if "%1" == "--disable-devel" goto :disable-devel if "%1" == "--enable-rubygems" goto :enable-rubygems if "%1" == "--disable-rubygems" goto :disable-rubygems if "%1" == "--enable-mjit-support" goto :enable-mjit-support @@ -143,6 
+145,16 @@ goto :loop ; echo>>confargs.tmp %1 \ shift goto :loop ; +:enable-devel + echo>> ~tmp~.mak "RUBY_DEVEL=yes" \ + echo>>confargs.tmp %1 \ + shift +goto :loop ; +:disable-devel + echo>> ~tmp~.mak "RUBY_DEVEL=no" \ + echo>>confargs.tmp %1 \ + shift +goto :loop ; :enable-rubygems echo>> ~tmp~.mak "USE_RUBYGEMS=yes" \ echo>>confargs.tmp %1 \ diff --git a/win32/setup.mak b/win32/setup.mak index c84d4066eabbdc..39323c61c2464f 100644 --- a/win32/setup.mak +++ b/win32/setup.mak @@ -80,6 +80,9 @@ $(BANG)else HAVE_BASERUBY = no $(BANG)endif << +!if "$(RUBY_DEVEL)" == "yes" + RUBY_DEVEL = yes +!endif !if "$(GIT)" != "" @echo GIT = $(GIT)>> $(MAKEFILE) !endif @@ -197,7 +200,6 @@ echo MINOR = RUBY_VERSION_MINOR echo TEENY = RUBY_VERSION_TEENY echo ABI_VERSION = RUBY_ABI_VERSION #if defined RUBY_PATCHLEVEL && RUBY_PATCHLEVEL < 0 -echo RUBY_DEVEL = yes #endif set /a MSC_VER = _MSC_VER #if _MSC_VER >= 1920 From 26054c74619d36b2781e872fad15a1a0bfab1be1 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 11 Aug 2022 15:21:03 +0900 Subject: [PATCH 085/546] Fix paths of exts.mk to clean exts.mk files are one level under the top of extension directories. 
--- template/exts.mk.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/template/exts.mk.tmpl b/template/exts.mk.tmpl index c5f8478d76d79c..5595a08da1d1dd 100644 --- a/template/exts.mk.tmpl +++ b/template/exts.mk.tmpl @@ -154,7 +154,7 @@ ext/extinit.<%=objext%>: % end $(Q)<%= submake %><%=mflags%> V=$(V) $(@F) % if /^(dist|real)clean$/ =~ tgt - $(Q)$(RM) $(@D)/exts.mk + $(Q)$(RM) <%=t[%r[\A(?:\.[^/]+/)?(?:[^/]+/){2}]]%>exts.mk $(Q)$(RMDIRS) $(@D) % end % end From 0c9803b0fdfd17981bd9f59767adab0207c3a74d Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 11 Aug 2022 17:09:17 +0900 Subject: [PATCH 086/546] The "gems" build directory was rename as ".bundle" --- common.mk | 12 ++++++------ template/Makefile.in | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/common.mk b/common.mk index 7e7e0bb67a8ad2..9f89e0ec8af26c 100644 --- a/common.mk +++ b/common.mk @@ -687,18 +687,18 @@ realclean-platform: distclean-platform realclean-spec: distclean-spec realclean-rubyspec: realclean-spec -clean-ext:: ext/clean gems/clean timestamp/clean -distclean-ext:: ext/distclean gems/distclean timestamp/distclean -realclean-ext:: ext/realclean gems/realclean timestamp/realclean +clean-ext:: ext/clean .bundle/clean timestamp/clean +distclean-ext:: ext/distclean .bundle/distclean timestamp/distclean +realclean-ext:: ext/realclean .bundle/realclean timestamp/realclean ext/clean.mk ext/distclean.mk ext/realclean.mk:: ext/clean:: ext/clean.mk ext/distclean:: ext/distclean.mk ext/realclean:: ext/realclean.mk -timestamp/clean:: ext/clean gems/clean -timestamp/distclean:: ext/distclean gems/distclean -timestamp/realclean:: ext/realclean gems/realclean +timestamp/clean:: ext/clean .bundle/clean +timestamp/distclean:: ext/distclean .bundle/distclean +timestamp/realclean:: ext/realclean .bundle/realclean timestamp/clean timestamp/distclean timestamp/realclean:: $(Q)$(RM) $(TIMESTAMPDIR)/.*.time $(TIMESTAMPDIR)/$(arch)/.time diff --git 
a/template/Makefile.in b/template/Makefile.in index 756af363e4ba8e..09d69c8d9ccc13 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -532,12 +532,12 @@ ext/clean.mk ext/distclean.mk ext/realclean.mk:: ext/clean:: ext/clean.sub ext/distclean:: ext/distclean.sub ext/realclean:: ext/realclean.sub -gems/clean:: gems/clean.sub -gems/distclean:: gems/distclean.sub -gems/realclean:: gems/realclean.sub +.bundle/clean:: .bundle/clean.sub +.bundle/distclean:: .bundle/distclean.sub +.bundle/realclean:: .bundle/realclean.sub ext/clean.sub ext/distclean.sub ext/realclean.sub \ -gems/clean.sub gems/distclean.sub gems/realclean.sub:: +.bundle/clean.sub .bundle/distclean.sub .bundle/realclean.sub:: $(Q) set dummy `echo "${EXTS}" | tr , ' '`; shift; \ test "$$#" = 0 && set .; \ set dummy `\ @@ -553,7 +553,7 @@ gems/clean.sub gems/distclean.sub gems/realclean.sub:: fi; \ done || true -ext/distclean ext/realclean gems/distclean gems/realclean:: +ext/distclean ext/realclean .bundle/distclean .bundle/realclean:: $(Q) set dummy `echo "${EXTS}" | tr , ' '`; shift; \ test "$$#" = 0 && set .; \ cd $(@D) 2>/dev/null && \ From 32d1ce96e09773e809d575c17b916012d88d6ffc Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 11 Aug 2022 17:42:56 +0900 Subject: [PATCH 087/546] Fix race conditions when cleaning extensions Clean built directories by `make distclean`, and then clean leftover makefiles for skipped extensions. 
--- template/Makefile.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/template/Makefile.in b/template/Makefile.in index 09d69c8d9ccc13..7968619f9a1a16 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -536,6 +536,10 @@ ext/realclean:: ext/realclean.sub .bundle/distclean:: .bundle/distclean.sub .bundle/realclean:: .bundle/realclean.sub +ext/clean.sub .bundle/clean.sub:: ext/clean.mk +ext/distclean.sub .bundle/distclean.sub:: ext/distclean.mk +ext/realclean.sub .bundle/realclean.sub:: ext/realclean.mk + ext/clean.sub ext/distclean.sub ext/realclean.sub \ .bundle/clean.sub .bundle/distclean.sub .bundle/realclean.sub:: $(Q) set dummy `echo "${EXTS}" | tr , ' '`; shift; \ From cfb9624460a295e4e1723301486d89058c228e07 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Thu, 11 Aug 2022 03:16:49 -0700 Subject: [PATCH 088/546] Fix Array#[] with ArithmeticSequence with negative steps (#5739) * Fix Array#[] with ArithmeticSequence with negative steps Previously, Array#[] when called with an ArithmeticSequence with a negative step did not handle all cases correctly, especially cases involving infinite ranges, inverted ranges, and/or exclusive ends. Fixes [Bug #18247] * Add Array#slice tests for ArithmeticSequence with negative step to test_array Add tests of rb_arithmetic_sequence_beg_len_step C-API function. 
* Fix ext/-test-/arith_seq/beg_len_step/depend * Rename local variables * Fix a variable name Co-authored-by: Kenta Murata <3959+mrkn@users.noreply.github.com> --- array.c | 5 +- enumerator.c | 7 + .../arith_seq/beg_len_step/beg_len_step.c | 19 +++ ext/-test-/arith_seq/beg_len_step/depend | 161 ++++++++++++++++++ ext/-test-/arith_seq/beg_len_step/extconf.rb | 2 + spec/ruby/core/array/shared/slice.rb | 96 +++++++++++ .../arith_seq/test_arith_seq_beg_len_step.rb | 52 ++++++ test/ruby/test_array.rb | 90 ++++++++++ 8 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 ext/-test-/arith_seq/beg_len_step/beg_len_step.c create mode 100644 ext/-test-/arith_seq/beg_len_step/depend create mode 100644 ext/-test-/arith_seq/beg_len_step/extconf.rb create mode 100644 test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb diff --git a/array.c b/array.c index b2ebf3c0e9ebee..793a53f17b24b2 100644 --- a/array.c +++ b/array.c @@ -1373,13 +1373,16 @@ ary_make_partial_step(VALUE ary, VALUE klass, long offset, long len, long step) const VALUE *values = RARRAY_CONST_PTR_TRANSIENT(ary); const long orig_len = len; - if ((step > 0 && step >= len) || (step < 0 && (step < -len))) { + if (step > 0 && step >= len) { VALUE result = ary_new(klass, 1); VALUE *ptr = (VALUE *)ARY_EMBED_PTR(result); RB_OBJ_WRITE(result, ptr, values[offset]); ARY_SET_EMBED_LEN(result, 1); return result; } + else if (step < 0 && step < -len) { + step = -len; + } long ustep = (step < 0) ? 
-step : step; len = (len + ustep - 1) / ustep; diff --git a/enumerator.c b/enumerator.c index d7546ee9e82aa5..2c9858cda6d0f5 100644 --- a/enumerator.c +++ b/enumerator.c @@ -3802,6 +3802,13 @@ rb_arithmetic_sequence_beg_len_step(VALUE obj, long *begp, long *lenp, long *ste *stepp = step; if (step < 0) { + if (aseq.exclude_end && !NIL_P(aseq.end)) { + /* Handle exclusion before range reversal */ + aseq.end = LONG2NUM(NUM2LONG(aseq.end) + 1); + + /* Don't exclude the previous beginning */ + aseq.exclude_end = 0; + } VALUE tmp = aseq.begin; aseq.begin = aseq.end; aseq.end = tmp; diff --git a/ext/-test-/arith_seq/beg_len_step/beg_len_step.c b/ext/-test-/arith_seq/beg_len_step/beg_len_step.c new file mode 100644 index 00000000000000..40c8cbee82b9fa --- /dev/null +++ b/ext/-test-/arith_seq/beg_len_step/beg_len_step.c @@ -0,0 +1,19 @@ +#include "ruby/ruby.h" + +static VALUE +arith_seq_s_beg_len_step(VALUE mod, VALUE obj, VALUE len, VALUE err) +{ + VALUE r; + long beg, len2, step; + + r = rb_arithmetic_sequence_beg_len_step(obj, &beg, &len2, &step, NUM2LONG(len), NUM2INT(err)); + + return rb_ary_new_from_args(4, r, LONG2NUM(beg), LONG2NUM(len2), LONG2NUM(step)); +} + +void +Init_beg_len_step(void) +{ + VALUE cArithSeq = rb_path2class("Enumerator::ArithmeticSequence"); + rb_define_singleton_method(cArithSeq, "__beg_len_step__", arith_seq_s_beg_len_step, 3); +} diff --git a/ext/-test-/arith_seq/beg_len_step/depend b/ext/-test-/arith_seq/beg_len_step/depend new file mode 100644 index 00000000000000..36a2c4c71b1214 --- /dev/null +++ b/ext/-test-/arith_seq/beg_len_step/depend @@ -0,0 +1,161 @@ +# AUTOGENERATED DEPENDENCIES START +beg_len_step.o: $(RUBY_EXTCONF_H) +beg_len_step.o: $(arch_hdrdir)/ruby/config.h +beg_len_step.o: $(hdrdir)/ruby/assert.h +beg_len_step.o: $(hdrdir)/ruby/backward.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/assume.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/attributes.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/bool.h +beg_len_step.o: 
$(hdrdir)/ruby/backward/2/inttypes.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/limits.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/long_long.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/stdalign.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/stdarg.h +beg_len_step.o: $(hdrdir)/ruby/defines.h +beg_len_step.o: $(hdrdir)/ruby/intern.h +beg_len_step.o: $(hdrdir)/ruby/internal/abi.h +beg_len_step.o: $(hdrdir)/ruby/internal/anyargs.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/char.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/double.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/int.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/long.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/short.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/assume.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/artificial.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/cold.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/const.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/constexpr.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/deprecated.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/error.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/attr/flag_enum.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/forceinline.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/format.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noalias.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noexcept.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noinline.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/nonnull.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noreturn.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/pure.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/restrict.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/warning.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/weakref.h +beg_len_step.o: $(hdrdir)/ruby/internal/cast.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_since.h +beg_len_step.o: $(hdrdir)/ruby/internal/config.h +beg_len_step.o: $(hdrdir)/ruby/internal/constant_p.h +beg_len_step.o: $(hdrdir)/ruby/internal/core.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rarray.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rbasic.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rbignum.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rclass.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rdata.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rfile.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rhash.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/robject.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/core/rregexp.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rstring.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rstruct.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +beg_len_step.o: $(hdrdir)/ruby/internal/ctype.h +beg_len_step.o: $(hdrdir)/ruby/internal/dllexport.h +beg_len_step.o: $(hdrdir)/ruby/internal/dosish.h +beg_len_step.o: $(hdrdir)/ruby/internal/error.h +beg_len_step.o: $(hdrdir)/ruby/internal/eval.h +beg_len_step.o: $(hdrdir)/ruby/internal/event.h +beg_len_step.o: $(hdrdir)/ruby/internal/fl_type.h +beg_len_step.o: $(hdrdir)/ruby/internal/gc.h +beg_len_step.o: $(hdrdir)/ruby/internal/glob.h +beg_len_step.o: $(hdrdir)/ruby/internal/globals.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/builtin.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/c_attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/extension.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/feature.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/warning.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/array.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/bignum.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/class.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/compar.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/complex.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/cont.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/dir.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/enum.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/enumerator.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/error.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/eval.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/file.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/gc.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/hash.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/intern/io.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/load.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/marshal.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/numeric.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/object.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/parse.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/proc.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/process.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/random.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/range.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/rational.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/re.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/ruby.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/select.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/signal.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/sprintf.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/string.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/struct.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/thread.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/time.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/variable.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/vm.h +beg_len_step.o: $(hdrdir)/ruby/internal/interpreter.h +beg_len_step.o: $(hdrdir)/ruby/internal/iterator.h +beg_len_step.o: $(hdrdir)/ruby/internal/memory.h +beg_len_step.o: $(hdrdir)/ruby/internal/method.h +beg_len_step.o: $(hdrdir)/ruby/internal/module.h +beg_len_step.o: $(hdrdir)/ruby/internal/newobj.h +beg_len_step.o: $(hdrdir)/ruby/internal/rgengc.h +beg_len_step.o: $(hdrdir)/ruby/internal/scan_args.h +beg_len_step.o: $(hdrdir)/ruby/internal/special_consts.h +beg_len_step.o: $(hdrdir)/ruby/internal/static_assert.h +beg_len_step.o: $(hdrdir)/ruby/internal/stdalign.h +beg_len_step.o: $(hdrdir)/ruby/internal/stdbool.h +beg_len_step.o: $(hdrdir)/ruby/internal/symbol.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/value.h +beg_len_step.o: $(hdrdir)/ruby/internal/value_type.h +beg_len_step.o: $(hdrdir)/ruby/internal/variable.h +beg_len_step.o: $(hdrdir)/ruby/internal/warning_push.h +beg_len_step.o: $(hdrdir)/ruby/internal/xmalloc.h +beg_len_step.o: $(hdrdir)/ruby/missing.h +beg_len_step.o: $(hdrdir)/ruby/ruby.h +beg_len_step.o: $(hdrdir)/ruby/st.h +beg_len_step.o: $(hdrdir)/ruby/subst.h +beg_len_step.o: beg_len_step.c +# AUTOGENERATED DEPENDENCIES END diff --git a/ext/-test-/arith_seq/beg_len_step/extconf.rb b/ext/-test-/arith_seq/beg_len_step/extconf.rb new file mode 100644 index 00000000000000..e72b3ad01f9c09 --- /dev/null +++ b/ext/-test-/arith_seq/beg_len_step/extconf.rb @@ -0,0 +1,2 @@ +# frozen_string_literal: false +create_makefile("-test-/arith_seq/beg_len_step") diff --git a/spec/ruby/core/array/shared/slice.rb b/spec/ruby/core/array/shared/slice.rb index 3b09fdcbc6fc2c..8fb33738b9ce6f 100644 --- a/spec/ruby/core/array/shared/slice.rb +++ b/spec/ruby/core/array/shared/slice.rb @@ -784,6 +784,102 @@ def to.to_int() -2 end a.send(@method, (...-9)).should == [] end + ruby_version_is "3.2" do + describe "can be sliced with Enumerator::ArithmeticSequence" do + it "with infinite/inverted ranges and negative steps" do + @array = [0, 1, 2, 3, 4, 5] + @array.send(@method, (2..).step(-1)).should == [2, 1, 0] + @array.send(@method, (2..).step(-2)).should == [2, 0] + @array.send(@method, (2..).step(-3)).should == [2] + @array.send(@method, (2..).step(-4)).should == [2] + + @array.send(@method, (-3..).step(-1)).should == [3, 2, 1, 0] + @array.send(@method, (-3..).step(-2)).should == [3, 1] + @array.send(@method, (-3..).step(-3)).should == [3, 0] + @array.send(@method, (-3..).step(-4)).should == [3] + @array.send(@method, (-3..).step(-5)).should == [3] + + @array.send(@method, (..0).step(-1)).should == [5, 4, 3, 2, 1, 0] + @array.send(@method, (..0).step(-2)).should == [5, 3, 1] + @array.send(@method, (..0).step(-3)).should == [5, 2] + @array.send(@method, 
(..0).step(-4)).should == [5, 1] + @array.send(@method, (..0).step(-5)).should == [5, 0] + @array.send(@method, (..0).step(-6)).should == [5] + @array.send(@method, (..0).step(-7)).should == [5] + + @array.send(@method, (...0).step(-1)).should == [5, 4, 3, 2, 1] + @array.send(@method, (...0).step(-2)).should == [5, 3, 1] + @array.send(@method, (...0).step(-3)).should == [5, 2] + @array.send(@method, (...0).step(-4)).should == [5, 1] + @array.send(@method, (...0).step(-5)).should == [5] + @array.send(@method, (...0).step(-6)).should == [5] + + @array.send(@method, (...1).step(-1)).should == [5, 4, 3, 2] + @array.send(@method, (...1).step(-2)).should == [5, 3] + @array.send(@method, (...1).step(-3)).should == [5, 2] + @array.send(@method, (...1).step(-4)).should == [5] + @array.send(@method, (...1).step(-5)).should == [5] + + @array.send(@method, (..-5).step(-1)).should == [5, 4, 3, 2, 1] + @array.send(@method, (..-5).step(-2)).should == [5, 3, 1] + @array.send(@method, (..-5).step(-3)).should == [5, 2] + @array.send(@method, (..-5).step(-4)).should == [5, 1] + @array.send(@method, (..-5).step(-5)).should == [5] + @array.send(@method, (..-5).step(-6)).should == [5] + + @array.send(@method, (...-5).step(-1)).should == [5, 4, 3, 2] + @array.send(@method, (...-5).step(-2)).should == [5, 3] + @array.send(@method, (...-5).step(-3)).should == [5, 2] + @array.send(@method, (...-5).step(-4)).should == [5] + @array.send(@method, (...-5).step(-5)).should == [5] + + @array.send(@method, (4..1).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (4..1).step(-2)).should == [4, 2] + @array.send(@method, (4..1).step(-3)).should == [4, 1] + @array.send(@method, (4..1).step(-4)).should == [4] + @array.send(@method, (4..1).step(-5)).should == [4] + + @array.send(@method, (4...1).step(-1)).should == [4, 3, 2] + @array.send(@method, (4...1).step(-2)).should == [4, 2] + @array.send(@method, (4...1).step(-3)).should == [4] + @array.send(@method, (4...1).step(-4)).should == [4] + + 
@array.send(@method, (-2..1).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (-2..1).step(-2)).should == [4, 2] + @array.send(@method, (-2..1).step(-3)).should == [4, 1] + @array.send(@method, (-2..1).step(-4)).should == [4] + @array.send(@method, (-2..1).step(-5)).should == [4] + + @array.send(@method, (-2...1).step(-1)).should == [4, 3, 2] + @array.send(@method, (-2...1).step(-2)).should == [4, 2] + @array.send(@method, (-2...1).step(-3)).should == [4] + @array.send(@method, (-2...1).step(-4)).should == [4] + + @array.send(@method, (4..-5).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (4..-5).step(-2)).should == [4, 2] + @array.send(@method, (4..-5).step(-3)).should == [4, 1] + @array.send(@method, (4..-5).step(-4)).should == [4] + @array.send(@method, (4..-5).step(-5)).should == [4] + + @array.send(@method, (4...-5).step(-1)).should == [4, 3, 2] + @array.send(@method, (4...-5).step(-2)).should == [4, 2] + @array.send(@method, (4...-5).step(-3)).should == [4] + @array.send(@method, (4...-5).step(-4)).should == [4] + + @array.send(@method, (-2..-5).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (-2..-5).step(-2)).should == [4, 2] + @array.send(@method, (-2..-5).step(-3)).should == [4, 1] + @array.send(@method, (-2..-5).step(-4)).should == [4] + @array.send(@method, (-2..-5).step(-5)).should == [4] + + @array.send(@method, (-2...-5).step(-1)).should == [4, 3, 2] + @array.send(@method, (-2...-5).step(-2)).should == [4, 2] + @array.send(@method, (-2...-5).step(-3)).should == [4] + @array.send(@method, (-2...-5).step(-4)).should == [4] + end + end + end + it "can accept nil...nil ranges" do a = [0, 1, 2, 3, 4, 5] a.send(@method, eval("(nil...nil)")).should == a diff --git a/test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb b/test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb new file mode 100644 index 00000000000000..4320c1f20dc233 --- /dev/null +++ b/test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb @@ -0,0 +1,52 @@ +# 
frozen_string_literal: false +require 'test/unit' + +class Test_ArithSeq < Test::Unit::TestCase + def test_beg_len_step + assert_separately([], <<-"end;") #do + require '-test-/arith_seq/beg_len_step' + + r, = Enumerator::ArithmeticSequence.__beg_len_step__([1, 2, 3], 0, 0) + assert_equal(false, r) + + r, = Enumerator::ArithmeticSequence.__beg_len_step__([1, 2, 3], 1, 0) + assert_equal(false, r) + + r, = Enumerator::ArithmeticSequence.__beg_len_step__([1, 2, 3], 3, 0) + assert_equal(false, r) + + r, = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 0, 0) + assert_equal(nil, r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 1, 0) + assert_equal([true, 1, 0, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 2, 0) + assert_equal([true, 1, 1, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 3, 0) + assert_equal([true, 1, 2, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 4, 0) + assert_equal([true, 1, 3, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 5, 0) + assert_equal([true, 1, 3, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(2), 24, 0) + assert_equal([true, 14, 0, 2], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(3), 24, 0) + assert_equal([true, 14, 0, 3], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(3), 22, 0) + assert_equal([true, 12, 0, 3], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(-3), 22, 0) + assert_equal([true, 10, 3, -3], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 0, 1) + assert_equal([true, 1, 3, 1], r) + end; + end +end diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb index e376d76a169d86..6ee468eaef29c7 100644 --- a/test/ruby/test_array.rb +++ b/test/ruby/test_array.rb @@ -1580,6 +1580,96 @@ def test_slice assert_equal_instance(a.values_at(*idx), a.slice((3..90)%2)) idx = 90.step(3, -2).to_a 
assert_equal_instance(a.values_at(*idx), a.slice((90 .. 3)% -2)) + + a = [0, 1, 2, 3, 4, 5] + assert_equal([2, 1, 0], a.slice((2..).step(-1))) + assert_equal([2, 0], a.slice((2..).step(-2))) + assert_equal([2], a.slice((2..).step(-3))) + assert_equal([2], a.slice((2..).step(-4))) + + assert_equal([3, 2, 1, 0], a.slice((-3..).step(-1))) + assert_equal([3, 1], a.slice((-3..).step(-2))) + assert_equal([3, 0], a.slice((-3..).step(-3))) + assert_equal([3], a.slice((-3..).step(-4))) + assert_equal([3], a.slice((-3..).step(-5))) + + assert_equal([5, 4, 3, 2, 1, 0], a.slice((..0).step(-1))) + assert_equal([5, 3, 1], a.slice((..0).step(-2))) + assert_equal([5, 2], a.slice((..0).step(-3))) + assert_equal([5, 1], a.slice((..0).step(-4))) + assert_equal([5, 0], a.slice((..0).step(-5))) + assert_equal([5], a.slice((..0).step(-6))) + assert_equal([5], a.slice((..0).step(-7))) + + assert_equal([5, 4, 3, 2, 1], a.slice((...0).step(-1))) + assert_equal([5, 3, 1], a.slice((...0).step(-2))) + assert_equal([5, 2], a.slice((...0).step(-3))) + assert_equal([5, 1], a.slice((...0).step(-4))) + assert_equal([5], a.slice((...0).step(-5))) + assert_equal([5], a.slice((...0).step(-6))) + + assert_equal([5, 4, 3, 2], a.slice((...1).step(-1))) + assert_equal([5, 3], a.slice((...1).step(-2))) + assert_equal([5, 2], a.slice((...1).step(-3))) + assert_equal([5], a.slice((...1).step(-4))) + assert_equal([5], a.slice((...1).step(-5))) + + assert_equal([5, 4, 3, 2, 1], a.slice((..-5).step(-1))) + assert_equal([5, 3, 1], a.slice((..-5).step(-2))) + assert_equal([5, 2], a.slice((..-5).step(-3))) + assert_equal([5, 1], a.slice((..-5).step(-4))) + assert_equal([5], a.slice((..-5).step(-5))) + assert_equal([5], a.slice((..-5).step(-6))) + + assert_equal([5, 4, 3, 2], a.slice((...-5).step(-1))) + assert_equal([5, 3], a.slice((...-5).step(-2))) + assert_equal([5, 2], a.slice((...-5).step(-3))) + assert_equal([5], a.slice((...-5).step(-4))) + assert_equal([5], a.slice((...-5).step(-5))) + + assert_equal([4, 
3, 2, 1], a.slice((4..1).step(-1))) + assert_equal([4, 2], a.slice((4..1).step(-2))) + assert_equal([4, 1], a.slice((4..1).step(-3))) + assert_equal([4], a.slice((4..1).step(-4))) + assert_equal([4], a.slice((4..1).step(-5))) + + assert_equal([4, 3, 2], a.slice((4...1).step(-1))) + assert_equal([4, 2], a.slice((4...1).step(-2))) + assert_equal([4], a.slice((4...1).step(-3))) + assert_equal([4], a.slice((4...1).step(-4))) + + assert_equal([4, 3, 2, 1], a.slice((-2..1).step(-1))) + assert_equal([4, 2], a.slice((-2..1).step(-2))) + assert_equal([4, 1], a.slice((-2..1).step(-3))) + assert_equal([4], a.slice((-2..1).step(-4))) + assert_equal([4], a.slice((-2..1).step(-5))) + + assert_equal([4, 3, 2], a.slice((-2...1).step(-1))) + assert_equal([4, 2], a.slice((-2...1).step(-2))) + assert_equal([4], a.slice((-2...1).step(-3))) + assert_equal([4], a.slice((-2...1).step(-4))) + + assert_equal([4, 3, 2, 1], a.slice((4..-5).step(-1))) + assert_equal([4, 2], a.slice((4..-5).step(-2))) + assert_equal([4, 1], a.slice((4..-5).step(-3))) + assert_equal([4], a.slice((4..-5).step(-4))) + assert_equal([4], a.slice((4..-5).step(-5))) + + assert_equal([4, 3, 2], a.slice((4...-5).step(-1))) + assert_equal([4, 2], a.slice((4...-5).step(-2))) + assert_equal([4], a.slice((4...-5).step(-3))) + assert_equal([4], a.slice((4...-5).step(-4))) + + assert_equal([4, 3, 2, 1], a.slice((-2..-5).step(-1))) + assert_equal([4, 2], a.slice((-2..-5).step(-2))) + assert_equal([4, 1], a.slice((-2..-5).step(-3))) + assert_equal([4], a.slice((-2..-5).step(-4))) + assert_equal([4], a.slice((-2..-5).step(-5))) + + assert_equal([4, 3, 2], a.slice((-2...-5).step(-1))) + assert_equal([4, 2], a.slice((-2...-5).step(-2))) + assert_equal([4], a.slice((-2...-5).step(-3))) + assert_equal([4], a.slice((-2...-5).step(-4))) end def test_slice_out_of_range From c361cf44c03275405989022054d7c20efcc2a2ce Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 11 Aug 2022 08:51:04 -0500 Subject: [PATCH 089/546] [ruby/rdoc] 
[DOC] Make example formats explicit and consistent (https://github.com/ruby/rdoc/pull/913) https://github.com/ruby/rdoc/commit/7e6ef6c855 --- doc/rdoc/markup_reference.rb | 153 ++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 66 deletions(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index b3d1d97f4f61be..059511f478e009 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -29,16 +29,37 @@ # see :nodoc:, :doc:, and :notnew. # - \RDoc directives in single-line comments; # see other {Directives}[rdoc-ref:RDoc::MarkupReference@Directives]. -# - The Ruby code itself; -# see {Documentation Derived from Ruby Code}[rdoc-ref:RDoc::MarkupReference@Documentation+Derived+from+Ruby+Code] +# - The Ruby code itself (but not from C code); +# see {Documentation Derived from Ruby Code}[rdoc-ref:RDoc::MarkupReference@Documentation+Derived+from+Ruby+Code]. # # == Markup in Comments # -# A single-line or multi-line comment that immediately precedes -# the definition of a class, module, method, alias, constant, or attribute -# becomes the documentation for that defined object. +# The treatment of markup in comments varies according to the type of file: # -# (\RDoc ignores other such comments that do not precede definitions.) +# - .rb (Ruby code file): markup is parsed from Ruby comments. +# - .c (C code file): markup is parsed from C comments. +# - .rdoc (RDoc text file): markup is parsed from the entire file. +# +# The comment associated with +# a Ruby class, module, method, alias, constant, or attribute +# becomes the documentation for that defined object: +# +# - In a Ruby file, that comment immediately precedes +# the definition of the object. +# - In a C file, that comment immediately precedes +# the function that implements a method, +# or otherwise immediately precedes the definition of the object. +# +# In either a Ruby or a C file, +# \RDoc ignores comments that do not precede object definitions. 
+# +# In an \RDoc file, the text is not associated with any code object, +# but may (depending on how the documentation is built), +# become a separate page. +# +# Almost all examples on this page are all RDoc-like; +# that is, they have no comment markers like Ruby # +# or C /* ... */. # # === Margins # @@ -96,11 +117,11 @@ # # Example input: # -# # \RDoc produces HTML and command-line documentation for Ruby projects. -# # \RDoc includes the rdoc and ri tools for generating and displaying -# # documentation from the command-line. -# # -# # You'll love it. +# \RDoc produces HTML and command-line documentation for Ruby projects. +# \RDoc includes the rdoc and ri tools for generating and displaying +# documentation from the command-line. +# +# You'll love it. # # Rendered HTML: # >>> @@ -133,15 +154,15 @@ # # Example input: # -# # This is not verbatim text. -# # -# # This is verbatim text. -# # Whitespace is honored. # See? -# # Whitespace is honored. # See? -# # -# # This is still the same verbatim text block. -# # -# # This is not verbatim text. +# This is not verbatim text. +# +# This is verbatim text. +# Whitespace is honored. # See? +# Whitespace is honored. # See? +# +# This is still the same verbatim text block. +# +# This is not verbatim text. # # Rendered HTML: # >>> @@ -279,13 +300,13 @@ # # Example input: # -# # - An item. -# # - Another. -# # - An item spanning -# # multiple lines. -# # -# # * Yet another. -# # - Last one. +# - An item. +# - Another. +# - An item spanning +# multiple lines. +# +# * Yet another. +# - Last one. # # Rendered HTML: # >>> @@ -305,13 +326,13 @@ # # Example input: # -# # 100. An item. -# # 10. Another. -# # 1. An item spanning -# # multiple lines. -# # -# # 1. Yet another. -# # 1000. Last one. +# 100. An item. +# 10. Another. +# 1. An item spanning +# multiple lines. +# +# 1. Yet another. +# 1000. Last one. # # Rendered HTML: # >>> @@ -331,13 +352,13 @@ # # Example input: # -# # z. An item. -# # y. Another. -# # x. 
An item spanning -# # multiple lines. -# # -# # x. Yet another. -# # a. Last one. +# z. An item. +# y. Another. +# x. An item spanning +# multiple lines. +# +# x. Yet another. +# a. Last one. # # Rendered HTML: # >>> @@ -356,13 +377,13 @@ # # Example input: # -# # [foo] An item. -# # bat:: Another. -# # [bag] An item spanning -# # multiple lines. -# # -# # [bar baz] Yet another. -# # bam:: Last one. +# [foo] An item. +# bat:: Another. +# [bag] An item spanning +# multiple lines. +# +# [bar baz] Yet another. +# bam:: Last one. # # Rendered HTML: # >>> @@ -381,20 +402,20 @@ # # Examples: # -# # = Section 1 -# # == Section 1.1 -# # === Section 1.1.1 -# # === Section 1.1.2 -# # == Section 1.2 -# # = Section 2 -# # = Foo -# # == Bar -# # === Baz -# # ==== Bam -# # ===== Bat -# # ====== Bad -# # ============Still a Heading (Level 6) -# # \== Not a Heading +# = Section 1 +# == Section 1.1 +# === Section 1.1.1 +# === Section 1.1.2 +# == Section 1.2 +# = Section 2 +# = Foo +# == Bar +# === Baz +# ==== Bam +# ===== Bat +# ====== Bad +# ============Still a Heading (Level 6) +# \== Not a Heading # # A heading may contain only one type of nested block: # @@ -1147,10 +1168,10 @@ def dummy_instance_method(foo, bar); end; # # Here is the :call-seq: directive given for the method: # - # # :call-seq: - # # call_seq_directive(foo, bar) - # # Can be anything -> bar - # # Also anything more -> baz or bat + # :call-seq: + # call_seq_directive(foo, bar) + # Can be anything -> bar + # Also anything more -> baz or bat # def call_seq_directive nil From 49517b3bb436456407e0ee099c7442f3ab5ac53d Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 26 Feb 2021 12:14:48 -0800 Subject: [PATCH 090/546] Fix inspect for unicode codepoint 0x85 This is an inelegant hack, by manually checking for this specific code point in rb_str_inspect. Some testing indicates that this is the only code point affected. 
It's possible a better fix would be inside of lower-level encoding code, such that rb_enc_isprint would return false and not true for codepoint 0x85. Fixes [Bug #16842] --- string.c | 10 +++++++++- test/ruby/test_string.rb | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/string.c b/string.c index 85819e26a393a3..e74783cf923652 100644 --- a/string.c +++ b/string.c @@ -6777,7 +6777,15 @@ rb_str_inspect(VALUE str) prev = p; continue; } - if ((enc == resenc && rb_enc_isprint(c, enc)) || + /* The special casing of 0x85 (NEXT_LINE) here is because + * Oniguruma historically treats it as printable, but it + * doesn't match the print POSIX bracket class or character + * property in regexps. + * + * See Ruby Bug #16842 for details: + * https://bugs.ruby-lang.org/issues/16842 + */ + if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) || (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) { continue; } diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index d37924dec1904e..ab14a3c17bb7f2 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2614,6 +2614,11 @@ def test_inspect_nul assert_equal '"\x0012"', s.inspect, bug8290 end + def test_inspect_next_line + bug16842 = '[ruby-core:98231]' + assert_equal '"\\u0085"', 0x85.chr(Encoding::UTF_8).inspect, bug16842 + end + def test_partition assert_equal(%w(he l lo), S("hello").partition(/l/)) assert_equal(%w(he l lo), S("hello").partition("l")) From a677aa0fcf6b43668b1e6ceac67b4a4121afd818 Mon Sep 17 00:00:00 2001 From: git Date: Fri, 12 Aug 2022 00:47:48 +0900 Subject: [PATCH 091/546] * 2022-08-12 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 780a93e8dcc0de..1b561623e94b58 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 11 +#define RUBY_RELEASE_DAY 12 #include "ruby/version.h" #include 
"ruby/internal/abi.h" From 8a3f401b24e1b5f3e3a0f44c568a3e66ed595d42 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 11 Aug 2022 11:43:39 -0500 Subject: [PATCH 092/546] [ruby/rdoc] Improvements to Text Markup examples (https://github.com/ruby/rdoc/pull/915) https://github.com/ruby/rdoc/commit/d00ddfe57c --- doc/rdoc/markup_reference.rb | 104 ++++++++++++++++------------------- 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index 059511f478e009..c7578f3581107f 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -649,27 +649,27 @@ # # Example input: # -# Two words in a paragraph. +# Italicized words in a paragraph. # # >>> -# Two words in a block quote. +# Italicized words in a block quote. # -# - Two words in a bullet list item. +# - Italicized words in a list item. # -# ====== Two words in a Heading +# ====== Italicized words in a Heading # # Italicized passage containing *bold* and +monofont+. # # Rendered HTML: # >>> -# Two words in a paragraph. +# Italicized words in a paragraph. # # >>> -# Two words in a block quote. +# Italicized words in a block quote. # -# - Two words in a bullet list item. +# - Italicized words in a list item. # -# ====== Two words in a Heading +# ====== Italicized words in a Heading # # Italicized passage containing *bold* and +monofont+. # @@ -678,29 +678,25 @@ # # Example input: # -# _Word_ in a paragraph. +# _Italic_ in a paragraph. # # >>> -# _Word_ in a block quote. +# _Italic_ in a block quote. # -# - _Word_ in a bullet list item. +# - _Italic_ in a list item. # -# [_word_] _Word_ in a labeled list item. -# -# ====== _Word_ in a Heading +# ====== _Italic_ in a Heading # # Rendered HTML: # >>> -# _Word_ in a paragraph. +# _Italic_ in a paragraph. # # >>> -# _Word_ in a block quote. -# -# - _Word_ in a bullet list item. +# _Italic_ in a block quote. # -# [_word_] _Word_ in a labeled list item. +# - _Italic_ in a list item. 
# -# ====== _Word_ in a Heading +# ====== _Italic_ in a Heading # # ===== Bold # @@ -708,28 +704,28 @@ # # Example input: # -# Two words in a paragraph. +# Bold words in a paragraph. # # >>> -# Two words in a block quote. +# Bold words in a block quote. # -# - Two words in a bullet list item. +# - Bold words in a list item. # -# ====== Two words in a Heading +# ====== Bold words in a Heading # # Bold passage containing _italics_ and +monofont+. # # Rendered HTML: # # >>> -# Two words in a paragraph. +# Bold words in a paragraph. # # >>> -# Two words in a block quote. +# Bold words in a block quote. # -# - Two words in a bullet list item. +# - Bold words in a list item. # -# ====== Two words in a Heading +# ====== Bold words in a Heading # # Bold passage containing _italics_ and +monofont+. # @@ -738,30 +734,26 @@ # # Example input: # -# *Word* in a paragraph. +# *Bold* in a paragraph. # # >>> -# *Word* in a block quote. +# *Bold* in a block quote. # -# - *Word* in a bullet list item. +# - *Bold* in a list item. # -# [*word*] *Word* in a labeled list item. -# -# ===== *Word* in a Heading +# ===== *Bold* in a Heading # # Rendered HTML: # # >>> -# *Word* in a paragraph. +# *Bold* in a paragraph. # # >>> -# *Word* in a block quote. -# -# - *Word* in a bullet list item. +# *Bold* in a block quote. # -# [*word*] *Word* in a labeled list item. +# - *Bold* in a list item. # -# ===== *Word* in a Heading +# ===== *Bold* in a Heading # # ===== Monofont # @@ -771,28 +763,28 @@ # # Example input: # -# Two words in a paragraph. +# Monofont words in a paragraph. # # >>> -# Two words in a block quote. +# Monofont words in a block quote. # -# - Two words in a bullet list item. +# - Monofont words in a list item. # -# ====== Two words in heading +# ====== Monofont words in heading # # Monofont passage containing _italics_ and *bold*. # # Rendered HTML: # # >>> -# Two words in a paragraph. +# Monofont words in a paragraph. # # >>> -# Two words in a block quote. 
+# Monofont words in a block quote. # -# - Two words in a bullet list item. +# - Monofont words in a list item. # -# ====== Two words in heading +# ====== Monofont words in heading # # Monofont passage containing _italics_ and *bold*. # @@ -801,26 +793,26 @@ # # Example input: # -# +Word+ in a paragraph. +# +Monofont+ in a paragraph. # # >>> -# +Word+ in a block quote. +# +Monofont+ in a block quote. # -# - +Word+ in a bullet list item. +# - +Monofont+ in a list item. # -# ====== +Word+ in a Heading +# ====== +Monofont+ in a Heading # # Rendered HTML: # # >>> -# +Word+ in a paragraph. +# +Monofont+ in a paragraph. # # >>> -# +Word+ in a block quote. +# +Monofont+ in a block quote. # -# - +Word+ in a bullet list item. +# - +Monofont+ in a list item. # -# ====== +Word+ in a Heading +# ====== +Monofont+ in a Heading # # ==== Escaping Text Markup # From 5e3e4bc9840160f2a6f85cba2e05279d7668b014 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 11 Aug 2022 15:08:57 -0500 Subject: [PATCH 093/546] [ruby/rdoc] Mods to section Text Markup (https://github.com/ruby/rdoc/pull/916) https://github.com/ruby/rdoc/commit/5506d4d67e --- doc/rdoc/markup_reference.rb | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index c7578f3581107f..04e90aad23c420 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -94,7 +94,7 @@ # - {Directive}[rdoc-ref:RDoc::MarkupReference@Directives]: # various special directions for the rendering. # - {Text Markup}[rdoc-ref:RDoc:MarkupReference@Text+Markup]: -# text to be rendered in italic, bold, or monofont. +# text to be rendered in a special way. # # About the blocks: # @@ -632,18 +632,24 @@ # # ==== Text Markup # -# Text markup is metatext that marks text as: +# Text markup is metatext that affects HTML rendering: # -# - Italic. -# - Bold. -# - Monofont. +# - Typeface: italic, bold, monofont. 
+# - Character conversions: copyright, trademark, certain punctuation. +# - Links. +# - Escapes: marking text as "not markup." # -# Text markup may contain only one type of nested block: +# ===== Typeface Markup # -# - More text markup: +# Typeface markup can specify that text is to be rendered +# as italic, bold, or monofont. +# +# Typeface markup may contain only one type of nested block: +# +# - More typeface markup: # italic, bold, monofont. # -# ===== Italic +# ====== Italic # # Text may be marked as italic via HTML tag or . # @@ -698,7 +704,7 @@ # # ====== _Italic_ in a Heading # -# ===== Bold +# ====== Bold # # Text may be marked as bold via HTML tag . # @@ -755,7 +761,7 @@ # # ===== *Bold* in a Heading # -# ===== Monofont +# ====== Monofont # # Text may be marked as monofont # -- sometimes called 'typewriter font' -- From d9f1b8baa3a21b2a65af7fcab6a45c30b3f9efee Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 12 Aug 2022 14:04:04 +0900 Subject: [PATCH 094/546] [ruby/error_highlight] Add a note about the current limitation of ErrorHighlight.spot https://github.com/ruby/error_highlight/commit/489ce80a62 --- lib/error_highlight/base.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/error_highlight/base.rb b/lib/error_highlight/base.rb index dbd173a5cd4768..4c115cc8285c6b 100644 --- a/lib/error_highlight/base.rb +++ b/lib/error_highlight/base.rb @@ -22,6 +22,14 @@ module ErrorHighlight # snippet: String, # script_lines: [String], # } | nil + # + # Limitations: + # + # Currently, ErrorHighlight.spot only supports a single-line code fragment. + # Therefore, if the return value is not nil, first_lineno and last_lineno will have + # the same value. If the relevant code fragment spans multiple lines + # (e.g., Array#[] of +ary[(newline)expr(newline)]+), the method will return nil. + # This restriction may be removed in the future. 
def self.spot(obj, **opts) case obj when Exception From 04815ea968ebef616cfdb316840158f6c87a7277 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 13:38:21 +0900 Subject: [PATCH 095/546] All extensions in bundled gems are built by build-ext now `RbInstall::GemInstaller#build_extensions` has nothing to do. --- tool/rbinstall.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tool/rbinstall.rb b/tool/rbinstall.rb index ba0d911fc23130..e6fff940eee4c3 100755 --- a/tool/rbinstall.rb +++ b/tool/rbinstall.rb @@ -904,11 +904,8 @@ def install RbInstall.no_write(options) {super} end - if RbConfig::CONFIG["LIBRUBY_RELATIVE"] == "yes" || RbConfig::CONFIG["CROSS_COMPILING"] == "yes" || ENV["DESTDIR"] - # TODO: always build extensions in bundled gems by build-ext and - # install the built binaries. - def build_extensions - end + # Now build-ext builds all extensions including bundled gems. + def build_extensions end def generate_bin_script(filename, bindir) From a3a6d2d9a390697dfd0ae9f808cb301680a6f24a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 13:40:36 +0900 Subject: [PATCH 096/546] No bundled gems to be installed from gem now --- tool/rbinstall.rb | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/tool/rbinstall.rb b/tool/rbinstall.rb index e6fff940eee4c3..e6d0f592f5e330 100755 --- a/tool/rbinstall.rb +++ b/tool/rbinstall.rb @@ -1071,28 +1071,8 @@ def install_default_gem(dir, srcdir, bindir) prepare "bundled gem cache", gem_dir+"/cache" install installed_gems, gem_dir+"/cache" end - next if gems.empty? 
- if defined?(Zlib) - silent = Gem::SilentUI.new - gems.each do |gem| - package = Gem::Package.new(gem) - inst = RbInstall::GemInstaller.new(package, options) - inst.spec.extension_dir = "#{extensions_dir}/#{inst.spec.full_name}" - begin - Gem::DefaultUserInteraction.use_ui(silent) {inst.install} - rescue Gem::InstallError - next - end - gemname = File.basename(gem) - puts "#{INDENT}#{gemname}" - end - # fix directory permissions - # TODO: Gem.install should accept :dir_mode option or something - File.chmod($dir_mode, *Dir.glob(install_dir+"/**/")) - # fix .gemspec permissions - File.chmod($data_mode, *Dir.glob(install_dir+"/specifications/*.gemspec")) - else - puts "skip installing bundled gems because of lacking zlib" + unless gems.empty? + puts "skipped bundled gems: #{gems.join(' ')}" end end From 9a11d50dc7542cda4fc21352807df9f86b0d1093 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 10:18:38 +0900 Subject: [PATCH 097/546] [DOC] Use `true`/`false` for `@retval`s which are `bool` --- include/ruby/internal/encoding/ctype.h | 86 +++++++++++++------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h index 64aaf0a990898a..70b6cc7909e7db 100644 --- a/include/ruby/internal/encoding/ctype.h +++ b/include/ruby/internal/encoding/ctype.h @@ -36,8 +36,8 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() * @param[in] p Pointer to a possibly-middle of a character. * @param[in] end End of the string. * @param[in] enc Encoding. - * @retval 0 It isn't. - * @retval otherwise It is. + * @retval false It isn't. + * @retval true It is. */ static inline bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) @@ -53,11 +53,11 @@ rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) * encoding. The "character type" here is a set of macros defined in onigmo.h, * like `ONIGENC_CTYPE_PUNCT`. * - * @param[in] c An `OnigCodePoint` value. 
- * @param[in] t An `OnigCtype` value. - * @param[in] enc A `rb_encoding*` value. - * @retval 1 `c` is of `t` in `enc`. - * @retval 0 Otherwise. + * @param[in] c An `OnigCodePoint` value. + * @param[in] t An `OnigCtype` value. + * @param[in] enc A `rb_encoding*` value. + * @retval true `c` is of `t` in `enc`. + * @retval false Otherwise. */ static inline bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) @@ -68,10 +68,10 @@ rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) /** * Identical to rb_isascii(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 0 `c` is out of range of ASCII character set in `enc`. - * @retval 1 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval false `c` is out of range of ASCII character set in `enc`. + * @retval true Otherwise. * * @internal * @@ -87,10 +87,10 @@ rb_enc_isascii(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isalpha(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ALPHA". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALPHA". + * @retval false Otherwise. */ static inline bool rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) @@ -101,10 +101,10 @@ rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_islower(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "LOWER". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "LOWER". + * @retval false Otherwise. 
*/ static inline bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc) @@ -115,10 +115,10 @@ rb_enc_islower(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isupper(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "UPPER". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "UPPER". + * @retval false Otherwise. */ static inline bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) @@ -129,10 +129,10 @@ rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_ispunct(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PUNCT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PUNCT". + * @retval false Otherwise. */ static inline bool rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) @@ -143,10 +143,10 @@ rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isalnum(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ANUM". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ANUM". + * @retval false Otherwise. */ static inline bool rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) @@ -157,10 +157,10 @@ rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isprint(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. 
*/ static inline bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) @@ -171,10 +171,10 @@ rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isspace(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. */ static inline bool rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) @@ -185,10 +185,10 @@ rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isdigit(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "DIGIT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "DIGIT". + * @retval false Otherwise. */ static inline bool rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc) From a201cfd0cdb9f613f687dc6fee1908ec54e63c72 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 13:36:11 +0900 Subject: [PATCH 098/546] Add missing `rb_enc_iscntrl` --- include/ruby/internal/encoding/ctype.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h index 70b6cc7909e7db..05c314aeb3d487 100644 --- a/include/ruby/internal/encoding/ctype.h +++ b/include/ruby/internal/encoding/ctype.h @@ -126,6 +126,20 @@ rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) return ONIGENC_IS_CODE_UPPER(enc, c); } +/** + * Identical to rb_iscntrl(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "CNTRL". + * @retval false Otherwise. 
+ */ +static inline bool +rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CNTRL(enc, c); +} + /** * Identical to rb_ispunct(), except it additionally takes an encoding. * @@ -235,6 +249,7 @@ RBIMPL_SYMBOL_EXPORT_END() #define rb_enc_isdigit rb_enc_isdigit #define rb_enc_islower rb_enc_islower #define rb_enc_isprint rb_enc_isprint +#define rb_enc_iscntrl rb_enc_iscntrl #define rb_enc_ispunct rb_enc_ispunct #define rb_enc_isspace rb_enc_isspace #define rb_enc_isupper rb_enc_isupper From cd1a0b3caaa5446e9258c192cf483b6dfe8d7819 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 12 Jul 2022 16:44:57 +0900 Subject: [PATCH 099/546] Stop defining `RUBY_ABI_VERSION` if released versions As commented in include/ruby/internal/abi.h, since teeny versions of Ruby should guarantee ABI compatibility, `RUBY_ABI_VERSION` has no role in released versions of Ruby. --- configure.ac | 6 ++++++ ext/-test-/abi/extconf.rb | 1 + include/ruby/internal/abi.h | 6 +++++- test/-ext-/test_abi.rb | 2 ++ version.h | 11 +++++++++-- win32/Makefile.sub | 8 +++++++- win32/setup.mak | 4 +++- 7 files changed, 33 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index bc21c262922161..0f4a31af7e7dce 100644 --- a/configure.ac +++ b/configure.ac @@ -88,7 +88,9 @@ AC_SUBST(GIT) AC_SUBST(HAVE_GIT) eval `sed -n -e ['s/^@%:@define RUBY_[A-Z_]*VERSION_\([A-Z][A-Z][A-Z_0-9]*\) \([0-9][0-9]*\)$/\1=\2/p'] \ + -e ['s/^@%:@define \(RUBY_ABI_VERSION\) \([0-9][0-9]*\).*/\1=\2/p'] \ -e ['s/^@%:@define \(RUBY_PATCHLEVEL\) \(.*\)/\1=\2/p'] \ + $srcdir/include/ruby/internal/abi.h \ $srcdir/include/ruby/version.h $srcdir/version.h` for v in MAJOR MINOR TEENY; do AS_IF([eval "test \"\$$v\" = ''"], [ @@ -100,6 +102,9 @@ AC_SUBST(MINOR) AC_SUBST(TEENY) AC_SUBST(RUBY_API_VERSION, '$(MAJOR).$(MINOR)') AC_SUBST(RUBY_PROGRAM_VERSION, '$(MAJOR).$(MINOR).$(TEENY)') +AS_CASE([$RUBY_PATCHLEVEL], [-*], [ + AC_DEFINE_UNQUOTED(RUBY_ABI_VERSION, [${RUBY_ABI_VERSION}]) +], 
[RUBY_ABI_VERSION=]) AS_IF([test "$program_prefix" = NONE], [ program_prefix= @@ -4118,6 +4123,7 @@ AS_CASE(["$ruby_version"], AS_IF([test ${RUBY_LIB_VERSION_STYLE+set}], [ { echo "#define RUBY_LIB_VERSION_STYLE $RUBY_LIB_VERSION_STYLE" + echo '@%:@include "confdefs.h"' echo '#define STRINGIZE(x) x' test -f revision.h -o -f "${srcdir}/revision.h" || echo '#define RUBY_REVISION 0' echo '#include "version.h"' diff --git a/ext/-test-/abi/extconf.rb b/ext/-test-/abi/extconf.rb index d786b15db98c7f..3b090b75531d90 100644 --- a/ext/-test-/abi/extconf.rb +++ b/ext/-test-/abi/extconf.rb @@ -1,3 +1,4 @@ # frozen_string_literal: false +return unless RUBY_PATCHLEVEL < 0 require_relative "../auto_ext.rb" auto_ext(inc: true) diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h index e42a1777ff5eae..fe1977a9a181a0 100644 --- a/include/ruby/internal/abi.h +++ b/include/ruby/internal/abi.h @@ -1,6 +1,8 @@ #ifndef RUBY_ABI_H #define RUBY_ABI_H +#ifdef RUBY_ABI_VERSION /* should match the definition in config.h */ + /* This number represents Ruby's ABI version. * * In development Ruby, it should be bumped every time an ABI incompatible @@ -19,7 +21,7 @@ * - Backwards compatible refactors. * - Editing comments. * - * In released versions of Ruby, this number should not be changed since teeny + * In released versions of Ruby, this number is not defined since teeny * versions of Ruby should guarantee ABI compatibility. */ #define RUBY_ABI_VERSION 2 @@ -49,3 +51,5 @@ ruby_abi_version(void) #endif #endif + +#endif diff --git a/test/-ext-/test_abi.rb b/test/-ext-/test_abi.rb index 59e70107a5dd57..d3ea6bb9b105fe 100644 --- a/test/-ext-/test_abi.rb +++ b/test/-ext-/test_abi.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +return unless RUBY_PATCHLEVEL < 0 + class TestABI < Test::Unit::TestCase def test_require_lib_with_incorrect_abi_on_dev_ruby omit "ABI is not checked" unless abi_checking_supported? 
diff --git a/version.h b/version.h index 1b561623e94b58..d79ba396f78b2e 100644 --- a/version.h +++ b/version.h @@ -38,18 +38,25 @@ #define RUBY_RELEASE_DAY_STR STRINGIZE(RUBY_RELEASE_DAY) #endif +#ifdef RUBY_ABI_VERSION +# define RUBY_ABI_VERSION_SUFFIX "+"STRINGIZE(RUBY_ABI_VERSION) +#else +# define RUBY_ABI_VERSION_SUFFIX "" +#endif #if !defined RUBY_LIB_VERSION && defined RUBY_LIB_VERSION_STYLE # if RUBY_LIB_VERSION_STYLE == 3 # define RUBY_LIB_VERSION STRINGIZE(RUBY_API_VERSION_MAJOR)"."STRINGIZE(RUBY_API_VERSION_MINOR) \ - "."STRINGIZE(RUBY_API_VERSION_TEENY)"+"STRINGIZE(RUBY_ABI_VERSION) + "."STRINGIZE(RUBY_API_VERSION_TEENY) RUBY_ABI_VERSION_SUFFIX # elif RUBY_LIB_VERSION_STYLE == 2 # define RUBY_LIB_VERSION STRINGIZE(RUBY_API_VERSION_MAJOR)"."STRINGIZE(RUBY_API_VERSION_MINOR) \ - "+"STRINGIZE(RUBY_ABI_VERSION) + RUBY_ABI_VERSION_SUFFIX # endif #endif #if RUBY_PATCHLEVEL == -1 #define RUBY_PATCHLEVEL_STR "dev" +#elif defined RUBY_ABI_VERSION +#error RUBY_ABI_VERSION is defined in non-development branch #else #define RUBY_PATCHLEVEL_STR "" #endif diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 9dc5ee6f515f2e..bea24450ed0390 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -148,7 +148,10 @@ PLATFORM_DIR = win32 arch = $(ARCH)-$(PLATFORM) sitearch = $(ARCH)-$(RT) !if !defined(ruby_version) -ruby_version = $(MAJOR).$(MINOR).0+$(ABI_VERSION) +ruby_version = $(MAJOR).$(MINOR).0 +!endif +!if defined(ABI_VERSION) +ruby_version = $(ruby_version)+$(ABI_VERSION) !endif !if !defined(RUBY_VERSION_NAME) RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version) @@ -624,6 +627,9 @@ $(CONFIG_H): $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub #endif !endif #define RUBY_MSVCRT_VERSION $(RT_VER) +!if defined(ABI_VERSION) +#define RUBY_ABI_VERSION $(ABI_VERSION) +!endif #define STDC_HEADERS 1 #define HAVE_SYS_TYPES_H 1 #define HAVE_SYS_STAT_H 1 diff --git a/win32/setup.mak b/win32/setup.mak index 39323c61c2464f..fd804a84cec6d5 100644 --- 
a/win32/setup.mak +++ b/win32/setup.mak @@ -178,6 +178,7 @@ main(void) -version-: nul verconf.mk verconf.mk: nul + @findstr /R /C:"^#define RUBY_ABI_VERSION " $(srcdir:/=\)\include\ruby\internal\abi.h > $(@) @$(CPP) -I$(srcdir) -I$(srcdir)/include <<"Creating $(@)" > $(*F).bat && cmd /c $(*F).bat > $(@) @echo off #define RUBY_REVISION 0 @@ -198,8 +199,9 @@ echo RUBY_RELEASE_DAY = %ruby_release_day:~-2% echo MAJOR = RUBY_VERSION_MAJOR echo MINOR = RUBY_VERSION_MINOR echo TEENY = RUBY_VERSION_TEENY -echo ABI_VERSION = RUBY_ABI_VERSION #if defined RUBY_PATCHLEVEL && RUBY_PATCHLEVEL < 0 +#include "$(@F)" +echo ABI_VERSION = RUBY_ABI_VERSION #endif set /a MSC_VER = _MSC_VER #if _MSC_VER >= 1920 From 4165fd0e763439421296fbc95d754ad53e6ae84f Mon Sep 17 00:00:00 2001 From: Kouhei Yanagita Date: Fri, 27 May 2022 15:46:46 +0900 Subject: [PATCH 100/546] Add Numeric#ceildiv and Integer#ceildiv --- complex.c | 1 + numeric.c | 49 +++++++++++++++++++++++++++++++++++++++ test/ruby/test_complex.rb | 1 + test/ruby/test_integer.rb | 17 ++++++++++++++ test/ruby/test_numeric.rb | 14 +++++++++++ 5 files changed, 82 insertions(+) diff --git a/complex.c b/complex.c index d625ced7fa865b..865466c499ba4a 100644 --- a/complex.c +++ b/complex.c @@ -2335,6 +2335,7 @@ Init_Complex(void) rb_undef_method(rb_cComplex, "%"); rb_undef_method(rb_cComplex, "div"); rb_undef_method(rb_cComplex, "divmod"); + rb_undef_method(rb_cComplex, "ceildiv"); rb_undef_method(rb_cComplex, "floor"); rb_undef_method(rb_cComplex, "ceil"); rb_undef_method(rb_cComplex, "modulo"); diff --git a/numeric.c b/numeric.c index 4f927f00fb655b..df0c016b9e5bac 100644 --- a/numeric.c +++ b/numeric.c @@ -656,6 +656,31 @@ num_div(VALUE x, VALUE y) return rb_funcall(num_funcall1(x, '/', y), rb_intern("floor"), 0); } +/* + * call-seq: + * ceildiv(other) -> integer + * + * Returns the quotient self/other as an integer, rounding up to the nearest integer. + * This method uses method +/+ in the derived class of +self+. 
+ * (\Numeric itself does not define method +/+.) + * + * Of the Core and Standard Library classes, + * Float and Rational use this implementation. + * + * 3.0.ceildiv(3.0) # => 1 + * 4.0.ceildiv(3.0) # => 2 + * + * 4.0.ceildiv(-3.0) # => -1 + * -4.0.ceildiv(3.0) # => -1 + * -4.0.ceildiv(-3.0) # => 2 + */ +static VALUE +num_ceildiv(VALUE x, VALUE y) +{ + VALUE tmp = num_div(x, num_uminus(y)); + return num_uminus(tmp); +} + /* * call-seq: * self % other -> real_numeric @@ -4269,6 +4294,28 @@ rb_int_idiv(VALUE x, VALUE y) return num_div(x, y); } +/* + * call-seq: + * ceildiv(other) -> integer + * + * Returns the result of division +self+ by +other+. The result is rounded up to the nearest integer. + * + * 3.ceildiv(3) # => 1 + * 4.ceildiv(3) # => 2 + * + * 4.ceildiv(-3) # => -1 + * -4.ceildiv(3) # => -1 + * -4.ceildiv(-3) # => 2 + * + * 3.ceildiv(1.2) # => 3 + */ +VALUE +rb_int_ceildiv(VALUE x, VALUE y) +{ + VALUE tmp = rb_int_idiv(x, num_uminus(y)); + return num_uminus(tmp); +} + static VALUE fix_mod(VALUE x, VALUE y) { @@ -6200,6 +6247,7 @@ Init_Numeric(void) rb_define_method(rb_cNumeric, "<=>", num_cmp, 1); rb_define_method(rb_cNumeric, "eql?", num_eql, 1); rb_define_method(rb_cNumeric, "fdiv", num_fdiv, 1); + rb_define_method(rb_cNumeric, "ceildiv", num_ceildiv, 1); rb_define_method(rb_cNumeric, "div", num_div, 1); rb_define_method(rb_cNumeric, "divmod", num_divmod, 1); rb_define_method(rb_cNumeric, "%", num_modulo, 1); @@ -6255,6 +6303,7 @@ Init_Numeric(void) rb_define_method(rb_cInteger, "remainder", int_remainder, 1); rb_define_method(rb_cInteger, "divmod", rb_int_divmod, 1); rb_define_method(rb_cInteger, "fdiv", rb_int_fdiv, 1); + rb_define_method(rb_cInteger, "ceildiv", rb_int_ceildiv, 1); rb_define_method(rb_cInteger, "**", rb_int_pow, 1); rb_define_method(rb_cInteger, "pow", rb_int_powm, -1); /* in bignum.c */ diff --git a/test/ruby/test_complex.rb b/test/ruby/test_complex.rb index a3a75465755240..5cf52e812ed626 100644 --- a/test/ruby/test_complex.rb +++ 
b/test/ruby/test_complex.rb @@ -915,6 +915,7 @@ def test_respond assert_not_respond_to(c, :%) assert_not_respond_to(c, :div) assert_not_respond_to(c, :divmod) + assert_not_respond_to(c, :ceildiv) assert_not_respond_to(c, :floor) assert_not_respond_to(c, :ceil) assert_not_respond_to(c, :modulo) diff --git a/test/ruby/test_integer.rb b/test/ruby/test_integer.rb index a2b181c6422c84..c3e11498bec528 100644 --- a/test/ruby/test_integer.rb +++ b/test/ruby/test_integer.rb @@ -704,4 +704,21 @@ def o.to_int; 1; end def o.to_int; Object.new; end assert_raise_with_message(TypeError, /can't convert Object to Integer/) {Integer.try_convert(o)} end + + def test_ceildiv + assert_equal(0, 0.ceildiv(3)) + assert_equal(1, 1.ceildiv(3)) + assert_equal(1, 3.ceildiv(3)) + assert_equal(2, 4.ceildiv(3)) + + assert_equal(-1, 4.ceildiv(-3)) + assert_equal(-1, -4.ceildiv(3)) + assert_equal(2, -4.ceildiv(-3)) + + assert_equal(3, 3.ceildiv(1.2)) + assert_equal(3, 3.ceildiv(6/5r)) + + assert_equal(10, (10**100-11).ceildiv(10**99-1)) + assert_equal(11, (10**100-9).ceildiv(10**99-1)) + end end diff --git a/test/ruby/test_numeric.rb b/test/ruby/test_numeric.rb index 0593cb535d48b6..068f9a56eb2f43 100644 --- a/test/ruby/test_numeric.rb +++ b/test/ruby/test_numeric.rb @@ -482,4 +482,18 @@ def test_pow assert_equal(0, -2.pow(3, 1)) end + def test_ceildiv + assert_equal(0, 0.0.ceildiv(3.0)) + assert_equal(1, 1.0.ceildiv(3.0)) + assert_equal(1, 3.0.ceildiv(3.0)) + assert_equal(2, 4.0.ceildiv(3.0)) + + assert_equal(-1, 4.0.ceildiv(-3.0)) + assert_equal(-1, -4.0.ceildiv(3.0)) + assert_equal(2, -4.0.ceildiv(-3.0)) + + assert_equal(3, 3.0.ceildiv(1.2)) + assert_equal(3, 3.0.ceildiv(6/5r)) + assert_equal(3, (7r/2).ceildiv(6/5r)) + end end From 24e33b84b5adb29d1d2f541acfba65e225b91b55 Mon Sep 17 00:00:00 2001 From: Kouhei Yanagita Date: Thu, 21 Jul 2022 18:57:13 +0900 Subject: [PATCH 101/546] Remove Numeric#ceildiv --- complex.c | 1 - numeric.c | 26 -------------------------- test/ruby/test_complex.rb | 1 - 
test/ruby/test_numeric.rb | 14 -------------- 4 files changed, 42 deletions(-) diff --git a/complex.c b/complex.c index 865466c499ba4a..d625ced7fa865b 100644 --- a/complex.c +++ b/complex.c @@ -2335,7 +2335,6 @@ Init_Complex(void) rb_undef_method(rb_cComplex, "%"); rb_undef_method(rb_cComplex, "div"); rb_undef_method(rb_cComplex, "divmod"); - rb_undef_method(rb_cComplex, "ceildiv"); rb_undef_method(rb_cComplex, "floor"); rb_undef_method(rb_cComplex, "ceil"); rb_undef_method(rb_cComplex, "modulo"); diff --git a/numeric.c b/numeric.c index df0c016b9e5bac..9574bfe024bfa0 100644 --- a/numeric.c +++ b/numeric.c @@ -656,31 +656,6 @@ num_div(VALUE x, VALUE y) return rb_funcall(num_funcall1(x, '/', y), rb_intern("floor"), 0); } -/* - * call-seq: - * ceildiv(other) -> integer - * - * Returns the quotient self/other as an integer, rounding up to the nearest integer. - * This method uses method +/+ in the derived class of +self+. - * (\Numeric itself does not define method +/+.) - * - * Of the Core and Standard Library classes, - * Float and Rational use this implementation. 
- * - * 3.0.ceildiv(3.0) # => 1 - * 4.0.ceildiv(3.0) # => 2 - * - * 4.0.ceildiv(-3.0) # => -1 - * -4.0.ceildiv(3.0) # => -1 - * -4.0.ceildiv(-3.0) # => 2 - */ -static VALUE -num_ceildiv(VALUE x, VALUE y) -{ - VALUE tmp = num_div(x, num_uminus(y)); - return num_uminus(tmp); -} - /* * call-seq: * self % other -> real_numeric @@ -6247,7 +6222,6 @@ Init_Numeric(void) rb_define_method(rb_cNumeric, "<=>", num_cmp, 1); rb_define_method(rb_cNumeric, "eql?", num_eql, 1); rb_define_method(rb_cNumeric, "fdiv", num_fdiv, 1); - rb_define_method(rb_cNumeric, "ceildiv", num_ceildiv, 1); rb_define_method(rb_cNumeric, "div", num_div, 1); rb_define_method(rb_cNumeric, "divmod", num_divmod, 1); rb_define_method(rb_cNumeric, "%", num_modulo, 1); diff --git a/test/ruby/test_complex.rb b/test/ruby/test_complex.rb index 5cf52e812ed626..a3a75465755240 100644 --- a/test/ruby/test_complex.rb +++ b/test/ruby/test_complex.rb @@ -915,7 +915,6 @@ def test_respond assert_not_respond_to(c, :%) assert_not_respond_to(c, :div) assert_not_respond_to(c, :divmod) - assert_not_respond_to(c, :ceildiv) assert_not_respond_to(c, :floor) assert_not_respond_to(c, :ceil) assert_not_respond_to(c, :modulo) diff --git a/test/ruby/test_numeric.rb b/test/ruby/test_numeric.rb index 068f9a56eb2f43..0593cb535d48b6 100644 --- a/test/ruby/test_numeric.rb +++ b/test/ruby/test_numeric.rb @@ -482,18 +482,4 @@ def test_pow assert_equal(0, -2.pow(3, 1)) end - def test_ceildiv - assert_equal(0, 0.0.ceildiv(3.0)) - assert_equal(1, 1.0.ceildiv(3.0)) - assert_equal(1, 3.0.ceildiv(3.0)) - assert_equal(2, 4.0.ceildiv(3.0)) - - assert_equal(-1, 4.0.ceildiv(-3.0)) - assert_equal(-1, -4.0.ceildiv(3.0)) - assert_equal(2, -4.0.ceildiv(-3.0)) - - assert_equal(3, 3.0.ceildiv(1.2)) - assert_equal(3, 3.0.ceildiv(6/5r)) - assert_equal(3, (7r/2).ceildiv(6/5r)) - end end From 803a07263001e5466bba0f53fcc85784f4b5c686 Mon Sep 17 00:00:00 2001 From: Kouhei Yanagita Date: Fri, 22 Jul 2022 12:05:16 +0900 Subject: [PATCH 102/546] Improve 
performance of Integer#ceildiv This patch is suggested by nobu. Benchmark result: ``` require 'benchmark' n = 10 ** 7 Benchmark.bm do |x| x.report("Fixnum/Fixnum") { a, b = 5, 2; n.times { a.ceildiv(b) } } x.report("Bignum/Bignum") { a, b = 10**100, 10**99 - 1; n.times { a.ceildiv(b) } } x.report("Bignum/Fixnum") { a, b = 10**100, 3; n.times { a.ceildiv(b) } } end ``` Original: ``` user system total real Fixnum/Fixnum 3.340009 0.043029 3.383038 ( 3.384022) Bignum/Bignum 8.229500 0.118543 8.348043 ( 8.349574) Bignum/Fixnum 8.328971 0.097842 8.426813 ( 8.426952) ``` Improved: ``` user system total real Fixnum/Fixnum 0.699140 0.000961 0.700101 ( 0.700199) Bignum/Bignum 5.076165 0.083160 5.159325 ( 5.159360) Bignum/Fixnum 5.548684 0.115372 5.664056 ( 5.666735) ``` --- numeric.c | 23 ----------------------- numeric.rb | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/numeric.c b/numeric.c index 9574bfe024bfa0..4f927f00fb655b 100644 --- a/numeric.c +++ b/numeric.c @@ -4269,28 +4269,6 @@ rb_int_idiv(VALUE x, VALUE y) return num_div(x, y); } -/* - * call-seq: - * ceildiv(other) -> integer - * - * Returns the result of division +self+ by +other+. The result is rounded up to the nearest integer. 
- * - * 3.ceildiv(3) # => 1 - * 4.ceildiv(3) # => 2 - * - * 4.ceildiv(-3) # => -1 - * -4.ceildiv(3) # => -1 - * -4.ceildiv(-3) # => 2 - * - * 3.ceildiv(1.2) # => 3 - */ -VALUE -rb_int_ceildiv(VALUE x, VALUE y) -{ - VALUE tmp = rb_int_idiv(x, num_uminus(y)); - return num_uminus(tmp); -} - static VALUE fix_mod(VALUE x, VALUE y) { @@ -6277,7 +6255,6 @@ Init_Numeric(void) rb_define_method(rb_cInteger, "remainder", int_remainder, 1); rb_define_method(rb_cInteger, "divmod", rb_int_divmod, 1); rb_define_method(rb_cInteger, "fdiv", rb_int_fdiv, 1); - rb_define_method(rb_cInteger, "ceildiv", rb_int_ceildiv, 1); rb_define_method(rb_cInteger, "**", rb_int_pow, 1); rb_define_method(rb_cInteger, "pow", rb_int_powm, -1); /* in bignum.c */ diff --git a/numeric.rb b/numeric.rb index 9f2200d2a87642..c2091465f88fd5 100644 --- a/numeric.rb +++ b/numeric.rb @@ -227,6 +227,23 @@ def zero? Primitive.attr! 'inline' Primitive.cexpr! 'rb_int_zero_p(self)' end + + # call-seq: + # ceildiv(other) -> integer + # + # Returns the result of division +self+ by +other+. The result is rounded up to the nearest integer. + # + # 3.ceildiv(3) # => 1 + # 4.ceildiv(3) # => 2 + # + # 4.ceildiv(-3) # => -1 + # -4.ceildiv(3) # => -1 + # -4.ceildiv(-3) # => 2 + # + # 3.ceildiv(1.2) # => 3 + def ceildiv(other) + -div(-other) + end end # call-seq: From 844a0edbae6e74293e3d0cb1ceeeb66a4371d06d Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 11 Aug 2022 22:24:47 +0900 Subject: [PATCH 103/546] [Bug #18962] Do not read again once reached EOF `Ripper::Lexer#parse` re-parses the source code with syntax errors when `raise_errors: false`. Co-Authored-By: tompng --- parse.y | 1 + test/ripper/test_lexer.rb | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/parse.y b/parse.y index b4c3106b8cf141..d0f37a18ccb1ac 100644 --- a/parse.y +++ b/parse.y @@ -9299,6 +9299,7 @@ parser_yylex(struct parser_params *p) case '\004': /* ^D */ case '\032': /* ^Z */ case -1: /* end of script. 
*/ + p->eofp = 1; return 0; /* white spaces */ diff --git a/test/ripper/test_lexer.rb b/test/ripper/test_lexer.rb index 83130668be79b0..4f3f4657efe4df 100644 --- a/test/ripper/test_lexer.rb +++ b/test/ripper/test_lexer.rb @@ -100,6 +100,20 @@ def test_stack_at_on_heredoc_beg assert_equal expect, Ripper.lex(src).map {|e| e[1]} end + def test_end_of_script_char + all_assertions do |all| + ["a", %w"[a ]", %w"{, }", "if"].each do |src, append| + expected = Ripper.lex(src).map {|e| e[1]} + ["\0b", "\4b", "\32b"].each do |eof| + c = "#{src}#{eof}#{append}" + all.for(c) do + assert_equal expected, Ripper.lex(c).map {|e| e[1]} + end + end + end + end + end + def test_slice assert_equal "string\#{nil}\n", Ripper.slice(%(< Date: Thu, 14 Jul 2022 17:39:03 +0900 Subject: [PATCH 104/546] Short-circuit `Process._fork` --- process.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/process.c b/process.c index 0e4dbc40f4d26c..57df2dc06fbf89 100644 --- a/process.c +++ b/process.c @@ -4328,12 +4328,30 @@ rb_fork_ruby(int *status) return pid; } +static rb_pid_t +proc_fork_pid(void) +{ + rb_pid_t pid = rb_fork_ruby(NULL); + + if (pid == -1) { + rb_sys_fail("fork(2)"); + } + + return pid; +} + rb_pid_t rb_call_proc__fork(void) { - VALUE pid = rb_funcall(rb_mProcess, rb_intern("_fork"), 0); - - return NUM2PIDT(pid); + ID id__fork; + CONST_ID(id__fork, "_fork"); + if (rb_method_basic_definition_p(CLASS_OF(rb_mProcess), id__fork)) { + return proc_fork_pid(); + } + else { + VALUE pid = rb_funcall(rb_mProcess, id__fork, 0); + return NUM2PIDT(pid); + } } #endif @@ -4360,12 +4378,7 @@ rb_call_proc__fork(void) VALUE rb_proc__fork(VALUE _obj) { - rb_pid_t pid = rb_fork_ruby(NULL); - - if (pid == -1) { - rb_sys_fail("fork(2)"); - } - + rb_pid_t pid = proc_fork_pid(); return PIDT2NUM(pid); } From 77fdb3a47d420eb66761d50db2244a10b1d7272a Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Tue, 9 Aug 2022 13:22:21 +0900 Subject: [PATCH 105/546] 
Introduce with_warn_vsprintf macro --- error.c | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/error.c b/error.c index 21a532a8faedc4..07fa04627e8d5d 100644 --- a/error.c +++ b/error.c @@ -357,47 +357,42 @@ warn_vsprintf(rb_encoding *enc, const char *file, int line, const char *fmt, va_ return rb_str_cat2(str, "\n"); } +#define with_warn_vsprintf(file, line, fmt) \ + VALUE str; \ + va_list args; \ + va_start(args, fmt); \ + str = warn_vsprintf(NULL, file, line, fmt, args); \ + va_end(args); + void rb_compile_warn(const char *file, int line, const char *fmt, ...) { - VALUE str; - va_list args; - - if (NIL_P(ruby_verbose)) return; - - va_start(args, fmt); - str = warn_vsprintf(NULL, file, line, fmt, args); - va_end(args); - rb_write_warning_str(str); + if (!NIL_P(ruby_verbose)) { + with_warn_vsprintf(file, line, fmt) { + rb_write_warning_str(str); + } + } } /* rb_compile_warning() reports only in verbose mode */ void rb_compile_warning(const char *file, int line, const char *fmt, ...) { - VALUE str; - va_list args; - - if (!RTEST(ruby_verbose)) return; - - va_start(args, fmt); - str = warn_vsprintf(NULL, file, line, fmt, args); - va_end(args); - rb_write_warning_str(str); + if (RTEST(ruby_verbose)) { + with_warn_vsprintf(file, line, fmt) { + rb_write_warning_str(str); + } + } } void rb_category_compile_warn(rb_warning_category_t category, const char *file, int line, const char *fmt, ...) 
{ - VALUE str; - va_list args; - - if (NIL_P(ruby_verbose)) return; - - va_start(args, fmt); - str = warn_vsprintf(NULL, file, line, fmt, args); - va_end(args); - rb_warn_category(str, rb_warning_category_to_name(category)); + if (!NIL_P(ruby_verbose)) { + with_warn_vsprintf(file, line, fmt) { + rb_warn_category(str, rb_warning_category_to_name(category)); + } + } } RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) From b1799267bf39bbca58a160c00c1581a694ca6ad9 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 22:07:32 +0900 Subject: [PATCH 106/546] Bundle unreleased debug --- gems/bundled_gems | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gems/bundled_gems b/gems/bundled_gems index a053812f0a8146..8b71308f627ae6 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -13,4 +13,4 @@ matrix 0.4.2 https://github.com/ruby/matrix prime 0.1.2 https://github.com/ruby/prime rbs 2.6.0 https://github.com/ruby/rbs 5202d4eeed3257448f19004b4baac4bcf4127717 typeprof 0.21.3 https://github.com/ruby/typeprof -debug 1.6.2 https://github.com/ruby/debug +debug 1.6.2 https://github.com/ruby/debug e7c37486ff9579251e5d25645b8d38ec96708f12 From e44445596fb8ba70a2a9fc602458bfe10d493146 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 23:51:55 +0900 Subject: [PATCH 107/546] Preserve each column positions in gems/bundled_gems --- tool/update-bundled_gems.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tool/update-bundled_gems.rb b/tool/update-bundled_gems.rb index 5b9c6b6974edaa..bed1cfc52bc3f1 100755 --- a/tool/update-bundled_gems.rb +++ b/tool/update-bundled_gems.rb @@ -16,5 +16,7 @@ $F[3..-1] = [] end end - $_ = [gem.name, gem.version, uri, *$F[3..-1]].join(" ") + f = [gem.name, gem.version.to_s, uri, *$F[3..-1]] + $_.gsub!(/\S+\s*/) {|s| f.shift.ljust(s.size)} + $_ = [$_, *f].join(" ") unless f.empty? 
end From 3a6405db1b987f658291e0047ae05f7f93d21b87 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 12 Aug 2022 23:52:48 +0900 Subject: [PATCH 108/546] Adjust columns in gems/bundled_gems [ci skip] --- gems/bundled_gems | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/gems/bundled_gems b/gems/bundled_gems index 8b71308f627ae6..5faa75c5e228ca 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -1,16 +1,16 @@ # gem-name version-to-bundle repository-url [optional-commit-hash-to-test-or-defaults-to-v-version] -minitest 5.16.2 https://github.com/seattlerb/minitest -power_assert 2.0.1 https://github.com/ruby/power_assert -rake 13.0.6 https://github.com/ruby/rake -test-unit 3.5.3 https://github.com/test-unit/test-unit -rexml 3.2.5 https://github.com/ruby/rexml -rss 0.2.9 https://github.com/ruby/rss -net-ftp 0.1.3 https://github.com/ruby/net-ftp -net-imap 0.2.3 https://github.com/ruby/net-imap -net-pop 0.1.1 https://github.com/ruby/net-pop -net-smtp 0.3.1 https://github.com/ruby/net-smtp -matrix 0.4.2 https://github.com/ruby/matrix -prime 0.1.2 https://github.com/ruby/prime -rbs 2.6.0 https://github.com/ruby/rbs 5202d4eeed3257448f19004b4baac4bcf4127717 -typeprof 0.21.3 https://github.com/ruby/typeprof -debug 1.6.2 https://github.com/ruby/debug e7c37486ff9579251e5d25645b8d38ec96708f12 +minitest 5.16.2 https://github.com/seattlerb/minitest +power_assert 2.0.1 https://github.com/ruby/power_assert +rake 13.0.6 https://github.com/ruby/rake +test-unit 3.5.3 https://github.com/test-unit/test-unit +rexml 3.2.5 https://github.com/ruby/rexml +rss 0.2.9 https://github.com/ruby/rss +net-ftp 0.1.3 https://github.com/ruby/net-ftp +net-imap 0.2.3 https://github.com/ruby/net-imap +net-pop 0.1.1 https://github.com/ruby/net-pop +net-smtp 0.3.1 https://github.com/ruby/net-smtp +matrix 0.4.2 https://github.com/ruby/matrix +prime 0.1.2 https://github.com/ruby/prime +rbs 2.6.0 https://github.com/ruby/rbs 
5202d4eeed3257448f19004b4baac4bcf4127717 +typeprof 0.21.3 https://github.com/ruby/typeprof +debug 1.6.2 https://github.com/ruby/debug e7c37486ff9579251e5d25645b8d38ec96708f12 From fa65800ee54961e04acf65c54a84abb9c0f6210b Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Fri, 12 Aug 2022 19:00:10 -0500 Subject: [PATCH 109/546] [ruby/rdoc] [DOC] Enhances text about escapes (https://github.com/ruby/rdoc/pull/917) https://github.com/ruby/rdoc/commit/c40bac829c --- doc/rdoc/markup_reference.rb | 118 ++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 31 deletions(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index 04e90aad23c420..92fe2e7cc36129 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -820,37 +820,6 @@ # # ====== +Monofont+ in a Heading # -# ==== Escaping Text Markup -# -# Text markup can be escaped with a backslash, as in \, which was obtained -# with \\. Except in verbatim sections and between \ tags, -# to produce a backslash you have to double it unless it is followed by a -# space, tab or newline. Otherwise, the HTML formatter will discard it, as it -# is used to escape potential links: -# -# * The \ must be doubled if not followed by white space: \\. -# * But not in \ tags: in a Regexp, \S matches non-space. -# * This is a link to {ruby-lang}[https://www.ruby-lang.org]. -# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org]. -# * This will not be linked to \RDoc::RDoc#document -# -# generates: -# -# * The \ must be doubled if not followed by white space: \\. -# * But not in \ tags: in a Regexp, \S matches non-space. 
-# * This is a link to {ruby-lang}[https://www.ruby-lang.org] -# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org] -# * This will not be linked to \RDoc::RDoc#document -# -# Inside \ tags, more precisely, leading backslashes are removed only if -# followed by a markup character (<*_+), a backslash, or a known link -# reference (a known class or method). So in the example above, the backslash -# of \S would be removed if there was a class or module named +S+ in -# the current context. -# -# This behavior is inherited from RDoc version 1, and has been kept for -# compatibility with existing RDoc documentation. -# # ==== Character Conversions # # Certain combinations of characters may be converted to special characters; @@ -1074,6 +1043,93 @@ # # {rdoc-image:https://www.ruby-lang.org/images/header-ruby-logo@2x.png}[./Alias.html] # +# === Escaping Text +# +# Text that would otherwise be interpreted as markup +# can be "escaped," so that it is not interpreted as markup; +# the escape character is the backslash ('\\'). +# +# In a verbatim text block or a code block, +# the escape character is always preserved: +# +# Example input: +# +# This is not verbatim text. +# +# This is verbatim text, with an escape character \. +# +# This is not a code block. +# +# def foo +# 'String with an escape character.' +# end +# +# Rendered HTML: +# +# >>> +# This is not verbatim text. +# +# This is verbatim text, with an escape character \. +# +# This is not a code block. +# +# def foo +# 'This is a code block with an escape character \.' +# end +# +# In typeface markup (italic, bold, or monofont), +# an escape character is preserved unless it is immediately +# followed by nested typeface markup. +# +# Example input: +# +# This list is about escapes; it contains: +# +# - Monofont text with unescaped nested _italic_. +# - Monofont text with escaped nested \_italic_. +# - Monofont text with an escape character \. 
+# +# Rendered HTML: +# +# >>> +# This list is about escapes; it contains: +# +# - Monofont text with unescaped nested _italic_. +# - Monofont text with escaped nested \_italic_. +# - Monofont text with an escape character \ . +# +# In other text-bearing blocks +# (paragraphs, block quotes, list items, headings): +# +# - A single escape character immediately followed by markup +# escapes the markup. +# - A single escape character followed by whitespace is preserved. +# - A single escape character anywhere else is ignored. +# - A double escape character is rendered as a single backslash. +# +# Example input: +# +# This list is about escapes; it contains: +# +# - An unescaped class name, RDoc, that will become a link. +# - An escaped class name, \RDoc, that will not become a link. +# - An escape character followed by whitespace \ . +# - An escape character \that is ignored. +# - A double escape character \\ that is rendered +# as a single backslash. +# +# Rendered HTML: +# +# >>> +# This list is about escapes; it contains: +# +# - An unescaped class name, RDoc, that will become a link. +# - An escaped class name, \RDoc, that will not become a link. +# - An escape character followed by whitespace \ . +# - An escape character \that is ignored. +# - A double escape character \\ that is rendered +# as a single backslash. 
+# # == Documentation Derived from Ruby Code # # [Class] From f9382a3bcba24d2b57d9dcb890928100a8e5f604 Mon Sep 17 00:00:00 2001 From: git Date: Sat, 13 Aug 2022 09:00:35 +0900 Subject: [PATCH 110/546] * 2022-08-13 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index d79ba396f78b2e..9d5c6428f2054e 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 12 +#define RUBY_RELEASE_DAY 13 #include "ruby/version.h" #include "ruby/internal/abi.h" From d1d1c9ba7a678124eb32833d5be0628c915bde06 Mon Sep 17 00:00:00 2001 From: Kouhei Yanagita Date: Sat, 13 Aug 2022 08:07:49 +0900 Subject: [PATCH 111/546] Add a NEWS entry about Integer#ceildiv [ci skip] --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index b391702821b654..6a6ed90e50cefd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -104,6 +104,9 @@ Note: We're only listing outstanding class updates. empty, instead of returning the default value or calling the default proc. [[Bug #16908]] +* Integer + * Integer#ceildiv has been added. [[Feature #18809]] + * Kernel * Kernel#binding raises RuntimeError if called from a non-Ruby frame (such as a method defined in C). [[Bug #18487]] From 0617cba197cdff626ee9c74cece480df31d384ef Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 13 Aug 2022 11:23:47 +0900 Subject: [PATCH 112/546] [DOC] Add the link to [Feature #18809] --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 6a6ed90e50cefd..6498621712270b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -279,3 +279,4 @@ The following deprecated APIs are removed. 
[Feature #18685]: https://bugs.ruby-lang.org/issues/18685 [Bug #18782]: https://bugs.ruby-lang.org/issues/18782 [Feature #18788]: https://bugs.ruby-lang.org/issues/18788 +[Feature #18809]: https://bugs.ruby-lang.org/issues/18809 From a58a429f8b85d9a68a9439ba58c76996750946da Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 14 Aug 2022 01:02:33 +0900 Subject: [PATCH 113/546] Silent configure does not output cached configurations --- template/Makefile.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/template/Makefile.in b/template/Makefile.in index 7968619f9a1a16..facbf062f42a6a 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -397,12 +397,13 @@ uncommon.mk: $(srcdir)/common.mk reconfig-args = $(srcdir)/$(CONFIGURE) $(yes_silence:yes=--silent) $(configure_args) config.status-args = ./config.status $(yes_silence:yes=--silent) --recheck reconfig-exec-0 = test -t 1 && { : $${CONFIGURE_TTY=yes}; export CONFIGURE_TTY; }; exec 3>&1; exit `exec 4>&1; { "$$@" 3>&- 4>&-; echo $$? 
1>&4; } | fgrep -v '(cached)' 1>&3 3>&- 4>&-` -reconfig-exec-1 = set -x; "$$@" +reconfig-exec-1 = set -x; exec "$$@" +reconfig-exec-yes = $(reconfig-exec-1) reconfig config.status: $(srcdir)/$(CONFIGURE) $(srcdir)/enc/Makefile.in \ $(hdrdir)/ruby/version.h $(ABI_VERSION_HDR) @PWD= MINIRUBY="$(MINIRUBY)"; export MINIRUBY; \ - set $(SHELL) $($@-args); $(reconfig-exec-$(V)) + set $(SHELL) $($@-args); $(reconfig-exec-$(silence:no=$(V))) $(srcdir)/$(CONFIGURE): $(srcdir)/configure.ac $(CHDIR) $(srcdir) && exec $(AUTOCONF) -o $(@F) From a9abf60c971a6d42fd5ca0566c796162960454d2 Mon Sep 17 00:00:00 2001 From: git Date: Sun, 14 Aug 2022 14:45:07 +0900 Subject: [PATCH 114/546] * 2022-08-14 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 9d5c6428f2054e..ee6404291f14b3 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 13 +#define RUBY_RELEASE_DAY 14 #include "ruby/version.h" #include "ruby/internal/abi.h" From cb12d7c71bb5b5ef4ebfc7a2008f66dd8410ccd2 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 14 Aug 2022 19:25:56 +0900 Subject: [PATCH 115/546] Update dependencies --- common.mk | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/common.mk b/common.mk index 9f89e0ec8af26c..ec5e4ae2522ef1 100644 --- a/common.mk +++ b/common.mk @@ -13822,7 +13822,6 @@ signal.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h signal.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h signal.$(OBJEXT): $(CCAN_DIR)/list/list.h signal.$(OBJEXT): $(CCAN_DIR)/str/str.h -signal.$(OBJEXT): $(hdrdir)/ruby.h signal.$(OBJEXT): $(hdrdir)/ruby/ruby.h signal.$(OBJEXT): $(top_srcdir)/internal/array.h signal.$(OBJEXT): $(top_srcdir)/internal/compilers.h @@ -14008,7 +14007,6 @@ signal.$(OBJEXT): {$(VPATH)}internal/warning_push.h signal.$(OBJEXT): {$(VPATH)}internal/xmalloc.h signal.$(OBJEXT): {$(VPATH)}method.h 
signal.$(OBJEXT): {$(VPATH)}missing.h -signal.$(OBJEXT): {$(VPATH)}mjit.h signal.$(OBJEXT): {$(VPATH)}node.h signal.$(OBJEXT): {$(VPATH)}onigmo.h signal.$(OBJEXT): {$(VPATH)}oniguruma.h @@ -14024,7 +14022,6 @@ signal.$(OBJEXT): {$(VPATH)}thread_native.h signal.$(OBJEXT): {$(VPATH)}vm_core.h signal.$(OBJEXT): {$(VPATH)}vm_debug.h signal.$(OBJEXT): {$(VPATH)}vm_opts.h -signal.$(OBJEXT): {$(VPATH)}yjit.h sprintf.$(OBJEXT): $(hdrdir)/ruby/ruby.h sprintf.$(OBJEXT): $(top_srcdir)/internal/bignum.h sprintf.$(OBJEXT): $(top_srcdir)/internal/bits.h @@ -17593,16 +17590,14 @@ yjit.$(OBJEXT): $(top_srcdir)/internal/array.h yjit.$(OBJEXT): $(top_srcdir)/internal/class.h yjit.$(OBJEXT): $(top_srcdir)/internal/compile.h yjit.$(OBJEXT): $(top_srcdir)/internal/compilers.h +yjit.$(OBJEXT): $(top_srcdir)/internal/fixnum.h yjit.$(OBJEXT): $(top_srcdir)/internal/gc.h yjit.$(OBJEXT): $(top_srcdir)/internal/hash.h yjit.$(OBJEXT): $(top_srcdir)/internal/imemo.h -yjit.$(OBJEXT): $(top_srcdir)/internal/object.h -yjit.$(OBJEXT): $(top_srcdir)/internal/re.h yjit.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h yjit.$(OBJEXT): $(top_srcdir)/internal/serial.h yjit.$(OBJEXT): $(top_srcdir)/internal/static_assert.h yjit.$(OBJEXT): $(top_srcdir)/internal/string.h -yjit.$(OBJEXT): $(top_srcdir)/internal/struct.h yjit.$(OBJEXT): $(top_srcdir)/internal/variable.h yjit.$(OBJEXT): $(top_srcdir)/internal/vm.h yjit.$(OBJEXT): $(top_srcdir)/internal/warnings.h @@ -17620,6 +17615,7 @@ yjit.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h yjit.$(OBJEXT): {$(VPATH)}builtin.h yjit.$(OBJEXT): {$(VPATH)}config.h yjit.$(OBJEXT): {$(VPATH)}constant.h +yjit.$(OBJEXT): {$(VPATH)}debug.h yjit.$(OBJEXT): {$(VPATH)}debug_counter.h yjit.$(OBJEXT): {$(VPATH)}defines.h yjit.$(OBJEXT): {$(VPATH)}encoding.h @@ -17798,6 +17794,7 @@ yjit.$(OBJEXT): {$(VPATH)}thread_native.h yjit.$(OBJEXT): {$(VPATH)}vm_callinfo.h yjit.$(OBJEXT): {$(VPATH)}vm_core.h yjit.$(OBJEXT): {$(VPATH)}vm_debug.h +yjit.$(OBJEXT): {$(VPATH)}vm_insnhelper.h 
yjit.$(OBJEXT): {$(VPATH)}vm_opts.h yjit.$(OBJEXT): {$(VPATH)}vm_sync.h yjit.$(OBJEXT): {$(VPATH)}yjit.c From 89aa09afaf77920fd748aefaba99fe4b0f19e684 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 6 Aug 2021 15:36:33 -0700 Subject: [PATCH 116/546] [ruby/rinda] Handle situations where IPv4 multicast is not available Fixes [Bug #13864] https://github.com/ruby/rinda/commit/3cd620f38c --- test/rinda/test_rinda.rb | 71 +++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/test/rinda/test_rinda.rb b/test/rinda/test_rinda.rb index d8340e0fc48dff..dbe414b783fefd 100644 --- a/test/rinda/test_rinda.rb +++ b/test/rinda/test_rinda.rb @@ -583,6 +583,22 @@ def test_take_bug_8215 end end +module RingIPv4 + def ipv4_mc(rf) + begin + v4mc = rf.make_socket('239.0.0.1') + rescue Errno::ENETUNREACH, Errno::ENOBUFS, Errno::ENODEV + omit 'IPv4 multicast not available' + end + + begin + yield v4mc + ensure + v4mc.close + end + end +end + module RingIPv6 def prepare_ipv6(r) begin @@ -625,6 +641,7 @@ def ipv6_mc(rf, hops = nil) end class TestRingServer < Test::Unit::TestCase + include RingIPv4 def setup @port = Rinda::Ring_PORT @@ -697,27 +714,23 @@ def test_make_socket_unicast end def test_make_socket_ipv4_multicast - begin - v4mc = @rs.make_socket('239.0.0.1') - rescue Errno::ENOBUFS => e - omit "Missing multicast support in OS: #{e.message}" - end - - begin - if Socket.const_defined?(:SO_REUSEPORT) then - assert(v4mc.getsockopt(:SOCKET, :SO_REUSEPORT).bool) - else - assert(v4mc.getsockopt(:SOCKET, :SO_REUSEADDR).bool) - end - rescue TypeError - if /aix/ =~ RUBY_PLATFORM - omit "Known bug in getsockopt(2) on AIX" + ipv4_mc(@rs) do |v4mc| + begin + if Socket.const_defined?(:SO_REUSEPORT) then + assert(v4mc.getsockopt(:SOCKET, :SO_REUSEPORT).bool) + else + assert(v4mc.getsockopt(:SOCKET, :SO_REUSEADDR).bool) + end + rescue TypeError + if /aix/ =~ RUBY_PLATFORM + omit "Known bug in getsockopt(2) on AIX" + end + raise $! 
end - raise $! - end - assert_equal('0.0.0.0', v4mc.local_address.ip_address) - assert_equal(@port, v4mc.local_address.ip_port) + assert_equal('0.0.0.0', v4mc.local_address.ip_address) + assert_equal(@port, v4mc.local_address.ip_port) + end end def test_make_socket_ipv6_multicast @@ -746,7 +759,7 @@ def test_ring_server_ipv4_multicast @rs.shutdown begin @rs = Rinda::RingServer.new(@ts, [['239.0.0.1', '0.0.0.0']], @port) - rescue Errno::ENOBUFS => e + rescue Errno::ENOBUFS, Errno::ENODEV => e omit "Missing multicast support in OS: #{e.message}" end @@ -848,6 +861,7 @@ def wait_for(n) class TestRingFinger < Test::Unit::TestCase include RingIPv6 + include RingIPv4 def setup @rf = Rinda::RingFinger.new @@ -867,12 +881,10 @@ def test_make_socket_unicast end def test_make_socket_ipv4_multicast - v4mc = @rf.make_socket('239.0.0.1') - - assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_LOOP).ipv4_multicast_loop) - assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) - ensure - v4mc.close if v4mc + ipv4_mc(@rf) do |v4mc| + assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_LOOP).ipv4_multicast_loop) + assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) + end end def test_make_socket_ipv6_multicast @@ -884,10 +896,9 @@ def test_make_socket_ipv6_multicast def test_make_socket_ipv4_multicast_hops @rf.multicast_hops = 2 - v4mc = @rf.make_socket('239.0.0.1') - assert_equal(2, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) - ensure - v4mc.close if v4mc + ipv4_mc(@rf) do |v4mc| + assert_equal(2, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) + end end def test_make_socket_ipv6_multicast_hops From cbcf2dad399352fecfd3a670279972a3e058510d Mon Sep 17 00:00:00 2001 From: git Date: Mon, 15 Aug 2022 14:12:08 +0900 Subject: [PATCH 117/546] * 2022-08-15 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 
ee6404291f14b3..428100b2f921ba 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 14 +#define RUBY_RELEASE_DAY 15 #include "ruby/version.h" #include "ruby/internal/abi.h" From e77c8397c21d77901891bd7e65dfba6478b9b6c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 11 Aug 2022 20:32:38 +0200 Subject: [PATCH 118/546] [rubygems/rubygems] Fix Ruby platform incorrectly removed on `bundle update` https://github.com/rubygems/rubygems/commit/0d321c9e3a --- lib/bundler/definition.rb | 2 +- .../install/gemfile/specific_platform_spec.rb | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 21c06e55bab503..0ab0451695b36e 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -888,7 +888,7 @@ def remove_ruby_from_platforms_if_necessary!(dependencies) Bundler.local_platform == Gem::Platform::RUBY || !platforms.include?(Gem::Platform::RUBY) || (@new_platform && platforms.last == Gem::Platform::RUBY) || - !@originally_locked_specs.incomplete_ruby_specs?(dependencies) + !@originally_locked_specs.incomplete_ruby_specs?(expand_dependencies(dependencies)) remove_platform(Gem::Platform::RUBY) add_current_platform diff --git a/spec/bundler/install/gemfile/specific_platform_spec.rb b/spec/bundler/install/gemfile/specific_platform_spec.rb index fe1c3b71fe1913..bb5526203fc376 100644 --- a/spec/bundler/install/gemfile/specific_platform_spec.rb +++ b/spec/bundler/install/gemfile/specific_platform_spec.rb @@ -445,6 +445,47 @@ L end + it "does not remove ruby if gems for other platforms, and not present in the lockfile, exist in the Gemfile" do + build_repo4 do + build_gem "nokogiri", "1.13.8" + build_gem "nokogiri", "1.13.8" do |s| + s.platform = Gem::Platform.local + end + end + + gemfile <<~G + source "#{file_uri_for(gem_repo4)}" + + gem "nokogiri" + + gem 
"tzinfo", "~> 1.2", platform: :#{not_local_tag} + G + + original_lockfile = <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + nokogiri (1.13.8) + nokogiri (1.13.8-#{Gem::Platform.local}) + + PLATFORMS + #{lockfile_platforms_for([specific_local_platform, "ruby"])} + + DEPENDENCIES + nokogiri + tzinfo (~> 1.2) + + BUNDLED WITH + #{Bundler::VERSION} + L + + lockfile original_lockfile + + bundle "lock --update" + + expect(lockfile).to eq(original_lockfile) + end + it "can fallback to a source gem when platform gems are incompatible with current ruby version" do setup_multiplatform_gem_with_source_gem From 8d40ede2e005439cbc84abfd50c98932a33448f4 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 15 Aug 2022 08:16:13 -0500 Subject: [PATCH 119/546] [ruby/date] [DOC] Enhanced intro for Date (https://github.com/ruby/date/pull/72) https://github.com/ruby/date/commit/59a6673221 --- ext/date/date_core.c | 184 ++++++++++++------------------------------- 1 file changed, 51 insertions(+), 133 deletions(-) diff --git a/ext/date/date_core.c b/ext/date/date_core.c index c68f70e5f88f95..83d493c794842a 100644 --- a/ext/date/date_core.c +++ b/ext/date/date_core.c @@ -4505,6 +4505,9 @@ date_s__parse_internal(int argc, VALUE *argv, VALUE klass) * Note: * This method recognizes many forms in +string+, * but it is not a validator. + * For formats, see + * {"Specialized Format Strings" in Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-Specialized+Format+Strings] + * * If +string+ does not specify a valid date, * the result is unpredictable; * consider using Date._strptime instead. @@ -4537,6 +4540,8 @@ date_s__parse(int argc, VALUE *argv, VALUE klass) * Note: * This method recognizes many forms in +string+, * but it is not a validator. 
+ * For formats, see + * {"Specialized Format Strings" in Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-Specialized+Format+Strings] * If +string+ does not specify a valid date, * the result is unpredictable; * consider using Date._strptime instead. @@ -4671,7 +4676,7 @@ date_s_iso8601(int argc, VALUE *argv, VALUE klass) * Date._rfc3339(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * {RFC 3339 format}[https://datatracker.ietf.org/doc/html/rfc3339]: + * {RFC 3339 format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+3339+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc3339 # => "2001-02-03T00:00:00+00:00" @@ -4699,7 +4704,7 @@ date_s__rfc3339(int argc, VALUE *argv, VALUE klass) * * Returns a new \Date object with values parsed from +string+, * which should be a valid - * {RFC 3339 format}[https://datatracker.ietf.org/doc/html/rfc3339]: + * {RFC 3339 format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+3339+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc3339 # => "2001-02-03T00:00:00+00:00" @@ -4811,7 +4816,7 @@ date_s_xmlschema(int argc, VALUE *argv, VALUE klass) * Date._rfc2822(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * {RFC 2822 date format}[https://datatracker.ietf.org/doc/html/rfc2822]: + * {RFC 2822 date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+2822+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc2822 # => "Sat, 3 Feb 2001 00:00:00 +0000" @@ -4841,7 +4846,7 @@ date_s__rfc2822(int argc, VALUE *argv, VALUE klass) * * Returns a new \Date object with values parsed from +string+, * which should be a valid - * {RFC 2822 date format}[https://datatracker.ietf.org/doc/html/rfc2822]: + * {RFC 2822 date 
format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+2822+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc2822 # => "Sat, 3 Feb 2001 00:00:00 +0000" @@ -4885,7 +4890,7 @@ date_s_rfc2822(int argc, VALUE *argv, VALUE klass) * Date._httpdate(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * HTTP date format: + * {HTTP date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-HTTP+Format]: * * d = Date.new(2001, 2, 3) * s = d.httpdate # => "Sat, 03 Feb 2001 00:00:00 GMT" @@ -4911,7 +4916,7 @@ date_s__httpdate(int argc, VALUE *argv, VALUE klass) * * Returns a new \Date object with values parsed from +string+, * which should be a valid - * {RFC 2616 date format}[https://datatracker.ietf.org/doc/html/rfc2616]: + * {HTTP date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-HTTP+Format]: * * d = Date.new(2001, 2, 3) s = d.httpdate # => "Sat, 03 Feb 2001 00:00:00 GMT" @@ -4953,7 +4958,7 @@ date_s_httpdate(int argc, VALUE *argv, VALUE klass) * Date._jisx0301(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * JIS X 0301 date format: + * {JIS X 0301 date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-JIS+X+0301+Format]: * * d = Date.new(2001, 2, 3) * s = d.jisx0301 # => "H13.02.03" @@ -4979,7 +4984,7 @@ date_s__jisx0301(int argc, VALUE *argv, VALUE klass) * Date.jisx0301(string = '-4712-01-01', start = Date::ITALY, limit: 128) -> date * * Returns a new \Date object with values parsed from +string+, - * which should be a valid JIS X 0301 format: + * which should be a valid {JIS X 0301 format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-JIS+X+0301+Format]: * * d = Date.new(2001, 2, 3) * s = d.jisx0301 # => "H13.02.03" @@ -9383,151 +9388,65 @@ Init_date_core(void) negative_inf = -INFINITY; /* - * date and datetime 
class - Tadayoshi Funaba 1998-2011 - * - * 'date' provides two classes: Date and DateTime. - * - * == Terms and Definitions - * - * Some terms and definitions are based on ISO 8601 and JIS X 0301. - * - * === Calendar Date - * - * The calendar date is a particular day of a calendar year, - * identified by its ordinal number within a calendar month within - * that year. + * \Class \Date provides methods for storing and manipulating + * calendar dates. * - * In those classes, this is so-called "civil". + * Consider using + * {class Time}[https://docs.ruby-lang.org/en/master/Time.html] + * instead of class \Date if: * - * === Ordinal Date + * - You need both dates and times; \Date handles only dates. + * - You need only Gregorian dates (and not Julian dates); + * see {Julian and Gregorian Calendars}[rdoc-ref:calendars.rdoc]. * - * The ordinal date is a particular day of a calendar year identified - * by its ordinal number within the year. + * A \Date object, once created, is immutable, and cannot be modified. * - * In those classes, this is so-called "ordinal". + * == Creating a \Date * - * === Week Date + * You can create a date for the current date, using Date.today: * - * The week date is a date identified by calendar week and day numbers. + * Date.today # => # * - * The calendar week is a seven day period within a calendar year, - * starting on a Monday and identified by its ordinal number within - * the year; the first calendar week of the year is the one that - * includes the first Thursday of that year. In the Gregorian - * calendar, this is equivalent to the week which includes January 4. + * You can create a specific date from various combinations of arguments: * - * In those classes, this is so-called "commercial". 
+ * - Date.new takes integer year, month, and day-of-month: * - * === Julian Day Number + * Date.new(1999, 12, 31) # => # * - * The Julian day number is in elapsed days since noon (Greenwich Mean - * Time) on January 1, 4713 BCE (in the Julian calendar). + * - Date.ordinal takes integer year and day-of-year: * - * In this document, the astronomical Julian day number is the same as - * the original Julian day number. And the chronological Julian day - * number is a variation of the Julian day number. Its days begin at - * midnight on local time. + * Date.ordinal(1999, 365) # => # * - * In this document, when the term "Julian day number" simply appears, - * it just refers to "chronological Julian day number", not the - * original. + * - Date.jd takes integer Julian day: * - * In those classes, those are so-called "ajd" and "jd". + * Date.jd(2451544) # => # * - * === Modified Julian Day Number + * - Date.commercial takes integer commercial data (year, week, day-of-week): * - * The modified Julian day number is in elapsed days since midnight - * (Coordinated Universal Time) on November 17, 1858 CE (in the - * Gregorian calendar). + * Date.commercial(1999, 52, 5) # => # * - * In this document, the astronomical modified Julian day number is - * the same as the original modified Julian day number. And the - * chronological modified Julian day number is a variation of the - * modified Julian day number. Its days begin at midnight on local - * time. + * - Date.parse takes a string, which it parses heuristically: * - * In this document, when the term "modified Julian day number" simply - * appears, it just refers to "chronological modified Julian day - * number", not the original. + * Date.parse('1999-12-31') # => # + * Date.parse('31-12-1999') # => # + * Date.parse('1999-365') # => # + * Date.parse('1999-W52-5') # => # * - * In those classes, those are so-called "amjd" and "mjd". 
- * - * == Date - * - * A subclass of Object that includes the Comparable module and - * easily handles date. - * - * A Date object is created with Date::new, Date::jd, Date::ordinal, - * Date::commercial, Date::parse, Date::strptime, Date::today, - * Time#to_date, etc. - * - * require 'date' + * - Date.strptime takes a date string and a format string, + * then parses the date string according to the format string: * - * Date.new(2001,2,3) - * #=> # - * Date.jd(2451944) - * #=> # - * Date.ordinal(2001,34) - * #=> # - * Date.commercial(2001,5,6) - * #=> # - * Date.parse('2001-02-03') - * #=> # - * Date.strptime('03-02-2001', '%d-%m-%Y') - * #=> # - * Time.new(2001,2,3).to_date - * #=> # + * Date.strptime('1999-12-31', '%Y-%m-%d') # => # + * Date.strptime('31-12-1999', '%d-%m-%Y') # => # + * Date.strptime('1999-365', '%Y-%j') # => # + * Date.strptime('1999-W52-5', '%G-W%V-%u') # => # + * Date.strptime('1999 52 5', '%Y %U %w') # => # + * Date.strptime('1999 52 5', '%Y %W %u') # => # + * Date.strptime('fri31dec99', '%a%d%b%y') # => # * - * All date objects are immutable; hence cannot modify themselves. + * See also the specialized methods in + * {"Specialized Format Strings" in Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-Specialized+Format+Strings] * - * The concept of a date object can be represented as a tuple - * of the day count, the offset and the day of calendar reform. - * - * The day count denotes the absolute position of a temporal - * dimension. The offset is relative adjustment, which determines - * decoded local time with the day count. The day of calendar - * reform denotes the start day of the new style. The old style - * of the West is the Julian calendar which was adopted by - * Caesar. The new style is the Gregorian calendar, which is the - * current civil calendar of many countries. - * - * The day count is virtually the astronomical Julian day number. 
- * The offset in this class is usually zero, and cannot be - * specified directly. - * - * A Date object can be created with an optional argument, - * the day of calendar reform as a Julian day number, which - * should be 2298874 to 2426355 or negative/positive infinity. - * The default value is +Date::ITALY+ (2299161=1582-10-15). - * See also sample/cal.rb. - * - * $ ruby sample/cal.rb -c it 10 1582 - * October 1582 - * S M Tu W Th F S - * 1 2 3 4 15 16 - * 17 18 19 20 21 22 23 - * 24 25 26 27 28 29 30 - * 31 - * - * $ ruby sample/cal.rb -c gb 9 1752 - * September 1752 - * S M Tu W Th F S - * 1 2 14 15 16 - * 17 18 19 20 21 22 23 - * 24 25 26 27 28 29 30 - * - * A Date object has various methods. See each reference. - * - * d = Date.parse('3rd Feb 2001') - * #=> # - * d.year #=> 2001 - * d.mon #=> 2 - * d.mday #=> 3 - * d.wday #=> 6 - * d += 1 #=> # - * d.strftime('%a %d %b %Y') #=> "Sun 04 Feb 2001" - * - * === Argument +limit+ + * == Argument +limit+ * * Certain singleton methods in \Date that parse string arguments * also take optional keyword argument +limit+, @@ -9541,7 +9460,6 @@ Init_date_core(void) * - Other non-numeric: raises TypeError. * */ - cDate = rb_define_class("Date", rb_cObject); /* Exception for invalid date/time */ From 0264424d58e0eb3ff6fc42b7b4164b6e3b8ea8ca Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 15 Aug 2022 09:14:35 -0400 Subject: [PATCH 120/546] Add test for GC thrashing of young object creation This test will prevent performance regressions like [Bug #18929]. 
--- test/ruby/test_gc.rb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index a5d7f4dbaa83a9..a1e782daa93a21 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -402,6 +402,28 @@ def test_expand_heap eom end + def test_thrashing_for_young_objects + # This test prevents bugs like [Bug #18929] + + assert_separately %w[--disable-gem], __FILE__, __LINE__, <<-RUBY + # Warmup to make sure heap stabilizes + 1_000_000.times { Object.new } + + before_stats = GC.stat + + 1_000_000.times { Object.new } + + after_stats = GC.stat + + # Should not be thrashing in page creation + assert_equal before_stats[:heap_allocated_pages], after_stats[:heap_allocated_pages] + assert_equal 0, after_stats[:heap_tomb_pages] + assert_equal 0, after_stats[:total_freed_pages] + # Only young objects, so should not trigger major GC + assert_equal before_stats[:major_gc_count], after_stats[:major_gc_count] + RUBY + end + def test_gc_internals assert_not_nil GC::INTERNAL_CONSTANTS[:HEAP_PAGE_OBJ_LIMIT] assert_not_nil GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] From ee864beb7c6730083da656b55f4a9eeaed78bfa8 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 16 Aug 2022 02:05:12 +0900 Subject: [PATCH 121/546] Simplify around `USE_YJIT` macro (#6240) * Simplify around `USE_YJIT` macro - Use `USE_YJIT` macro only instead of `YJIT_BUILD`. - An intermediate macro `YJIT_SUPPORTED_P` is no longer used. 
* Bail out if YJIT is enabled on unsupported platforms --- iseq.c | 6 +++--- ruby.c | 20 ++++++++++---------- vm.c | 2 +- vm_core.h | 1 - yjit.h | 23 +++++++---------------- 5 files changed, 21 insertions(+), 31 deletions(-) diff --git a/iseq.c b/iseq.c index 3d40b88a0de1bc..f17a2d49b61278 100644 --- a/iseq.c +++ b/iseq.c @@ -175,7 +175,7 @@ rb_iseq_free(const rb_iseq_t *iseq) iseq_clear_ic_references(iseq); struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); mjit_free_iseq(iseq); /* Notify MJIT */ -#if YJIT_BUILD +#if USE_YJIT rb_yjit_iseq_free(body->yjit_payload); #endif ruby_xfree((void *)body->iseq_encoded); @@ -438,7 +438,7 @@ rb_iseq_update_references(rb_iseq_t *iseq) #if USE_MJIT mjit_update_references(iseq); #endif -#if YJIT_BUILD +#if USE_YJIT rb_yjit_iseq_update_references(body->yjit_payload); #endif } @@ -526,7 +526,7 @@ rb_iseq_mark(const rb_iseq_t *iseq) #if USE_MJIT mjit_mark_cc_entries(body); #endif -#if YJIT_BUILD +#if USE_YJIT rb_yjit_iseq_mark(body->yjit_payload); #endif } diff --git a/ruby.c b/ruby.c index 7c6742cac47905..66feeb797e6cf6 100644 --- a/ruby.c +++ b/ruby.c @@ -111,7 +111,7 @@ void rb_warning_category_update(unsigned int mask, unsigned int bits); enum feature_flag_bits { EACH_FEATURES(DEFINE_FEATURE, COMMA), feature_debug_flag_first, -#if defined(MJIT_FORCE_ENABLE) || !YJIT_BUILD +#if defined(MJIT_FORCE_ENABLE) || !USE_YJIT DEFINE_FEATURE(jit) = feature_mjit, #else DEFINE_FEATURE(jit) = feature_yjit, @@ -248,7 +248,7 @@ usage(const char *name, int help, int highlight, int columns) #define M(shortopt, longopt, desc) RUBY_OPT_MESSAGE(shortopt, longopt, desc) -#if YJIT_BUILD +#if USE_YJIT # define PLATFORM_JIT_OPTION "--yjit" #else # define PLATFORM_JIT_OPTION "--mjit" @@ -278,7 +278,7 @@ usage(const char *name, int help, int highlight, int columns) #if USE_MJIT M("--mjit", "", "enable C compiler-based JIT compiler (experimental)"), #endif -#if YJIT_BUILD +#if USE_YJIT M("--yjit", "", "enable in-process JIT compiler 
(experimental)"), #endif M("-h", "", "show this message, --help for more info"), @@ -312,7 +312,7 @@ usage(const char *name, int help, int highlight, int columns) #if USE_MJIT M("mjit", "", "C compiler-based JIT compiler (default: disabled)"), #endif -#if YJIT_BUILD +#if USE_YJIT M("yjit", "", "in-process JIT compiler (default: disabled)"), #endif }; @@ -323,7 +323,7 @@ usage(const char *name, int help, int highlight, int columns) #if USE_MJIT extern const struct ruby_opt_message mjit_option_messages[]; #endif -#if YJIT_BUILD +#if USE_YJIT static const struct ruby_opt_message yjit_options[] = { #if YJIT_STATS M("--yjit-stats", "", "Enable collecting YJIT statistics"), @@ -365,7 +365,7 @@ usage(const char *name, int help, int highlight, int columns) for (i = 0; mjit_option_messages[i].str; ++i) SHOW(mjit_option_messages[i]); #endif -#if YJIT_BUILD +#if USE_YJIT printf("%s""YJIT options (experimental):%s\n", sb, se); for (i = 0; i < numberof(yjit_options); ++i) SHOW(yjit_options[i]); @@ -1047,7 +1047,7 @@ set_option_encoding_once(const char *type, VALUE *name, const char *e, long elen #define yjit_opt_match_arg(s, l, name) \ opt_match(s, l, name) && (*(s) && *(s+1) ? 
1 : (rb_raise(rb_eRuntimeError, "--yjit-" name " needs an argument"), 0)) -#if YJIT_BUILD +#if USE_YJIT static bool setup_yjit_options(const char *s) { @@ -1452,7 +1452,7 @@ proc_options(long argc, char **argv, ruby_cmdline_options_t *opt, int envopt) #endif } else if (is_option_with_optarg("yjit", '-', true, false, false)) { -#if YJIT_BUILD +#if USE_YJIT FEATURE_SET(opt->features, FEATURE_BIT(yjit)); setup_yjit_options(s); #else @@ -1831,7 +1831,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) rb_warning("-K is specified; it is for 1.8 compatibility and may cause odd behavior"); if (!(FEATURE_SET_BITS(opt->features) & feature_jit_mask)) { -#if YJIT_BUILD +#if USE_YJIT if (!FEATURE_USED_P(opt->features, yjit) && getenv("RUBY_YJIT_ENABLE")) { FEATURE_SET(opt->features, FEATURE_BIT(yjit)); } @@ -1847,7 +1847,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) opt->mjit.on = TRUE; /* set mjit.on for ruby_show_version() API and check to call mjit_init() */ } #endif -#if YJIT_BUILD +#if USE_YJIT if (FEATURE_SET_P(opt->features, yjit)) { rb_yjit_init(); } diff --git a/vm.c b/vm.c index 3bba390e7cba18..4b1a30e7cae45a 100644 --- a/vm.c +++ b/vm.c @@ -3938,7 +3938,7 @@ Init_vm_objects(void) } /* Stub for builtin function when not building YJIT units*/ -#if !YJIT_BUILD +#if !USE_YJIT void Init_builtin_yjit(void) {} #endif diff --git a/vm_core.h b/vm_core.h index c394862ecbab79..717f1168006ab3 100644 --- a/vm_core.h +++ b/vm_core.h @@ -495,7 +495,6 @@ struct rb_iseq_constant_body { #if USE_YJIT // YJIT stores some data on each iseq. - // Note: Cannot use YJIT_BUILD here since yjit.h includes this header. 
void *yjit_payload; #endif }; diff --git a/yjit.h b/yjit.h index cf420df251791d..1f507f1b519fae 100644 --- a/yjit.h +++ b/yjit.h @@ -15,24 +15,15 @@ # define YJIT_STATS RUBY_DEBUG #endif -// We generate x86 assembly -#if (defined(__x86_64__) && !defined(_WIN32)) || (defined(_WIN32) && defined(_M_AMD64)) // x64 platforms without mingw/msys -# define YJIT_SUPPORTED_P 1 -#else -# define YJIT_SUPPORTED_P 0 -#endif +#if USE_YJIT -// Is the output binary going to include YJIT? -#if USE_MJIT && USE_YJIT && YJIT_SUPPORTED_P -# define YJIT_BUILD 1 +// We generate x86 assembly +#if defined(_WIN32) ? defined(_M_AMD64) : defined(__x86_64__) +// x86_64 platforms without mingw/msys or x64-mswin #else -# define YJIT_BUILD 0 +# error YJIT unsupported platform #endif -#undef YJIT_SUPPORTED_P - -#if YJIT_BUILD - // Expose these as declarations since we are building YJIT. bool rb_yjit_enabled_p(void); unsigned rb_yjit_call_threshold(void); @@ -54,7 +45,7 @@ void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic); void rb_yjit_tracing_invalidate_all(void); #else -// !YJIT_BUILD +// !USE_YJIT // In these builds, YJIT could never be turned on. Provide dummy implementations. 
static inline bool rb_yjit_enabled_p(void) { return false; } @@ -76,6 +67,6 @@ static inline void rb_yjit_before_ractor_spawn(void) {} static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic) {} static inline void rb_yjit_tracing_invalidate_all(void) {} -#endif // #if YJIT_BUILD +#endif // #if USE_YJIT #endif // #ifndef YJIT_H From e34720dcf4dcde543a0d9eabd4dd92bcdcb41c1f Mon Sep 17 00:00:00 2001 From: git Date: Tue, 16 Aug 2022 02:05:28 +0900 Subject: [PATCH 122/546] * 2022-08-16 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 428100b2f921ba..729809a0e63f05 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 15 +#define RUBY_RELEASE_DAY 16 #include "ruby/version.h" #include "ruby/internal/abi.h" From 4e66b3f47b2ad0d6cca1f2227dd38fdf117c0d3c Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 15 Aug 2022 09:35:49 -0400 Subject: [PATCH 123/546] [ruby/rdoc] [DOC] Remove duplicated line in RDoc::MarkupReference https://github.com/ruby/rdoc/commit/488f89aee4 --- doc/rdoc/markup_reference.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index 92fe2e7cc36129..66ec6786c07aa4 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -866,7 +866,6 @@ # # - On-page: ::dummy_singleton_method links to ::dummy_singleton_method. # - Off-pageRDoc::TokenStream::to_html links to RDoc::TokenStream::to_html. -# to \RDoc::TokenStream::to_html. # # Note: Occasionally \RDoc is not linked to a method whose name # has only special characters. 
Check whether the links you were expecting From e49db0f760722bf44ed2c5b31f67d929e9156dbe Mon Sep 17 00:00:00 2001 From: Penelope Phippen Date: Mon, 15 Aug 2022 15:45:51 -0400 Subject: [PATCH 124/546] Do not clone method entries when bind_call is used I noticed that this site unconditionally clones the method entry, which means that `bind_call` always allocates a `T_IMEMO`. While this clone is necessary for `bind`, it is not necessary for `bind_call`. I work at Stripe, and the sorbet_runtime gem uses bind call as part of its [call validation](https://github.com/sorbet/sorbet/blob/master/gems/sorbet-runtime/lib/types/private/methods/call_validation.rb#L157) so this can save us a lot of allocations. This patch adds a `clone` parameter to `convert_umethod_to_method_components`, which then controls whether or not we do this cloning. This patch passed Stripe CI and works in our QA environment. I reviewed it with @tenderlove to talk about correctness also. --- proc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/proc.c b/proc.c index c234ed3f9365b7..3c52fb06a775d0 100644 --- a/proc.c +++ b/proc.c @@ -2543,7 +2543,7 @@ rb_method_call_with_block(int argc, const VALUE *argv, VALUE method, VALUE passe */ static void -convert_umethod_to_method_components(const struct METHOD *data, VALUE recv, VALUE *methclass_out, VALUE *klass_out, VALUE *iclass_out, const rb_method_entry_t **me_out) +convert_umethod_to_method_components(const struct METHOD *data, VALUE recv, VALUE *methclass_out, VALUE *klass_out, VALUE *iclass_out, const rb_method_entry_t **me_out, const bool clone) { VALUE methclass = data->me->owner; VALUE iclass = data->me->defined_class; @@ -2565,9 +2565,19 @@ convert_umethod_to_method_components(const struct METHOD *data, VALUE recv, VALU } } - const rb_method_entry_t *me = rb_method_entry_clone(data->me); + const rb_method_entry_t *me; + if (clone) { + me = rb_method_entry_clone(data->me); + } else { + me = data->me; + } if
(RB_TYPE_P(me->owner, T_MODULE)) { + if (!clone) { + // if we didn't previously clone the method entry, then we need to clone it now + // because this branch manipulates it in rb_method_entry_complement_defined_class + me = rb_method_entry_clone(me); + } VALUE ic = rb_class_search_ancestor(klass, me->owner); if (ic) { klass = ic; @@ -2627,7 +2637,7 @@ umethod_bind(VALUE method, VALUE recv) const rb_method_entry_t *me; const struct METHOD *data; TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me); + convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me, true); struct METHOD *bound; method = TypedData_Make_Struct(rb_cMethod, struct METHOD, &method_data_type, bound); @@ -2669,7 +2679,7 @@ umethod_bind_call(int argc, VALUE *argv, VALUE method) else { VALUE methclass, klass, iclass; const rb_method_entry_t *me; - convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me); + convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me, false); struct METHOD bound = { recv, klass, 0, me }; return call_method_data(ec, &bound, argc, argv, passed_procval, RB_PASS_CALLED_KEYWORDS); From 0608a9a08693286a7d84845a216927ff2e3c9951 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Mon, 15 Aug 2022 16:14:12 -0700 Subject: [PATCH 125/546] Optimize Marshal dump/load for large (> 31-bit) FIXNUM (#6229) * Optimize Marshal dump of large fixnum Marshal's FIXNUM type only supports 31-bit fixnums, so on 64-bit platforms the 63-bit fixnums need to be represented in Marshal's BIGNUM. Previously this was done by converting to a bignum and serializing the bignum object. This commit avoids allocating the intermediate bignum object, instead outputting the T_FIXNUM directly to a Marshal bignum.
This maintains the same representation as the previous implementation, including not using LINKs for these large fixnums (an artifact of the previous implementation always allocating a new BIGNUM). This commit also avoids unnecessary st_lookups on immediate values, which we know will not be in that table. * Fastpath for loading FIXNUM from Marshal bignum * Run update-deps --- benchmark/marshal_dump_load_integer.yml | 22 +++++ common.mk | 3 + marshal.c | 126 +++++++++++++++++++----- test/ruby/test_marshal.rb | 22 ++++- 4 files changed, 148 insertions(+), 25 deletions(-) create mode 100644 benchmark/marshal_dump_load_integer.yml diff --git a/benchmark/marshal_dump_load_integer.yml b/benchmark/marshal_dump_load_integer.yml new file mode 100644 index 00000000000000..78ebf823d25fa7 --- /dev/null +++ b/benchmark/marshal_dump_load_integer.yml @@ -0,0 +1,22 @@ +prelude: | + smallint_array = 1000.times.map { |x| x } + bigint32_array = 1000.times.map { |x| x + 2**32 } + bigint64_array = 1000.times.map { |x| x + 2**64 } + + smallint_dump = Marshal.dump(smallint_array) + bigint32_dump = Marshal.dump(bigint32_array) + bigint64_dump = Marshal.dump(bigint64_array) +benchmark: + marshal_dump_integer_small: | + Marshal.dump(smallint_array) + marshal_dump_integer_over_32_bit: | + Marshal.dump(bigint32_array) + marshal_dump_integer_over_64_bit: | + Marshal.dump(bigint64_array) + marshal_load_integer_small: | + Marshal.load(smallint_dump) + marshal_load_integer_over_32_bit: | + Marshal.load(bigint32_dump) + marshal_load_integer_over_64_bit: | + Marshal.load(bigint64_dump) +loop_count: 4000 diff --git a/common.mk b/common.mk index ec5e4ae2522ef1..cf08764bc9e3ff 100644 --- a/common.mk +++ b/common.mk @@ -8715,12 +8715,15 @@ main.$(OBJEXT): {$(VPATH)}vm_debug.h marshal.$(OBJEXT): $(hdrdir)/ruby/ruby.h marshal.$(OBJEXT): $(top_srcdir)/internal/array.h marshal.$(OBJEXT): $(top_srcdir)/internal/bignum.h +marshal.$(OBJEXT): $(top_srcdir)/internal/bits.h marshal.$(OBJEXT): 
$(top_srcdir)/internal/class.h marshal.$(OBJEXT): $(top_srcdir)/internal/compilers.h marshal.$(OBJEXT): $(top_srcdir)/internal/encoding.h marshal.$(OBJEXT): $(top_srcdir)/internal/error.h +marshal.$(OBJEXT): $(top_srcdir)/internal/fixnum.h marshal.$(OBJEXT): $(top_srcdir)/internal/gc.h marshal.$(OBJEXT): $(top_srcdir)/internal/hash.h +marshal.$(OBJEXT): $(top_srcdir)/internal/numeric.h marshal.$(OBJEXT): $(top_srcdir)/internal/object.h marshal.$(OBJEXT): $(top_srcdir)/internal/serial.h marshal.$(OBJEXT): $(top_srcdir)/internal/static_assert.h diff --git a/marshal.c b/marshal.c index 325d5f126eccec..1eeebf7729c6fe 100644 --- a/marshal.c +++ b/marshal.c @@ -28,6 +28,7 @@ #include "internal/encoding.h" #include "internal/error.h" #include "internal/hash.h" +#include "internal/numeric.h" #include "internal/object.h" #include "internal/struct.h" #include "internal/symbol.h" @@ -171,6 +172,7 @@ struct dump_arg { st_table *data; st_table *compat_tbl; st_table *encodings; + unsigned long num_entries; }; struct dump_call_arg { @@ -754,6 +756,60 @@ w_objivar(VALUE obj, struct dump_call_arg *arg) w_ivar_each(obj, num, arg); } +// Optimized dump for fixnum larger than 31-bits +static void +w_bigfixnum(VALUE obj, struct dump_arg *arg) +{ + RUBY_ASSERT(FIXNUM_P(obj)); + + w_byte(TYPE_BIGNUM, arg); + +#if SIZEOF_LONG == SIZEOF_VALUE + long num, slen_num; + num = FIX2LONG(obj); +#else + long long num, slen_num; + num = NUM2LL(obj); +#endif + + char sign = num < 0 ? 
'-' : '+'; + w_byte(sign, arg); + + // Guaranteed not to overflow, as FIXNUM is 1-bit less than long + if (num < 0) num = -num; + + // calculate the size in shorts + int slen = 0; + { + slen_num = num; + while (slen_num) { + slen++; + slen_num = SHORTDN(slen_num); + } + } + + RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2); + + w_long((long)slen, arg); + + for (int i = 0; i < slen; i++) { + w_short(num & SHORTMASK, arg); + num = SHORTDN(num); + } + + // We aren't adding this object to the link table, but we need to increment + // the index. + arg->num_entries++; + + RUBY_ASSERT(num == 0); +} + +static void +w_remember(VALUE obj, struct dump_arg *arg) +{ + st_add_direct(arg->data, obj, arg->num_entries++); +} + static void w_object(VALUE obj, struct dump_arg *arg, int limit) { @@ -767,17 +823,6 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) rb_raise(rb_eArgError, "exceed depth limit"); } - if (limit > 0) limit--; - c_arg.limit = limit; - c_arg.arg = arg; - c_arg.obj = obj; - - if (st_lookup(arg->data, obj, &num)) { - w_byte(TYPE_LINK, arg); - w_long((long)num, arg); - return; - } - if (NIL_P(obj)) { w_byte(TYPE_NIL, arg); } @@ -797,19 +842,32 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) w_long(FIX2LONG(obj), arg); } else { - w_object(rb_int2big(FIX2LONG(obj)), arg, limit); + w_bigfixnum(obj, arg); } #endif } else if (SYMBOL_P(obj)) { w_symbol(obj, arg); } - else if (FLONUM_P(obj)) { - st_add_direct(arg->data, obj, arg->data->num_entries); - w_byte(TYPE_FLOAT, arg); - w_float(RFLOAT_VALUE(obj), arg); - } else { + if (st_lookup(arg->data, obj, &num)) { + w_byte(TYPE_LINK, arg); + w_long((long)num, arg); + return; + } + + if (limit > 0) limit--; + c_arg.limit = limit; + c_arg.arg = arg; + c_arg.obj = obj; + + if (FLONUM_P(obj)) { + w_remember(obj, arg); + w_byte(TYPE_FLOAT, arg); + w_float(RFLOAT_VALUE(obj), arg); + return; + } + VALUE v; if (!RBASIC_CLASS(obj)) { @@ -818,7 +876,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) } if 
(rb_obj_respond_to(obj, s_mdump, TRUE)) { - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); v = dump_funcall(arg, obj, s_mdump, 0, 0); w_class(TYPE_USRMARSHAL, obj, arg, FALSE); @@ -848,11 +906,11 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) if (hasiv) { w_ivar(hasiv, ivobj, encname, &c_arg); } - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); return; } - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj); { @@ -1044,6 +1102,7 @@ clear_dump_arg(struct dump_arg *arg) arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + arg->num_entries = 0; if (arg->compat_tbl) { st_free_table(arg->compat_tbl); arg->compat_tbl = 0; @@ -1126,6 +1185,7 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit) arg->dest = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->num_entries = 0; arg->compat_tbl = 0; arg->encodings = 0; arg->str = rb_str_buf_new(0); @@ -1881,10 +1941,28 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ sign = r_byte(arg); len = r_long(arg); - data = r_bytes0(len * 2, arg); - v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, - INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); - rb_str_resize(data, 0L); + + if (SIZEOF_VALUE >= 8 && len <= 4) { + // Representable within uintptr, likely FIXNUM + VALUE num = 0; + for (int i = 0; i < len; i++) { + num |= (VALUE)r_byte(arg) << (i * 16); + num |= (VALUE)r_byte(arg) << (i * 16 + 8); + } +#if SIZEOF_VALUE == SIZEOF_LONG + v = ULONG2NUM(num); +#else + v = ULL2NUM(num); +#endif + if (sign == '-') { + v = rb_int_uminus(v); + } + } else { + data = r_bytes0(len * 2, arg); + v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, + INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? 
INTEGER_PACK_NEGATIVE : 0)); + rb_str_resize(data, 0L); + } v = r_entry(v, arg); v = r_leave(v, arg, false); } diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb index 361d18dd4b4363..fc5cd9e93e7fb4 100644 --- a/test/ruby/test_marshal.rb +++ b/test/ruby/test_marshal.rb @@ -33,7 +33,7 @@ def fact(n) end def test_marshal - a = [1, 2, 3, [4,5,"foo"], {1=>"bar"}, 2.5, fact(30)] + a = [1, 2, 3, 2**32, 2**64, [4,5,"foo"], {1=>"bar"}, 2.5, fact(30)] assert_equal a, Marshal.load(Marshal.dump(a)) [[1,2,3,4], [81, 2, 118, 3146]].each { |w,x,y,z| @@ -47,6 +47,26 @@ def test_marshal } end + def test_marshal_integers + a = [] + [-2, -1, 0, 1, 2].each do |i| + 0.upto(65).map do |exp| + a << 2**exp + i + end + end + assert_equal a, Marshal.load(Marshal.dump(a)) + + a = [2**32, []]*2 + assert_equal a, Marshal.load(Marshal.dump(a)) + + a = [2**32, 2**32, []]*2 + assert_equal a, Marshal.load(Marshal.dump(a)) + end + + def test_marshal_small_bignum_backref + assert_equal [2**32, 2**32], Marshal.load("\x04\b[\al+\b\x00\x00\x00\x00\x01\x00@\x06") + end + StrClone = String.clone def test_marshal_cloned_class assert_instance_of(StrClone, Marshal.load(Marshal.dump(StrClone.new("abc")))) From 5389c9813b1970a1a5cb2bc8f67b098d38a99d1d Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 16 Aug 2022 13:49:11 +0900 Subject: [PATCH 126/546] Update the excluding message for Psych [ci skip] --- test/excludes/Psych/TestDateTime.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/excludes/Psych/TestDateTime.rb b/test/excludes/Psych/TestDateTime.rb index 1188b4d4a61a69..63d99be809279a 100644 --- a/test/excludes/Psych/TestDateTime.rb +++ b/test/excludes/Psych/TestDateTime.rb @@ -1 +1,4 @@ -exclude(:test_new_datetime, "Psych loses the start date") +exclude(:test_new_datetime, < Date: Thu, 9 Jun 2022 12:07:58 +0200 Subject: [PATCH 127/546] [ruby/cgi] Implement `CGI.url_encode` and `CGI.url_decode` [Feature #18822] Ruby is somewhat missing an RFC 3986 
compliant escape method. https://github.com/ruby/cgi/commit/c2729c7f33 --- ext/cgi/escape/escape.c | 61 ++++++++++++++++++++++++++++++++++----- lib/cgi/util.rb | 49 ++++++++++++++++++++++++++----- test/cgi/test_cgi_util.rb | 49 ++++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 17 deletions(-) diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 068647747dd11b..c5b76de596efb5 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -200,7 +200,7 @@ url_unreserved_char(unsigned char c) } static VALUE -optimized_escape(VALUE str) +optimized_escape(VALUE str, int plus_escape) { long i, len, beg = 0; VALUE dest = 0; @@ -220,7 +220,7 @@ optimized_escape(VALUE str) rb_str_cat(dest, cstr + beg, i - beg); beg = i + 1; - if (c == ' ') { + if (plus_escape && c == ' ') { rb_str_cat_cstr(dest, "+"); } else { @@ -242,7 +242,7 @@ optimized_escape(VALUE str) } static VALUE -optimized_unescape(VALUE str, VALUE encoding) +optimized_unescape(VALUE str, VALUE encoding, int unescape_plus) { long i, len, beg = 0; VALUE dest = 0; @@ -265,7 +265,7 @@ optimized_unescape(VALUE str, VALUE encoding) | char_to_number(cstr[i+2])); clen = 2; } - else if (c == '+') { + else if (unescape_plus && c == '+') { buf[0] = ' '; } else { @@ -348,7 +348,7 @@ cgiesc_unescape_html(VALUE self, VALUE str) * call-seq: * CGI.escape(string) -> string * - * Returns URL-escaped string. + * Returns URL-escaped string (+application/x-www-form-urlencoded+). * */ static VALUE @@ -357,7 +357,7 @@ cgiesc_escape(VALUE self, VALUE str) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - return optimized_escape(str); + return optimized_escape(str, 1); } else { return rb_call_super(1, &str); @@ -376,7 +376,7 @@ accept_charset(int argc, VALUE *argv, VALUE self) * call-seq: * CGI.unescape(string, encoding=@@accept_charset) -> string * - * Returns URL-unescaped string. + * Returns URL-unescaped string (+application/x-www-form-urlencoded+). 
* */ static VALUE @@ -388,7 +388,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self) if (rb_enc_str_asciicompat_p(str)) { VALUE enc = accept_charset(argc-1, argv+1, self); - return optimized_unescape(str, enc); + return optimized_unescape(str, enc, 1); + } + else { + return rb_call_super(argc, argv); + } +} + +/* + * call-seq: + * CGI.escapeURIComponent(string) -> string + * + * Returns URL-escaped string following RFC 3986. + * + */ +static VALUE +cgiesc_escape_uri_component(VALUE self, VALUE str) +{ + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + return optimized_escape(str, 0); + } + else { + return rb_call_super(1, &str); + } +} + +/* + * call-seq: + * CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string + * + * Returns URL-unescaped string following RFC 3986. + * + */ +static VALUE +cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self) +{ + VALUE str = (rb_check_arity(argc, 1, 2), argv[0]); + + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + VALUE enc = accept_charset(argc-1, argv+1, self); + return optimized_unescape(str, enc, 0); } else { return rb_call_super(argc, argv); @@ -414,6 +457,8 @@ InitVM_escape(void) rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); + rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); + rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); rb_prepend_module(rb_mUtil, rb_mEscape); diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index 55e61bf984fa67..5a5c77ac9764dd 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -5,24 +5,57 @@ module Util; end extend Util end module CGI::Util - @@accept_charset="UTF-8" unless defined?(@@accept_charset) - # 
URL-encode a string. + @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) + + # URL-encode a string into application/x-www-form-urlencoded. + # Space characters (+" "+) are encoded with plus signs (+"+"+) # url_encoded_string = CGI.escape("'Stop!' said Fred") # # => "%27Stop%21%27+said+Fred" def escape(string) encoding = string.encoding - string.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |m| + buffer = string.b + buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end.tr(' ', '+').force_encoding(encoding) + end + buffer.tr!(' ', '+') + buffer.force_encoding(encoding) end - # URL-decode a string with encoding(optional). + # URL-decode an application/x-www-form-urlencoded string with encoding(optional). # string = CGI.unescape("%27Stop%21%27+said+Fred") # # => "'Stop!' said Fred" - def unescape(string,encoding=@@accept_charset) - str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m| + def unescape(string, encoding = @@accept_charset) + str = string.tr('+', ' ') + str = str.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + # URL-encode a string following RFC 3986 + # Space characters (+" "+) are encoded with (+"%20"+) + # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred") + # # => "%27Stop%21%27%20said%20Fred" + def escapeURIComponent(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.force_encoding(encoding) + end + + # URL-decode a string following RFC 3986 with encoding(optional).
+ # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred") + # # => "'Stop!'+said Fred" + def unescapeURIComponent(string, encoding = @@accept_charset) + str = string.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| [m.delete('%')].pack('H*') - end.force_encoding(encoding) + end + str.force_encoding(encoding) str.valid_encoding? ? str : str.force_encoding(string.encoding) end diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index 5baf87db75dea3..a3be193a134cba 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -23,7 +23,6 @@ def teardown ENV.update(@environ) end - def test_cgi_escape assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1)) assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding) @@ -70,6 +69,54 @@ def test_cgi_unescape_accept_charset end; end + def test_cgi_escapeURIComponent + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1)) + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?)
if defined?(::Encoding) + end + + def test_cgi_escapeURIComponent_with_unreserved_characters + assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", + CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), + "should not encode any unreserved characters, as per RFC3986 Section 2.3") + end + + def test_cgi_escapeURIComponent_with_invalid_byte_sequence + assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8"))) + end + + def test_cgi_escapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent + str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + return unless defined?(::Encoding) + + assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar")) + + assert_equal(@str1.encoding, str.encoding) + assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) + end + + def test_cgi_unescapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent_accept_charset + return unless defined?(::Encoding) + + assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)} + assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + assert_equal("", 
CGI.unescapeURIComponent('')) + end; + end + def test_cgi_pretty assert_equal("\n \n \n\n",CGI.pretty("")) assert_equal("\n\t\n\t\n\n",CGI.pretty("","\t")) From b7577b4d9e0fd92522fc30e10fe712e245adee8c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 17 Aug 2022 00:45:27 +0900 Subject: [PATCH 128/546] The tzdata 2022c removed Amsterdam Mean Time --- spec/ruby/core/time/shared/local.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/ruby/core/time/shared/local.rb b/spec/ruby/core/time/shared/local.rb index 43f331c4c1a073..997b7186f1193c 100644 --- a/spec/ruby/core/time/shared/local.rb +++ b/spec/ruby/core/time/shared/local.rb @@ -6,6 +6,7 @@ end end +=begin platform_is_not :windows do describe "timezone changes" do it "correctly adjusts the timezone change to 'CEST' on 'Europe/Amsterdam'" do @@ -16,6 +17,7 @@ end end end +=end end describe :time_local_10_arg, shared: true do From 5528648a913cee8a0b3c8b0bc59764e3c361c404 Mon Sep 17 00:00:00 2001 From: git Date: Wed, 17 Aug 2022 00:45:50 +0900 Subject: [PATCH 129/546] * 2022-08-17 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 729809a0e63f05..1b4afbfdebd92d 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 16 +#define RUBY_RELEASE_DAY 17 #include "ruby/version.h" #include "ruby/internal/abi.h" From cc443f6cde287944e00ab5d9b6ad868b3d9fc9db Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 17 Aug 2022 00:48:11 +0900 Subject: [PATCH 130/546] Refactor `RbInstall::Specs::FileCollector` - Split into `Ext` and `Lib` classes. - `Ext#files` should not include built extension libraries. - `Ext#files` should include scripts under its own `lib`. - `Lib#files` should be prefixed with `lib/`. 
--- tool/rbinstall.rb | 107 ++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 66 deletions(-) diff --git a/tool/rbinstall.rb b/tool/rbinstall.rb index e6d0f592f5e330..68c96bee854f8e 100755 --- a/tool/rbinstall.rb +++ b/tool/rbinstall.rb @@ -729,86 +729,60 @@ def self.no_write(options = nil) module Specs class FileCollector - def initialize(gemspec) + def self.for(srcdir, type, gemspec) + relative_base = (File.dirname(gemspec) if gemspec.include?("/")) + const_get(type.capitalize).new(gemspec, srcdir, relative_base) + end + + attr_reader :gemspec, :srcdir, :relative_base + def initialize(gemspec, srcdir, relative_base) @gemspec = gemspec - @base_dir = File.dirname(gemspec) + @srcdir = srcdir + @relative_base = relative_base end def collect - (ruby_libraries + built_libraries).sort + ruby_libraries.sort end - def skip_install?(files) - case type - when "ext" + class Ext < self + def skip_install?(files) # install ext only when it's configured !File.exist?("#{$ext_build_dir}/#{relative_base}/Makefile") - when "lib" - files.empty? 
end - end - - private - def type - /\/(ext|lib)?\/.*?\z/ =~ @base_dir - $1 - end - def ruby_libraries - case type - when "ext" - prefix = "#{$extout}/common/" - base = "#{prefix}#{relative_base}" - when "lib" - base = @base_dir - prefix = base.sub(/lib\/.*?\z/, "") - # for lib/net/net-smtp.gemspec - if m = File.basename(@gemspec, ".gemspec").match(/.*\-(.*)\z/) - base = "#{@base_dir}/#{m[1]}" unless remove_prefix(prefix, @base_dir).include?(m[1]) - end + def ruby_libraries + Dir.glob("lib/**/*.rb", base: "#{srcdir}/ext/#{relative_base}") end + end - files = if base - Dir.glob("#{base}{.rb,/**/*.rb}").collect do |ruby_source| - remove_prefix(prefix, ruby_source) - end - else - [@gemspec[%r[(?:[^/]+/)?[^/]+(?=\.gemspec\z)]] + '.rb'] - end - - case File.basename(@gemspec, ".gemspec") - when "net-http" - files << "lib/net/https.rb" - when "optparse" - files << "lib/optionparser.rb" + class Lib < self + def skip_install?(files) + files.empty? end - files - end - - def built_libraries - case type - when "ext" - prefix = "#{$extout}/#{CONFIG['arch']}/" - base = "#{prefix}#{relative_base}" - dlext = CONFIG['DLEXT'] - Dir.glob("#{base}{.#{dlext},/**/*.#{dlext}}").collect do |built_library| - remove_prefix(prefix, built_library) + def ruby_libraries + gemname = File.basename(gemspec, ".gemspec") + base = relative_base || gemname + # for lib/net/net-smtp.gemspec + if m = /.*(?=-(.*)\z)/.match(gemname) + base = File.join(base, *m.to_a.select {|n| !base.include?(n)}) + end + files = Dir.glob("lib/#{base}{.rb,/**/*.rb}", base: srcdir) + if !relative_base and files.empty? 
# no files at the toplevel + # pseudo gem like ruby2_keywords + files << "lib/#{gemname}.rb" end - when "lib" - [] - else - [] - end - end - def relative_base - /\/#{Regexp.escape(type)}\/(.*?)\z/ =~ @base_dir - $1 - end + case gemname + when "net-http" + files << "lib/net/https.rb" + when "optparse" + files << "lib/optionparser.rb" + end - def remove_prefix(prefix, string) - string.sub(/\A#{Regexp.escape(prefix)}/, "") + files + end end end end @@ -984,9 +958,10 @@ def install_default_gem(dir, srcdir, bindir) } default_spec_dir = Gem.default_specifications_dir - gems = Dir.glob("#{srcdir}/#{dir}/**/*.gemspec").map {|src| - spec = load_gemspec(src) - file_collector = RbInstall::Specs::FileCollector.new(src) + base = "#{srcdir}/#{dir}" + gems = Dir.glob("**/*.gemspec", base: base).map {|src| + spec = load_gemspec("#{base}/#{src}") + file_collector = RbInstall::Specs::FileCollector.for(srcdir, dir, src) files = file_collector.collect if file_collector.skip_install?(files) next From ac890ec0624e3d8a44d85d67127bc94322caa34e Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 17 Aug 2022 00:52:37 +0900 Subject: [PATCH 131/546] Make date in installed gemspec files stable Set `date` member to `RUBY_RELEASE_DATE` instead of the date at the build time, to make installed files reproducible. --- tool/rbinstall.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/tool/rbinstall.rb b/tool/rbinstall.rb index 68c96bee854f8e..c944ef74da898f 100755 --- a/tool/rbinstall.rb +++ b/tool/rbinstall.rb @@ -933,6 +933,7 @@ def load_gemspec(file, base = nil) end spec.loaded_from = base ? File.join(base, File.basename(file)) : file spec.files.reject! 
{|n| n.end_with?(".gemspec") or n.start_with?(".git")} + spec.date = RUBY_RELEASE_DATE spec end From 81fbc8d5531be5e61f639c2ee4509017b5e010a0 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 13 Aug 2022 23:50:00 +0900 Subject: [PATCH 132/546] Move `mjit_exec` to vm.c --- mjit.h | 95 -------------------------------------------------------- vm.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 95 deletions(-) diff --git a/mjit.h b/mjit.h index fad18208fbb900..48790f66174422 100644 --- a/mjit.h +++ b/mjit.h @@ -103,101 +103,6 @@ extern void mjit_cont_free(struct mjit_cont *cont); extern void mjit_mark_cc_entries(const struct rb_iseq_constant_body *const body); extern void mjit_notify_waitpid(int status); -# ifdef MJIT_HEADER -NOINLINE(static COLDFUNC VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body)); -# else -static inline VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body); -# endif -static VALUE -mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body) -{ - uintptr_t func_i = (uintptr_t)(body->jit_func); - ASSUME(func_i <= LAST_JIT_ISEQ_FUNC); - switch ((enum rb_mjit_iseq_func)func_i) { - case NOT_ADDED_JIT_ISEQ_FUNC: - RB_DEBUG_COUNTER_INC(mjit_exec_not_added); - if (body->total_calls == mjit_opts.min_calls) { - rb_mjit_add_iseq_to_process(iseq); - if (UNLIKELY(mjit_opts.wait)) { - return rb_mjit_wait_call(ec, body); - } - } - break; - case NOT_READY_JIT_ISEQ_FUNC: - RB_DEBUG_COUNTER_INC(mjit_exec_not_ready); - break; - case NOT_COMPILED_JIT_ISEQ_FUNC: - RB_DEBUG_COUNTER_INC(mjit_exec_not_compiled); - break; - default: // to avoid warning with LAST_JIT_ISEQ_FUNC - break; - } - return Qundef; -} - -// Try to execute the current iseq in ec. Use JIT code if it is ready. 
-// If it is not, add ISEQ to the compilation queue and return Qundef for MJIT. -// YJIT compiles on the thread running the iseq. -static inline VALUE -mjit_exec(rb_execution_context_t *ec) -{ - const rb_iseq_t *iseq = ec->cfp->iseq; - struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); - bool yjit_enabled = false; -#ifndef MJIT_HEADER - // Don't want to compile with YJIT or use code generated by YJIT - // when running inside code generated by MJIT. - yjit_enabled = rb_yjit_enabled_p(); -#endif - - if (mjit_call_p || yjit_enabled) { - body->total_calls++; - } - -#ifndef MJIT_HEADER - if (yjit_enabled && !mjit_call_p && body->total_calls == rb_yjit_call_threshold()) { - // If we couldn't generate any code for this iseq, then return - // Qundef so the interpreter will handle the call. - if (!rb_yjit_compile_iseq(iseq, ec)) { - return Qundef; - } - } -#endif - - if (!(mjit_call_p || yjit_enabled)) - return Qundef; - - RB_DEBUG_COUNTER_INC(mjit_exec); - - mjit_func_t func = body->jit_func; - - // YJIT tried compiling this function once before and couldn't do - // it, so return Qundef so the interpreter handles it. 
- if (yjit_enabled && func == 0) { - return Qundef; - } - - if (UNLIKELY((uintptr_t)func <= LAST_JIT_ISEQ_FUNC)) { -# ifdef MJIT_HEADER - RB_DEBUG_COUNTER_INC(mjit_frame_JT2VM); -# else - RB_DEBUG_COUNTER_INC(mjit_frame_VM2VM); -# endif - return mjit_exec_slowpath(ec, iseq, body); - } - -# ifdef MJIT_HEADER - RB_DEBUG_COUNTER_INC(mjit_frame_JT2JT); -# else - RB_DEBUG_COUNTER_INC(mjit_frame_VM2JT); -# endif - RB_DEBUG_COUNTER_INC(mjit_exec_call_func); - // Under SystemV x64 calling convention - // ec -> RDI - // cfp -> RSI - return func(ec, ec->cfp); -} - void mjit_child_after_fork(void); # ifdef MJIT_HEADER diff --git a/vm.c b/vm.c index 4b1a30e7cae45a..394fa333885cd3 100644 --- a/vm.c +++ b/vm.c @@ -377,6 +377,103 @@ extern VALUE rb_vm_invoke_bmethod(rb_execution_context_t *ec, rb_proc_t *proc, V const rb_callable_method_entry_t *me); static VALUE vm_invoke_proc(rb_execution_context_t *ec, rb_proc_t *proc, VALUE self, int argc, const VALUE *argv, int kw_splat, VALUE block_handler); +#if USE_MJIT +# ifdef MJIT_HEADER +NOINLINE(static COLDFUNC VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body)); +# else +static inline VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body); +# endif +static VALUE +mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body) +{ + uintptr_t func_i = (uintptr_t)(body->jit_func); + ASSUME(func_i <= LAST_JIT_ISEQ_FUNC); + switch ((enum rb_mjit_iseq_func)func_i) { + case NOT_ADDED_JIT_ISEQ_FUNC: + RB_DEBUG_COUNTER_INC(mjit_exec_not_added); + if (body->total_calls == mjit_opts.min_calls) { + rb_mjit_add_iseq_to_process(iseq); + if (UNLIKELY(mjit_opts.wait)) { + return rb_mjit_wait_call(ec, body); + } + } + break; + case NOT_READY_JIT_ISEQ_FUNC: + RB_DEBUG_COUNTER_INC(mjit_exec_not_ready); + break; + case NOT_COMPILED_JIT_ISEQ_FUNC: + RB_DEBUG_COUNTER_INC(mjit_exec_not_compiled); + 
break; + default: // to avoid warning with LAST_JIT_ISEQ_FUNC + break; + } + return Qundef; +} + +// Try to execute the current iseq in ec. Use JIT code if it is ready. +// If it is not, add ISEQ to the compilation queue and return Qundef for MJIT. +// YJIT compiles on the thread running the iseq. +static inline VALUE +mjit_exec(rb_execution_context_t *ec) +{ + const rb_iseq_t *iseq = ec->cfp->iseq; + struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); + bool yjit_enabled = false; +# ifndef MJIT_HEADER + // Don't want to compile with YJIT or use code generated by YJIT + // when running inside code generated by MJIT. + yjit_enabled = rb_yjit_enabled_p(); +# endif + + if (mjit_call_p || yjit_enabled) { + body->total_calls++; + } + +# ifndef MJIT_HEADER + if (yjit_enabled && !mjit_call_p && body->total_calls == rb_yjit_call_threshold()) { + // If we couldn't generate any code for this iseq, then return + // Qundef so the interpreter will handle the call. + if (!rb_yjit_compile_iseq(iseq, ec)) { + return Qundef; + } + } +# endif + + if (!(mjit_call_p || yjit_enabled)) + return Qundef; + + RB_DEBUG_COUNTER_INC(mjit_exec); + + mjit_func_t func = body->jit_func; + + // YJIT tried compiling this function once before and couldn't do + // it, so return Qundef so the interpreter handles it. 
+ if (yjit_enabled && func == 0) { + return Qundef; + } + + if (UNLIKELY((uintptr_t)func <= LAST_JIT_ISEQ_FUNC)) { +# ifdef MJIT_HEADER + RB_DEBUG_COUNTER_INC(mjit_frame_JT2VM); +# else + RB_DEBUG_COUNTER_INC(mjit_frame_VM2VM); +# endif + return mjit_exec_slowpath(ec, iseq, body); + } + +# ifdef MJIT_HEADER + RB_DEBUG_COUNTER_INC(mjit_frame_JT2JT); +# else + RB_DEBUG_COUNTER_INC(mjit_frame_VM2JT); +# endif + RB_DEBUG_COUNTER_INC(mjit_exec_call_func); + // Under SystemV x64 calling convention + // ec -> RDI + // cfp -> RSI + return func(ec, ec->cfp); +} +#endif + #include "vm_insnhelper.c" #ifndef MJIT_HEADER From b880576e595adab0e5ed0a25369ec7035c2cbfc1 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 14 Aug 2022 00:13:24 +0900 Subject: [PATCH 133/546] yjit.h is not necessary for all sources using mjit.h --- mjit.h | 1 - mjit_compile.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/mjit.h b/mjit.h index 48790f66174422..344b20b9013501 100644 --- a/mjit.h +++ b/mjit.h @@ -17,7 +17,6 @@ #include "debug_counter.h" #include "ruby.h" #include "vm_core.h" -#include "yjit.h" // Special address values of a function generated from the // corresponding iseq by MJIT: diff --git a/mjit_compile.c b/mjit_compile.c index 2c7996c2589398..390e3d2850ad97 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -21,6 +21,7 @@ #include "internal/variable.h" #include "mjit.h" #include "mjit_unit.h" +#include "yjit.h" #include "vm_core.h" #include "vm_callinfo.h" #include "vm_exec.h" From 72adee6e36249b39460913ce54ca9296bea80225 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 14 Aug 2022 15:33:30 +0900 Subject: [PATCH 134/546] Update dependencies --- common.mk | 5 ----- 1 file changed, 5 deletions(-) diff --git a/common.mk b/common.mk index cf08764bc9e3ff..e34139d4854b72 100644 --- a/common.mk +++ b/common.mk @@ -3499,7 +3499,6 @@ cont.$(OBJEXT): {$(VPATH)}thread_native.h cont.$(OBJEXT): {$(VPATH)}vm_core.h cont.$(OBJEXT): {$(VPATH)}vm_debug.h 
cont.$(OBJEXT): {$(VPATH)}vm_opts.h -cont.$(OBJEXT): {$(VPATH)}yjit.h debug.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h debug.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h debug.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -6381,7 +6380,6 @@ eval.$(OBJEXT): {$(VPATH)}vm.h eval.$(OBJEXT): {$(VPATH)}vm_core.h eval.$(OBJEXT): {$(VPATH)}vm_debug.h eval.$(OBJEXT): {$(VPATH)}vm_opts.h -eval.$(OBJEXT): {$(VPATH)}yjit.h explicit_bzero.$(OBJEXT): {$(VPATH)}config.h explicit_bzero.$(OBJEXT): {$(VPATH)}explicit_bzero.c explicit_bzero.$(OBJEXT): {$(VPATH)}internal/attr/format.h @@ -6834,7 +6832,6 @@ gc.$(OBJEXT): {$(VPATH)}vm_core.h gc.$(OBJEXT): {$(VPATH)}vm_debug.h gc.$(OBJEXT): {$(VPATH)}vm_opts.h gc.$(OBJEXT): {$(VPATH)}vm_sync.h -gc.$(OBJEXT): {$(VPATH)}yjit.h goruby.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h goruby.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h goruby.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -11297,7 +11294,6 @@ process.$(OBJEXT): {$(VPATH)}thread_native.h process.$(OBJEXT): {$(VPATH)}util.h process.$(OBJEXT): {$(VPATH)}vm_core.h process.$(OBJEXT): {$(VPATH)}vm_opts.h -process.$(OBJEXT): {$(VPATH)}yjit.h ractor.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h ractor.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h ractor.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -15427,7 +15423,6 @@ thread.$(OBJEXT): {$(VPATH)}vm_core.h thread.$(OBJEXT): {$(VPATH)}vm_debug.h thread.$(OBJEXT): {$(VPATH)}vm_opts.h thread.$(OBJEXT): {$(VPATH)}vm_sync.h -thread.$(OBJEXT): {$(VPATH)}yjit.h time.$(OBJEXT): $(hdrdir)/ruby/ruby.h time.$(OBJEXT): $(top_srcdir)/internal/array.h time.$(OBJEXT): $(top_srcdir)/internal/bignum.h From ddb81b9307f7114fcb37f6fcc37b7692b1a40cb4 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 17 Aug 2022 17:39:40 +0900 Subject: [PATCH 135/546] Extract common code from `rb_enc_vsprintf` and `rb_str_vcatf` --- sprintf.c | 58 ++++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git 
a/sprintf.c b/sprintf.c index 09b9bf15d2d60a..2b2b34b5b4b190 100644 --- a/sprintf.c +++ b/sprintf.c @@ -1145,35 +1145,44 @@ ruby__sfvextra(rb_printf_buffer *fp, size_t valsize, void *valp, long *sz, int s return cp; } -VALUE -rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) +static void +ruby_vsprintf0(VALUE result, char *p, const char *fmt, va_list ap) { rb_printf_buffer_extra buffer; #define f buffer.base - VALUE result; + VALUE klass = RBASIC(result)->klass; f._flags = __SWR | __SSTR; f._bf._size = 0; - f._w = 120; - result = rb_str_buf_new(f._w); - if (enc) { - if (rb_enc_mbminlen(enc) > 1) { - /* the implementation deeply depends on plain char */ - rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", - rb_enc_name(enc)); - } - rb_enc_associate(result, enc); - } + f._w = rb_str_capacity(result); f._bf._base = (unsigned char *)result; - f._p = (unsigned char *)RSTRING_PTR(result); + f._p = (unsigned char *)p; RBASIC_CLEAR_CLASS(result); f.vwrite = ruby__sfvwrite; f.vextra = ruby__sfvextra; buffer.value = 0; BSD_vfprintf(&f, fmt, ap); - RBASIC_SET_CLASS_RAW(result, rb_cString); + RBASIC_SET_CLASS_RAW(result, klass); rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); #undef f +} + +VALUE +rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) +{ + const int initial_len = 120; + VALUE result; + + result = rb_str_buf_new(initial_len); + if (enc) { + if (rb_enc_mbminlen(enc) > 1) { + /* the implementation deeply depends on plain char */ + rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", + rb_enc_name(enc)); + } + rb_enc_associate(result, enc); + } + ruby_vsprintf0(result, RSTRING_PTR(result), fmt, ap); return result; } @@ -1213,26 +1222,9 @@ rb_sprintf(const char *format, ...) 
VALUE rb_str_vcatf(VALUE str, const char *fmt, va_list ap) { - rb_printf_buffer_extra buffer; -#define f buffer.base - VALUE klass; - StringValue(str); rb_str_modify(str); - f._flags = __SWR | __SSTR; - f._bf._size = 0; - f._w = rb_str_capacity(str); - f._bf._base = (unsigned char *)str; - f._p = (unsigned char *)RSTRING_END(str); - klass = RBASIC(str)->klass; - RBASIC_CLEAR_CLASS(str); - f.vwrite = ruby__sfvwrite; - f.vextra = ruby__sfvextra; - buffer.value = 0; - BSD_vfprintf(&f, fmt, ap); - RBASIC_SET_CLASS_RAW(str, klass); - rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); -#undef f + ruby_vsprintf0(str, RSTRING_END(str), fmt, ap); return str; } From b4daf6e28e4e0291e8c4c90d4ec7c0a00f715965 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 17 Aug 2022 09:38:11 -0400 Subject: [PATCH 136/546] Fix flaky test for GC thrashing GC could be in an intermediate state after creating the objects, so we should finish GC by running a minor GC. --- test/ruby/test_gc.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index a1e782daa93a21..d2f1e21e33e4b1 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -413,6 +413,10 @@ def test_thrashing_for_young_objects 1_000_000.times { Object.new } + # Previous loop may have caused GC to be in an intermediate state, + # running a minor GC here will guarantee that GC will be complete + GC.start(full_mark: false) + after_stats = GC.stat # Should not be thrashing in page creation From b4539dba7ae85de192b6d0114e3edf70a4cf6ae6 Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Wed, 17 Aug 2022 13:24:50 -0400 Subject: [PATCH 137/546] Added vm setivar benchmark from yjit-bench --- benchmark/vm_ivar_set_on_instance.yml | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 benchmark/vm_ivar_set_on_instance.yml diff --git a/benchmark/vm_ivar_set_on_instance.yml b/benchmark/vm_ivar_set_on_instance.yml new file mode 100644 index 
00000000000000..f0d5c169648af0 --- /dev/null +++ b/benchmark/vm_ivar_set_on_instance.yml @@ -0,0 +1,35 @@ +prelude: | + class TheClass + def initialize + @v0 = 1 + @v1 = 2 + @v3 = 3 + @levar = 1 + end + + def set_value_loop + # 1M + i = 0 + while i < 1000000 + # 10 times to de-emphasize loop overhead + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + i += 1 + end + end + end + + obj = TheClass.new + +benchmark: + vm_ivar_set_on_instance: | + obj.set_value_loop +loop_count: 100 From bfefaf47f019c4f3b7f42a8fff5e580e030ff62c Mon Sep 17 00:00:00 2001 From: git Date: Thu, 18 Aug 2022 02:26:43 +0900 Subject: [PATCH 138/546] * 2022-08-18 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 1b4afbfdebd92d..b94d5439492329 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 17 +#define RUBY_RELEASE_DAY 18 #include "ruby/version.h" #include "ruby/internal/abi.h" From fbaa2f0b3b73271b692e7dc47cf19df6e05a7eeb Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Aug 2022 11:05:34 +0900 Subject: [PATCH 139/546] Suppress detached head warnings [ci skip] Check out the revisions for testing as "detached" from the beginning. 
--- defs/gmake.mk | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index 9d055d0f1aa2a6..93b73736681453 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -324,7 +324,10 @@ $(srcdir)/gems/src/$(1): | $(srcdir)/gems/src $(srcdir)/.bundle/gems/$(1)-$(2): | $(srcdir)/gems/src/$(1) .bundle/gems $(ECHO) Copying $(1)@$(3) to $$(@F) - $(Q) $(CHDIR) "$(srcdir)/gems/src/$(1)" && $(GIT) fetch origin $(3) && $(GIT) checkout $(3) + $(Q) $(CHDIR) "$(srcdir)/gems/src/$(1)" && \ + $(GIT) fetch origin $(3) && \ + $(GIT) checkout --detach $(3) && \ + : $(Q) $(BASERUBY) -C "$(srcdir)" \ -Itool/lib -rbundled_gem \ -e 'BundledGem.copy("gems/src/$(1)/$(1).gemspec", ".bundle")' From 2a55c61ee77df55e8715809958ea0439f3918cf2 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Thu, 18 Aug 2022 11:35:17 +0900 Subject: [PATCH 140/546] ext/pty/extconf.rb: Try libutil only on OpenBSD icc now seems to provide libutil.so that is not related to pty. This extconf.rb wrongly finds it and adds `-lutil`, but `ruby -rpty` fails because it cannot find libutil.so on the runtime. http://rubyci.s3.amazonaws.com/icc-x64/ruby-master/log/20220815T210005Z.fail.html.gz ``` Exception raised: <#> ``` This change makes extconf.rb check libutil only on OpenBSD. 
--- ext/pty/extconf.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/pty/extconf.rb b/ext/pty/extconf.rb index 038bdf4d2c337f..ba0c4286fd31d1 100644 --- a/ext/pty/extconf.rb +++ b/ext/pty/extconf.rb @@ -7,10 +7,12 @@ have_header("sys/stropts.h") have_func("setresuid") have_header("libutil.h") - have_header("util.h") # OpenBSD openpty have_header("pty.h") have_header("pwd.h") - util = have_library("util", "openpty") + if /openbsd/ =~ RUBY_PLATFORM + have_header("util.h") # OpenBSD openpty + util = have_library("util", "openpty") + end if have_func("posix_openpt") or (util or have_func("openpty")) or have_func("_getpty") or From 725626d8905fe1ac4a2cf1c3e2db6412bf8f381f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Aug 2022 11:37:54 +0900 Subject: [PATCH 141/546] [Bug #18964] Update the code range of appended portion --- ext/-test-/econv/append.c | 15 +++++++++++++++ ext/-test-/econv/extconf.rb | 3 +++ ext/-test-/econv/init.c | 11 +++++++++++ transcode.c | 34 ++++++++++++++++++++++++++++++---- 4 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 ext/-test-/econv/append.c create mode 100644 ext/-test-/econv/extconf.rb create mode 100644 ext/-test-/econv/init.c diff --git a/ext/-test-/econv/append.c b/ext/-test-/econv/append.c new file mode 100644 index 00000000000000..724cd136c02e40 --- /dev/null +++ b/ext/-test-/econv/append.c @@ -0,0 +1,15 @@ +#include "ruby/ruby.h" +#include "ruby/encoding.h" + +static VALUE +econv_append(VALUE self, VALUE src, VALUE dst) +{ + rb_econv_t *ec = DATA_PTR(self); + return rb_econv_str_append(ec, src, dst, 0); +} + +void +Init_econv_append(VALUE klass) +{ + rb_define_method(klass, "append", econv_append, 2); +} diff --git a/ext/-test-/econv/extconf.rb b/ext/-test-/econv/extconf.rb new file mode 100644 index 00000000000000..d786b15db98c7f --- /dev/null +++ b/ext/-test-/econv/extconf.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: false +require_relative "../auto_ext.rb" 
+auto_ext(inc: true) diff --git a/ext/-test-/econv/init.c b/ext/-test-/econv/init.c new file mode 100644 index 00000000000000..9772ebe71ce69f --- /dev/null +++ b/ext/-test-/econv/init.c @@ -0,0 +1,11 @@ +#include "ruby.h" + +#define init(n) {void Init_econv_##n(VALUE klass); Init_econv_##n(klass);} + +void +Init_econv(void) +{ + VALUE mBug = rb_define_module("Bug"); + VALUE klass = rb_define_class_under(mBug, "EConv", rb_path2class("Encoding::Converter")); + TEST_INIT_FUNCS(init); +} diff --git a/transcode.c b/transcode.c index 939e9567f9f6c6..5fafad398fb68c 100644 --- a/transcode.c +++ b/transcode.c @@ -1812,6 +1812,12 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name) return data.ascii_compat_name; } +/* + * Append `len` bytes pointed by `ss` to `dst` with converting with `ec`. + * + * If the result of the conversion is not compatible with the encoding of + * `dst`, `dst` may not be valid encoding. + */ VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) { @@ -1819,11 +1825,19 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) unsigned char *ds, *dp, *de; rb_econv_result_t res; int max_output; + enum ruby_coderange_type coderange; + rb_encoding *dst_enc = ec->destination_encoding; if (NIL_P(dst)) { dst = rb_str_buf_new(len); - if (ec->destination_encoding) - rb_enc_associate(dst, ec->destination_encoding); + if (dst_enc) { + rb_enc_associate(dst, dst_enc); + } + coderange = ENC_CODERANGE_7BIT; // scan from the start + } + else { + dst_enc = rb_enc_get(dst); + coderange = rb_enc_str_coderange(dst); } if (ec->last_tc) @@ -1832,13 +1846,13 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) max_output = 1; do { + int cr; long dlen = RSTRING_LEN(dst); if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) { unsigned long new_capa = (unsigned long)dlen + len + max_output; if (LONG_MAX < new_capa) rb_raise(rb_eArgError, "too long string"); - 
rb_str_resize(dst, new_capa); - rb_str_set_len(dst, dlen); + rb_str_modify_expand(dst, new_capa - dlen); } sp = (const unsigned char *)ss; se = sp + len; @@ -1846,6 +1860,18 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) de = ds + rb_str_capacity(dst); dp = ds += dlen; res = rb_econv_convert(ec, &sp, se, &dp, de, flags); + switch (coderange) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + cr = (int)coderange; + rb_str_coderange_scan_restartable((char *)ds, (char *)dp, dst_enc, &cr); + coderange = cr; + ENC_CODERANGE_SET(dst, coderange); + break; + case ENC_CODERANGE_UNKNOWN: + case ENC_CODERANGE_BROKEN: + break; + } len -= (const char *)sp - ss; ss = (const char *)sp; rb_str_set_len(dst, dlen + (dp - ds)); From b3718edee28d5155ebc383d17ab58867e20f4aa2 Mon Sep 17 00:00:00 2001 From: git Date: Thu, 18 Aug 2022 07:04:23 +0000 Subject: [PATCH 142/546] Update bundled gems list at 2022-08-18 --- NEWS.md | 2 +- gems/bundled_gems | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6498621712270b..5c4fadc7de295e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -179,7 +179,7 @@ Note: We're only listing outstanding class updates. * stringio 3.0.3 * timeout 0.3.0 * The following bundled gems are updated. 
- * minitest 5.16.2 + * minitest 5.16.3 * net-imap 0.2.3 * rbs 2.6.0 * typeprof 0.21.3 diff --git a/gems/bundled_gems b/gems/bundled_gems index 5faa75c5e228ca..63536571d9cef1 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -1,5 +1,5 @@ # gem-name version-to-bundle repository-url [optional-commit-hash-to-test-or-defaults-to-v-version] -minitest 5.16.2 https://github.com/seattlerb/minitest +minitest 5.16.3 https://github.com/seattlerb/minitest power_assert 2.0.1 https://github.com/ruby/power_assert rake 13.0.6 https://github.com/ruby/rake test-unit 3.5.3 https://github.com/test-unit/test-unit From fe61cad7490da8a879597f851f4a89856d44838e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 26 Jul 2022 17:40:00 +0200 Subject: [PATCH 143/546] Implement SizedQueue#push(timeout: sec) [Feature #18944] If both `non_block=true` and `timeout:` are supplied, ArgumentError is raised. --- spec/ruby/shared/sizedqueue/enque.rb | 57 ++++++++++++++++++++++++++++ test/ruby/test_thread_queue.rb | 18 +++++++++ thread_sync.c | 48 +++++++---------------- thread_sync.rb | 23 +++++++++++ 4 files changed, 111 insertions(+), 35 deletions(-) diff --git a/spec/ruby/shared/sizedqueue/enque.rb b/spec/ruby/shared/sizedqueue/enque.rb index 6ef12349f899d3..126470594a43c4 100644 --- a/spec/ruby/shared/sizedqueue/enque.rb +++ b/spec/ruby/shared/sizedqueue/enque.rb @@ -47,4 +47,61 @@ t.join q.pop.should == 1 end + + describe "with a timeout" do + ruby_version_is "3.2" do + it "returns self if the item was pushed in time" do + q = @object.call(1) + q << 1 + + t = Thread.new { + q.send(@method, 2, timeout: 1).should == q + } + Thread.pass until t.status == "sleep" && q.num_waiting == 1 + q.pop + t.join + end + + it "does nothing if the timeout is nil" do + q = @object.call(1) + q << 1 + t = Thread.new { + q.send(@method, 2, timeout: nil).should == q + } + t.join(0.2).should == nil + q.pop + t.join + end + + it "returns nil if no item is available in time" do + q = @object.call(1) + q << 1 
+ t = Thread.new { + q.send(@method, 2, timeout: 0.1).should == nil + } + t.join + end + + it "raise TypeError if timeout is not a valid numeric" do + q = @object.call(1) + -> { q.send(@method, 2, timeout: "1") }.should raise_error( + TypeError, + "no implicit conversion to float from string", + ) + + -> { q.send(@method, 2, timeout: false) }.should raise_error( + TypeError, + "no implicit conversion to float from false", + ) + end + + it "raise ArgumentError if non_block = true is passed too" do + q = @object.call(1) + -> { q.send(@method, 2, true, timeout: 1) }.should raise_error( + ArgumentError, + "can't set a timeout if non_block is enabled", + ) + end + end + end end diff --git a/test/ruby/test_thread_queue.rb b/test/ruby/test_thread_queue.rb index 1c852474b4277f..bd5728389d774e 100644 --- a/test/ruby/test_thread_queue.rb +++ b/test/ruby/test_thread_queue.rb @@ -168,6 +168,24 @@ def test_sized_queue_pop_non_block end end + def test_sized_queue_push_timeout + q = Thread::SizedQueue.new(1) + + q << 1 + assert_equal 1, q.size + + t1 = Thread.new { q.push(2, timeout: 1) } + assert_equal t1, t1.join(2) + assert_nil t1.value + + t2 = Thread.new { q.push(2, timeout: 0.1) } + assert_equal t2, t2.join(0.2) + assert_nil t2.value + ensure + t1&.kill + t2&.kill + end + def test_sized_queue_push_interrupt q = Thread::SizedQueue.new(1) q.push(1) diff --git a/thread_sync.c b/thread_sync.c index 63db1c43922f52..4ae404ec055bb7 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -1229,39 +1229,15 @@ rb_szqueue_max_set(VALUE self, VALUE vmax) return vmax; } -static int -szqueue_push_should_block(int argc, const VALUE *argv) -{ - int should_block = 1; - rb_check_arity(argc, 1, 2); - if (argc > 1) { - should_block = !RTEST(argv[1]); - } - return should_block; -} - -/* - * Document-method: Thread::SizedQueue#push - * call-seq: - * push(object, non_block=false) - * enq(object, non_block=false) - * <<(object) - * - * Pushes +object+ to the queue. 
- * - * If there is no space left in the queue, waits until space becomes - * available, unless +non_block+ is true. If +non_block+ is true, the - * thread isn't suspended, and +ThreadError+ is raised. - */ - static VALUE -rb_szqueue_push(int argc, VALUE *argv, VALUE self) +rb_szqueue_push(rb_execution_context_t *ec, VALUE self, VALUE object, VALUE non_block, VALUE timeout) { + rb_hrtime_t end = queue_timeout2hrtime(timeout); + bool timed_out = false; struct rb_szqueue *sq = szqueue_ptr(self); - int should_block = szqueue_push_should_block(argc, argv); while (queue_length(self, &sq->q) >= sq->max) { - if (!should_block) { + if (RTEST(non_block)) { rb_raise(rb_eThreadError, "queue full"); } else if (queue_closed_p(self)) { @@ -1281,11 +1257,14 @@ rb_szqueue_push(int argc, VALUE *argv, VALUE self) struct queue_sleep_arg queue_sleep_arg = { .self = self, - .timeout = Qnil, - .end = 0 + .timeout = timeout, + .end = end }; - rb_ensure(queue_sleep, (VALUE)&queue_sleep_arg, szqueue_sleep_done, (VALUE)&queue_waiter); + if (!NIL_P(timeout) && rb_hrtime_now() >= end) { + timed_out = true; + break; + } } } @@ -1293,7 +1272,9 @@ rb_szqueue_push(int argc, VALUE *argv, VALUE self) raise_closed_queue_error(self); } - return queue_do_push(self, &sq->q, argv[0]); + if (timed_out) return Qnil; + + return queue_do_push(self, &sq->q, object); } static VALUE @@ -1611,13 +1592,10 @@ Init_thread_sync(void) rb_define_method(rb_cSizedQueue, "close", rb_szqueue_close, 0); rb_define_method(rb_cSizedQueue, "max", rb_szqueue_max_get, 0); rb_define_method(rb_cSizedQueue, "max=", rb_szqueue_max_set, 1); - rb_define_method(rb_cSizedQueue, "push", rb_szqueue_push, -1); rb_define_method(rb_cSizedQueue, "empty?", rb_szqueue_empty_p, 0); rb_define_method(rb_cSizedQueue, "clear", rb_szqueue_clear, 0); rb_define_method(rb_cSizedQueue, "length", rb_szqueue_length, 0); rb_define_method(rb_cSizedQueue, "num_waiting", rb_szqueue_num_waiting, 0); - rb_define_alias(rb_cSizedQueue, "enq", "push"); - 
rb_define_alias(rb_cSizedQueue, "<<", "push"); rb_define_alias(rb_cSizedQueue, "size", "length"); /* CVar */ diff --git a/thread_sync.rb b/thread_sync.rb index d567ca51af1047..7e4c341ad0c65e 100644 --- a/thread_sync.rb +++ b/thread_sync.rb @@ -41,5 +41,28 @@ def pop(non_block = false, timeout: nil) end alias_method :deq, :pop alias_method :shift, :pop + + # call-seq: + # push(object, non_block=false, timeout: nil) + # enq(object, non_block=false, timeout: nil) + # <<(object) + # + # Pushes +object+ to the queue. + # + # If there is no space left in the queue, waits until space becomes + # available, unless +non_block+ is true. If +non_block+ is true, the + # thread isn't suspended, and +ThreadError+ is raised. + # + # If +timeout+ seconds have passed and no space is available +nil+ is + # returned. + # Otherwise it returns +self+. + def push(object, non_block = false, timeout: nil) + if non_block && timeout + raise ArgumentError, "can't set a timeout if non_block is enabled" + end + Primitive.rb_szqueue_push(object, non_block, timeout) + end + alias_method :enq, :push + alias_method :<<, :push end end From b0b9f7201acab05c2a3ad92c3043a1f01df3e17f Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 25 Jul 2022 10:21:38 +0200 Subject: [PATCH 144/546] rb_str_resize: Only clear coderange on truncation If we are expanding the string or only stripping extra capacity then coderange won't change, so clearing it is wasteful. 
--- file.c | 3 +++ sprintf.c | 5 ++++- string.c | 13 ++++++------- test/ruby/test_sprintf.rb | 10 ++++++++++ 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/file.c b/file.c index 5265d3a3a5d141..92338bb7549b1e 100644 --- a/file.c +++ b/file.c @@ -4090,6 +4090,9 @@ static VALUE str_shrink(VALUE str) { rb_str_resize(str, RSTRING_LEN(str)); + // expand_path on Windows can sometimes mutate the string + // without clearing its coderange + ENC_CODERANGE_CLEAR(str); return str; } diff --git a/sprintf.c b/sprintf.c index 2b2b34b5b4b190..1ee293b6d96808 100644 --- a/sprintf.c +++ b/sprintf.c @@ -937,6 +937,8 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) if (RTEST(ruby_verbose)) rb_warn("%s", mesg); } rb_str_resize(result, blen); + // rb_str_format mutates the string without updating coderange + ENC_CODERANGE_CLEAR(result); return result; } @@ -1163,6 +1165,8 @@ ruby_vsprintf0(VALUE result, char *p, const char *fmt, va_list ap) buffer.value = 0; BSD_vfprintf(&f, fmt, ap); RBASIC_SET_CLASS_RAW(result, klass); + // vfprintf mutates the string without updating coderange + ENC_CODERANGE_CLEAR(result); rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); #undef f } @@ -1183,7 +1187,6 @@ rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) rb_enc_associate(result, enc); } ruby_vsprintf0(result, RSTRING_PTR(result), fmt, ap); - return result; } diff --git a/string.c b/string.c index e74783cf923652..6f211b200524bc 100644 --- a/string.c +++ b/string.c @@ -2498,7 +2498,6 @@ rb_str_modify_expand(VALUE str, long expand) else if (expand > 0) { RESIZE_CAPA_TERM(str, len + expand, termlen); } - ENC_CODERANGE_CLEAR(str); } /* As rb_str_modify(), but don't clear coderange */ @@ -3073,16 +3072,16 @@ rb_str_set_len(VALUE str, long len) VALUE rb_str_resize(VALUE str, long len) { - long slen; - int independent; - if (len < 0) { rb_raise(rb_eArgError, "negative string size (or size too big)"); } - independent = str_independent(str); - 
ENC_CODERANGE_CLEAR(str); - slen = RSTRING_LEN(str); + int independent = str_independent(str); + long slen = RSTRING_LEN(str); + + if (slen > len && ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { + ENC_CODERANGE_CLEAR(str); + } { long capa; diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb index f2e73eb58dce43..b05f4f3e448506 100644 --- a/test/ruby/test_sprintf.rb +++ b/test/ruby/test_sprintf.rb @@ -507,6 +507,16 @@ def test_named_typed_enc end end + def test_coderange + format_str = "wrong constant name %s" + interpolated_str = "\u3042" + assert_predicate format_str, :ascii_only? + refute_predicate interpolated_str, :ascii_only? + + str = format_str % interpolated_str + refute_predicate str, :ascii_only? + end + def test_named_default h = Hash.new('world') assert_equal("hello world", "hello %{location}" % h) From c53667691a52bcaea4314974201c53dfc282cd95 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Aug 2022 23:25:05 +0900 Subject: [PATCH 145/546] [DOC] `offset` argument of Regexp#match --- re.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/re.c b/re.c index a633d1bb7b312d..5091f9a124de62 100644 --- a/re.c +++ b/re.c @@ -3499,13 +3499,18 @@ rb_reg_match2(VALUE re) * * With no block given, returns the MatchData object * that describes the match, if any, or +nil+ if none; - * the search begins at the given byte +offset+ in +self+: + * the search begins at the given character +offset+ in +string+: * * /abra/.match('abracadabra') # => # * /abra/.match('abracadabra', 4) # => # * /abra/.match('abracadabra', 8) # => nil * /abra/.match('abracadabra', 800) # => nil * + * string = "\u{5d0 5d1 5e8 5d0}cadabra" + * /abra/.match(string, 7) #=> # + * /abra/.match(string, 8) #=> nil + * /abra/.match(string.b, 8) #=> # + * * With a block given, calls the block if and only if a match is found; * returns the block's value: * From 7c1ed470976f3488dd655e092ec1015d6b2f8310 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 
18 Aug 2022 23:33:23 +0900 Subject: [PATCH 146/546] Add tests for assert_pattern_list --- tool/test/testunit/test_assertion.rb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tool/test/testunit/test_assertion.rb b/tool/test/testunit/test_assertion.rb index 8c83b447a7ad7d..d12a794a230841 100644 --- a/tool/test/testunit/test_assertion.rb +++ b/tool/test/testunit/test_assertion.rb @@ -26,4 +26,20 @@ def test_assert_raise return_in_assert_raise end end + + def test_assert_pattern_list + assert_pattern_list([/foo?/], "foo") + assert_not_pattern_list([/foo?/], "afoo") + assert_not_pattern_list([/foo?/], "foo?") + assert_pattern_list([:*, /foo?/, :*], "foo") + assert_pattern_list([:*, /foo?/], "afoo") + assert_not_pattern_list([:*, /foo?/], "afoo?") + assert_pattern_list([/foo?/, :*], "foo?") + end + + def assert_not_pattern_list(pattern_list, actual, message=nil) + assert_raise(Test::Unit::AssertionFailedError) do + assert_pattern_list(pattern_list, actual, message) + end + end end From 2652b0da6bc1463fc7968d71a6f8897459370514 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 19 Aug 2022 00:16:44 +0900 Subject: [PATCH 147/546] Harden SizedQueue#push timeout test --- test/ruby/test_thread_queue.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/ruby/test_thread_queue.rb b/test/ruby/test_thread_queue.rb index bd5728389d774e..723450ad23d971 100644 --- a/test/ruby/test_thread_queue.rb +++ b/test/ruby/test_thread_queue.rb @@ -179,11 +179,11 @@ def test_sized_queue_push_timeout assert_nil t1.value t2 = Thread.new { q.push(2, timeout: 0.1) } - assert_equal t2, t2.join(0.2) + assert_equal t2, t2.join(1) assert_nil t2.value ensure - t1&.kill - t2&.kill + t1&.kill&.join + t2&.kill&.join end def test_sized_queue_push_interrupt From 8c44b07fa4b37f3c2a382e11978b743053093159 Mon Sep 17 00:00:00 2001 From: git Date: Fri, 19 Aug 2022 00:17:06 +0900 Subject: [PATCH 148/546] * 2022-08-19 [ci skip] --- version.h | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index b94d5439492329..b551f340ceb21f 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 18 +#define RUBY_RELEASE_DAY 19 #include "ruby/version.h" #include "ruby/internal/abi.h" From d903e7672637d5a834847820a4e18b00ee30f380 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Aug 2022 23:42:53 +0900 Subject: [PATCH 149/546] Allow strings in assert_pattern_list --- tool/lib/core_assertions.rb | 10 ++++++---- tool/test/testunit/test_assertion.rb | 8 ++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tool/lib/core_assertions.rb b/tool/lib/core_assertions.rb index 67373139caaaa9..321ca59f56acdc 100644 --- a/tool/lib/core_assertions.rb +++ b/tool/lib/core_assertions.rb @@ -548,11 +548,13 @@ def assert_pattern_list(pattern_list, actual, message=nil) anchored = false else if anchored - match = /\A#{pattern}/.match(rest) + match = rest.rindex(pattern, 0) else - match = pattern.match(rest) + match = rest.index(pattern) end - unless match + if match + post_match = $~ ? 
$~.post_match : rest[match+pattern.size..-1] + else msg = message(msg) { expect_msg = "Expected #{mu_pp pattern}\n" if /\n[^\n]/ =~ rest @@ -569,7 +571,7 @@ def assert_pattern_list(pattern_list, actual, message=nil) } assert false, msg end - rest = match.post_match + rest = post_match anchored = true end } diff --git a/tool/test/testunit/test_assertion.rb b/tool/test/testunit/test_assertion.rb index d12a794a230841..709b4955729643 100644 --- a/tool/test/testunit/test_assertion.rb +++ b/tool/test/testunit/test_assertion.rb @@ -35,6 +35,14 @@ def test_assert_pattern_list assert_pattern_list([:*, /foo?/], "afoo") assert_not_pattern_list([:*, /foo?/], "afoo?") assert_pattern_list([/foo?/, :*], "foo?") + + assert_not_pattern_list(["foo?"], "foo") + assert_not_pattern_list(["foo?"], "afoo") + assert_pattern_list(["foo?"], "foo?") + assert_not_pattern_list([:*, "foo?", :*], "foo") + assert_not_pattern_list([:*, "foo?"], "afoo") + assert_pattern_list([:*, "foo?"], "afoo?") + assert_pattern_list(["foo?", :*], "foo?") end def assert_not_pattern_list(pattern_list, actual, message=nil) From f1ccfa0c2c200c9443fbfc3f1ac3acbdd3e35559 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Wed, 13 Jul 2022 13:18:03 +0100 Subject: [PATCH 150/546] [ci-skip][Feature #18910][lldb] Provide class framework for lldb commands `lldb_cruby.py` manages lldb custom commands using functions. The file is a large list of Python functions, and an init handler to map some of the Python functions into the debugger, to enable execution of custom logic during a debugging session. Since LLDB 3.7 (September 2015) there has also been support for using python classes rather than bare functions, as long as those classes implement a specific interface. This PR Introduces some more defined structure to the LLDB helper functions by switching from the function based implementation to the class based one, and providing an auto-loading mechanism by which new functions can be loaded. 
The intention behind this change is to make working with the LLDB helpers easier, by reducing code duplication, providing a consistent structure and a clearer API for developers. The current function based approach has some advantages and disadvantages Advantages: - Adding new code is easy. - All the code is self contained and searchable. Disadvantages: - No visible organisation of the file contents. This means - Hard to tell which functions are utility functions and which are available to you in a debugging session - Lots of code duplication within lldb functions - Large files quickly become intimidating to work with - for example, `lldb_disasm.py` was implemented as a seperate Python module because it was easier to start with a clean slate than add significant amounts of code to `lldb_cruby.py` This PR attempts, to fix the disadvantages of the current approach and maintain, or enhance, the benefits. The new structure of a command looks like this; ``` class TestCommand(RbBaseCommand): # program is the keyword the user will type in lldb to execute this command program = "test" # help_string will be displayed in lldb when the user uses the help functions help_string = "This is a test command to show how to implement lldb commands" # call is where our command logic will be implemented def call(self, debugger, command, exe_ctx, result): pass ``` If the command fulfils the following criteria it will then be auto-loaded when an lldb session is started: - The package file must exist inside the `commands` directory and the filename must end in `_command.py` - The package must implement a class whose name ends in `Command` - The class inherits from `RbBaseCommand` or at minimum a class that shares the same interface as `RbBaseCommand` (at minimum this means defining `__init__` and `__call__`, and using `__call__` to call `call` which is defined in the subclasses). - The class must have a class variable `package` that is a String. 
This is the name of the command you'll call in the `lldb` debugger. --- .gitignore | 3 ++ misc/commands/command_template.py | 25 ++++++++++++ misc/constants.py | 4 ++ misc/lldb_cruby.py | 29 ++++++++++--- misc/rb_base_command.py | 68 +++++++++++++++++++++++++++++++ 5 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 misc/commands/command_template.py create mode 100644 misc/constants.py create mode 100644 misc/rb_base_command.py diff --git a/.gitignore b/.gitignore index 521f4ec807e3ae..c12ec27782a91b 100644 --- a/.gitignore +++ b/.gitignore @@ -219,6 +219,9 @@ lcov*.info /lib/ruby/[1-9]*.* /lib/ruby/vendor_ruby +# /misc/ +/misc/**/__pycache__ + # /spec/bundler /.rspec_status diff --git a/misc/commands/command_template.py b/misc/commands/command_template.py new file mode 100644 index 00000000000000..bbc4b09157ac46 --- /dev/null +++ b/misc/commands/command_template.py @@ -0,0 +1,25 @@ +# This is a command template for implementing a helper function inside LLDB. To +# use this file +# 1. Copy it and rename the copy so it ends with `_command.py`. +# 2. Rename the class to something descriptive that ends with Command. +# 3. Change the program variable to be a descriptive command name +# 4. Ensure you are inheriting from RbBaseCommand or another command that +# implements the same interfact + +# This test command inherits from RbBaseCommand which provides access to Ruby +# globals and utility helpers +class TestCommand(RbBaseCommand): + # program is the keyword the user will type in lldb to execute this command + program = "test" + + # help_string will be displayed in lldb when the user uses the help functions + help_string = "This is a test command to show how to implement lldb commands" + + # call is where our command logic will be implemented + def call(self, debugger, command, exe_ctx, result): + # This method will be called once the LLDB environment has been setup. 
+ # You will have access to self.target, self.process, self.frame, and + # self.thread + # + # This is where we should implement our command logic + pass diff --git a/misc/constants.py b/misc/constants.py new file mode 100644 index 00000000000000..ec3050a399888b --- /dev/null +++ b/misc/constants.py @@ -0,0 +1,4 @@ +HEAP_PAGE_ALIGN_LOG = 16 +HEAP_PAGE_ALIGN_MASK = (~(~0 << HEAP_PAGE_ALIGN_LOG)) +HEAP_PAGE_ALIGN = (1 << HEAP_PAGE_ALIGN_LOG) +HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index c38b9c62a03651..de8628754c91d1 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -9,15 +9,16 @@ from __future__ import print_function import lldb import os +import inspect +import sys import shlex import platform +import glob -HEAP_PAGE_ALIGN_LOG = 16 - -HEAP_PAGE_ALIGN_MASK = (~(~0 << HEAP_PAGE_ALIGN_LOG)) -HEAP_PAGE_ALIGN = (1 << HEAP_PAGE_ALIGN_LOG) -HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN +from constants import * +# BEGIN FUNCTION STYLE DECLS +# This will be refactored to use class style decls in the misc/commands dir class BackTrace: VM_FRAME_MAGIC_METHOD = 0x11110001 VM_FRAME_MAGIC_BLOCK = 0x22220001 @@ -740,9 +741,27 @@ def rb_rclass_ext(debugger, command, result, internal_dict): rclass_addr = target.EvaluateExpression(command).Cast(uintptr_t) rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) +# END FUNCTION STYLE DECLS + + +load_dir, _ = os.path.split(os.path.realpath(__file__)) + +for fname in glob.glob(f"{load_dir}/commands/*_command.py"): + _, basename = os.path.split(fname) + mname, _ = os.path.splitext(basename) + exec(f"import commands.{mname}") def __lldb_init_module(debugger, internal_dict): + # Register all classes that subclass RbBaseCommand + + for memname, mem in inspect.getmembers(sys.modules["lldb_rb.rb_base_command"]): + if inspect.isclass(mem): + for sclass in mem.__subclasses__(): + 
sclass.register_lldb_command(debugger, f"{__name__}.{sclass.__module__}") + + + ## FUNCTION INITS - These should be removed when converted to class commands debugger.HandleCommand("command script add -f lldb_cruby.lldb_rp rp") debugger.HandleCommand("command script add -f lldb_cruby.count_objects rb_count_objects") debugger.HandleCommand("command script add -f lldb_cruby.stack_dump_raw SDR") diff --git a/misc/rb_base_command.py b/misc/rb_base_command.py new file mode 100644 index 00000000000000..44b2996d80d528 --- /dev/null +++ b/misc/rb_base_command.py @@ -0,0 +1,68 @@ +import lldb +from pydoc import locate + +class RbBaseCommand: + @classmethod + def register_lldb_command(cls, debugger, module_name): + # Add any commands contained in this module to LLDB + command = f"command script add -c {module_name}.{cls.__name__} {cls.program}" + debugger.HandleCommand(command) + + def __init__(self, debugger, _internal_dict): + self.internal_dict = _internal_dict + + def __call__(self, debugger, command, exe_ctx, result): + if not ("RUBY_Qfalse" in globals()): + self._lldb_init(debugger) + + self.build_environment(debugger) + self.call(debugger, command, exe_ctx, result) + + def call(self, debugger, command, exe_ctx, result): + raise NotImplementedError("subclasses must implement call") + + def get_short_help(self): + return self.__class__.help_string + + def get_long_help(self): + return self.__class__.help_string + + def build_environment(self, debugger): + self.target = debugger.GetSelectedTarget() + self.process = self.target.GetProcess() + self.thread = self.process.GetSelectedThread() + self.frame = self.thread.GetSelectedFrame() + + def _append_command_output(self, debugger, command, result): + output1 = result.GetOutput() + debugger.GetCommandInterpreter().HandleCommand(command, result) + output2 = result.GetOutput() + result.Clear() + result.write(output1) + result.write(output2) + + def _lldb_init(self, debugger): + target = debugger.GetSelectedTarget() + global 
SIZEOF_VALUE + SIZEOF_VALUE = target.FindFirstType("VALUE").GetByteSize() + + value_types = [] + g = globals() + + imemo_types = target.FindFirstType("enum imemo_type") + + for member in imemo_types.GetEnumMembers(): + g[member.GetName()] = member.GetValueAsUnsigned() + + for enum in target.FindFirstGlobalVariable("ruby_dummy_gdb_enums"): + enum = enum.GetType() + members = enum.GetEnumMembers() + for i in range(0, members.GetSize()): + member = members.GetTypeEnumMemberAtIndex(i) + name = member.GetName() + value = member.GetValueAsUnsigned() + g[name] = value + + if name.startswith("RUBY_T_"): + value_types.append(name) + g["value_types"] = value_types From 281bcc8e64accac2d3ab465abde4962de725857d Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Wed, 13 Jul 2022 13:18:45 +0100 Subject: [PATCH 151/546] [ci-skip][Feature #18910][lldb] Port heap_page command to new LLDB framework --- misc/commands/heap_page_command.py | 26 ++++++++++++++++++++++++++ misc/lldb_cruby.py | 14 -------------- 2 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 misc/commands/heap_page_command.py diff --git a/misc/commands/heap_page_command.py b/misc/commands/heap_page_command.py new file mode 100644 index 00000000000000..ee502a40b8e3f7 --- /dev/null +++ b/misc/commands/heap_page_command.py @@ -0,0 +1,26 @@ +import lldb + +from constants import * +from rb_base_command import RbBaseCommand + +class HeapPageCommand(RbBaseCommand): + program = "heap_page" + help_string = "prints out 'struct heap_page' for a VALUE pointer in the page" + + def call(self, debugger, command, exe_ctx, result): + self.t_heap_page_body = self.target.FindFirstType("struct heap_page_body") + self.t_heap_page_ptr = self.target.FindFirstType("struct heap_page").GetPointerType() + + page = self._get_page(self.frame.EvaluateExpression(command)) + page.Cast(self.t_heap_page_ptr) + + self._append_command_output(debugger, "p (struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) + 
self._append_command_output(debugger, "p *(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) + + def _get_page(self, val): + addr = val.GetValueAsUnsigned() + page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, self.target) + body = self.target.CreateValueFromAddress("page", address, self.t_heap_page_body) + + return body.GetValueForExpressionPath("->header.page") diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index de8628754c91d1..5106f788819597 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -469,19 +469,6 @@ def check_bits(page, bitmap_name, bitmap_index, bitmap_bit, v): else: return ' ' -def heap_page(debugger, command, ctx, result, internal_dict): - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() - - val = frame.EvaluateExpression(command) - page = get_page(lldb, target, val) - page_type = target.FindFirstType("struct heap_page").GetPointerType() - page.Cast(page_type) - append_command_output(debugger, "p (struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) - append_command_output(debugger, "p *(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) - def heap_page_body(debugger, command, ctx, result, internal_dict): target = debugger.GetSelectedTarget() process = target.GetProcess() @@ -766,7 +753,6 @@ def __lldb_init_module(debugger, internal_dict): debugger.HandleCommand("command script add -f lldb_cruby.count_objects rb_count_objects") debugger.HandleCommand("command script add -f lldb_cruby.stack_dump_raw SDR") debugger.HandleCommand("command script add -f lldb_cruby.dump_node dump_node") - debugger.HandleCommand("command script add -f lldb_cruby.heap_page heap_page") debugger.HandleCommand("command script add -f lldb_cruby.heap_page_body heap_page_body") debugger.HandleCommand("command script add -f lldb_cruby.rb_backtrace rbbt") debugger.HandleCommand("command script add -f 
lldb_cruby.dump_page dump_page") From a4ef2f16728b4b1eb49cc3aded26219cabdfa7e7 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Wed, 13 Jul 2022 13:19:41 +0100 Subject: [PATCH 152/546] [ci-skip][Feature #18910][lldb] Port rclass_ext to new LLDB Framework --- misc/commands/rclass_ext_command.py | 14 ++++++++++++++ misc/lldb_cruby.py | 19 ------------------- 2 files changed, 14 insertions(+), 19 deletions(-) create mode 100644 misc/commands/rclass_ext_command.py diff --git a/misc/commands/rclass_ext_command.py b/misc/commands/rclass_ext_command.py new file mode 100644 index 00000000000000..3d17f646bd649d --- /dev/null +++ b/misc/commands/rclass_ext_command.py @@ -0,0 +1,14 @@ +from rb_base_command import RbBaseCommand + +class RclassExtCommand(RbBaseCommand): + program = "rclass_ext" + help_string = "retrieves and prints the rb_classext_struct for the VALUE pointer passed in" + + def call(self, debugger, command, exe_ctx, result): + uintptr_t = self.target.FindFirstType("uintptr_t") + rclass_t = self.target.FindFirstType("struct RClass") + rclass_ext_t = self.target.FindFirstType("rb_classext_t") + + rclass_addr = self.target.EvaluateExpression(command).Cast(uintptr_t) + rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) + debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index 5106f788819597..6655d768ae24b1 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -711,23 +711,6 @@ def rb_id2str(debugger, command, result, internal_dict): pos = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE id_str = rb_ary_entry(target, ary, pos, result) lldb_inspect(debugger, target, result, id_str) - -def rb_rclass_ext(debugger, command, result, internal_dict): - if not ('RUBY_Qfalse' in globals()): - lldb_init(debugger) - - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() - - uintptr_t = 
target.FindFirstType("uintptr_t") - rclass_t = target.FindFirstType("struct RClass") - rclass_ext_t = target.FindFirstType("rb_classext_t") - - rclass_addr = target.EvaluateExpression(command).Cast(uintptr_t) - rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) - debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) # END FUNCTION STYLE DECLS @@ -758,7 +741,5 @@ def __lldb_init_module(debugger, internal_dict): debugger.HandleCommand("command script add -f lldb_cruby.dump_page dump_page") debugger.HandleCommand("command script add -f lldb_cruby.dump_page_rvalue dump_page_rvalue") debugger.HandleCommand("command script add -f lldb_cruby.rb_id2str rb_id2str") - debugger.HandleCommand("command script add -f lldb_cruby.rb_rclass_ext rclass_ext") - lldb_init(debugger) print("lldb scripts for ruby has been installed.") From b26aec9daa03a4f3da225e9e4f7a43e916928712 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Wed, 13 Jul 2022 18:14:44 +0100 Subject: [PATCH 153/546] [ci-skip][Feature #18910][lldb] New directory structure Push the newly refactored lldb files into a sub-directory so that we're not cluttering up the misc directory --- misc/lldb_cruby.py | 6 +++--- misc/{ => lldb_rb}/commands/command_template.py | 5 +++++ misc/{ => lldb_rb}/commands/heap_page_command.py | 4 ++-- misc/{ => lldb_rb}/commands/rclass_ext_command.py | 2 +- misc/{ => lldb_rb}/constants.py | 0 misc/{ => lldb_rb}/rb_base_command.py | 0 6 files changed, 11 insertions(+), 6 deletions(-) rename misc/{ => lldb_rb}/commands/command_template.py (92%) rename misc/{ => lldb_rb}/commands/heap_page_command.py (92%) rename misc/{ => lldb_rb}/commands/rclass_ext_command.py (92%) rename misc/{ => lldb_rb}/constants.py (100%) rename misc/{ => lldb_rb}/rb_base_command.py (100%) diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index 6655d768ae24b1..e30acffc40200d 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -15,7 +15,7 @@ import platform import 
glob -from constants import * +from lldb_rb.constants import * # BEGIN FUNCTION STYLE DECLS # This will be refactored to use class style decls in the misc/commands dir @@ -716,11 +716,11 @@ def rb_id2str(debugger, command, result, internal_dict): load_dir, _ = os.path.split(os.path.realpath(__file__)) -for fname in glob.glob(f"{load_dir}/commands/*_command.py"): +for fname in glob.glob(f"{load_dir}/lldb_rb/commands/*_command.py"): _, basename = os.path.split(fname) mname, _ = os.path.splitext(basename) - exec(f"import commands.{mname}") + exec(f"import lldb_rb.commands.{mname}") def __lldb_init_module(debugger, internal_dict): # Register all classes that subclass RbBaseCommand diff --git a/misc/commands/command_template.py b/misc/lldb_rb/commands/command_template.py similarity index 92% rename from misc/commands/command_template.py rename to misc/lldb_rb/commands/command_template.py index bbc4b09157ac46..843b66398ffeec 100644 --- a/misc/commands/command_template.py +++ b/misc/lldb_rb/commands/command_template.py @@ -6,6 +6,11 @@ # 4. 
Ensure you are inheriting from RbBaseCommand or another command that # implements the same interfact +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + # This test command inherits from RbBaseCommand which provides access to Ruby # globals and utility helpers class TestCommand(RbBaseCommand): diff --git a/misc/commands/heap_page_command.py b/misc/lldb_rb/commands/heap_page_command.py similarity index 92% rename from misc/commands/heap_page_command.py rename to misc/lldb_rb/commands/heap_page_command.py index ee502a40b8e3f7..edb74a415bf1bc 100644 --- a/misc/commands/heap_page_command.py +++ b/misc/lldb_rb/commands/heap_page_command.py @@ -1,7 +1,7 @@ import lldb -from constants import * -from rb_base_command import RbBaseCommand +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand class HeapPageCommand(RbBaseCommand): program = "heap_page" diff --git a/misc/commands/rclass_ext_command.py b/misc/lldb_rb/commands/rclass_ext_command.py similarity index 92% rename from misc/commands/rclass_ext_command.py rename to misc/lldb_rb/commands/rclass_ext_command.py index 3d17f646bd649d..8bae91145764e8 100644 --- a/misc/commands/rclass_ext_command.py +++ b/misc/lldb_rb/commands/rclass_ext_command.py @@ -1,4 +1,4 @@ -from rb_base_command import RbBaseCommand +from lldb_rb.rb_base_command import RbBaseCommand class RclassExtCommand(RbBaseCommand): program = "rclass_ext" diff --git a/misc/constants.py b/misc/lldb_rb/constants.py similarity index 100% rename from misc/constants.py rename to misc/lldb_rb/constants.py diff --git a/misc/rb_base_command.py b/misc/lldb_rb/rb_base_command.py similarity index 100% rename from misc/rb_base_command.py rename to misc/lldb_rb/rb_base_command.py From 92603bbd6996000d2541edb22e0b2ecac646c3de Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Thu, 18 Aug 2022 11:44:25 +0100 Subject: [PATCH 154/546] [ci skip][Feature #18910][lldb] Dedup lldb_init by moving it 
fully into RbBaseCommand --- misc/lldb_cruby.py | 4 ++- misc/lldb_rb/rb_base_command.py | 55 +++++++++++++++++---------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index e30acffc40200d..595d54dfab3dde 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -741,5 +741,7 @@ def __lldb_init_module(debugger, internal_dict): debugger.HandleCommand("command script add -f lldb_cruby.dump_page dump_page") debugger.HandleCommand("command script add -f lldb_cruby.dump_page_rvalue dump_page_rvalue") debugger.HandleCommand("command script add -f lldb_cruby.rb_id2str rb_id2str") - lldb_init(debugger) + + lldb_rb.rb_base_command.RbBaseCommand.lldb_init(debugger) + print("lldb scripts for ruby has been installed.") diff --git a/misc/lldb_rb/rb_base_command.py b/misc/lldb_rb/rb_base_command.py index 44b2996d80d528..bf98b6761255bc 100644 --- a/misc/lldb_rb/rb_base_command.py +++ b/misc/lldb_rb/rb_base_command.py @@ -8,12 +8,39 @@ def register_lldb_command(cls, debugger, module_name): command = f"command script add -c {module_name}.{cls.__name__} {cls.program}" debugger.HandleCommand(command) + @classmethod + def lldb_init(cls, debugger): + target = debugger.GetSelectedTarget() + global SIZEOF_VALUE + SIZEOF_VALUE = target.FindFirstType("VALUE").GetByteSize() + + value_types = [] + g = globals() + + imemo_types = target.FindFirstType("enum imemo_type") + + for member in imemo_types.GetEnumMembers(): + g[member.GetName()] = member.GetValueAsUnsigned() + + for enum in target.FindFirstGlobalVariable("ruby_dummy_gdb_enums"): + enum = enum.GetType() + members = enum.GetEnumMembers() + for i in range(0, members.GetSize()): + member = members.GetTypeEnumMemberAtIndex(i) + name = member.GetName() + value = member.GetValueAsUnsigned() + g[name] = value + + if name.startswith("RUBY_T_"): + value_types.append(name) + g["value_types"] = value_types + def __init__(self, debugger, _internal_dict): self.internal_dict = 
_internal_dict def __call__(self, debugger, command, exe_ctx, result): if not ("RUBY_Qfalse" in globals()): - self._lldb_init(debugger) + RbBaseCommand.lldb_init(debugger) self.build_environment(debugger) self.call(debugger, command, exe_ctx, result) @@ -40,29 +67,3 @@ def _append_command_output(self, debugger, command, result): result.Clear() result.write(output1) result.write(output2) - - def _lldb_init(self, debugger): - target = debugger.GetSelectedTarget() - global SIZEOF_VALUE - SIZEOF_VALUE = target.FindFirstType("VALUE").GetByteSize() - - value_types = [] - g = globals() - - imemo_types = target.FindFirstType("enum imemo_type") - - for member in imemo_types.GetEnumMembers(): - g[member.GetName()] = member.GetValueAsUnsigned() - - for enum in target.FindFirstGlobalVariable("ruby_dummy_gdb_enums"): - enum = enum.GetType() - members = enum.GetEnumMembers() - for i in range(0, members.GetSize()): - member = members.GetTypeEnumMemberAtIndex(i) - name = member.GetName() - value = member.GetValueAsUnsigned() - g[name] = value - - if name.startswith("RUBY_T_"): - value_types.append(name) - g["value_types"] = value_types From 59da26789f11dfa56a029a72210998cdd2f84174 Mon Sep 17 00:00:00 2001 From: git Date: Fri, 19 Aug 2022 02:25:54 +0900 Subject: [PATCH 155/546] * remove trailing spaces. [ci skip] --- misc/lldb_rb/commands/command_template.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/misc/lldb_rb/commands/command_template.py b/misc/lldb_rb/commands/command_template.py index 843b66398ffeec..8d46f141591f2e 100644 --- a/misc/lldb_rb/commands/command_template.py +++ b/misc/lldb_rb/commands/command_template.py @@ -3,7 +3,7 @@ # 1. Copy it and rename the copy so it ends with `_command.py`. # 2. Rename the class to something descriptive that ends with Command. # 3. Change the program variable to be a descriptive command name -# 4. Ensure you are inheriting from RbBaseCommand or another command that +# 4. 
Ensure you are inheriting from RbBaseCommand or another command that # implements the same interfact import lldb @@ -23,8 +23,8 @@ class TestCommand(RbBaseCommand): # call is where our command logic will be implemented def call(self, debugger, command, exe_ctx, result): # This method will be called once the LLDB environment has been setup. - # You will have access to self.target, self.process, self.frame, and + # You will have access to self.target, self.process, self.frame, and # self.thread - # + # # This is where we should implement our command logic pass From f34280ec6b684eeb21ef3336c7002e1a83dcfd2c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 17 Aug 2022 17:41:48 +0900 Subject: [PATCH 156/546] Scan the code range of the formatted portion --- sprintf.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sprintf.c b/sprintf.c index 1ee293b6d96808..04cead77317621 100644 --- a/sprintf.c +++ b/sprintf.c @@ -1153,6 +1153,10 @@ ruby_vsprintf0(VALUE result, char *p, const char *fmt, va_list ap) rb_printf_buffer_extra buffer; #define f buffer.base VALUE klass = RBASIC(result)->klass; + int coderange = ENC_CODERANGE(result); + long scanned = 0; + + if (coderange != ENC_CODERANGE_UNKNOWN) scanned = p - RSTRING_PTR(result); f._flags = __SWR | __SSTR; f._bf._size = 0; @@ -1165,9 +1169,13 @@ ruby_vsprintf0(VALUE result, char *p, const char *fmt, va_list ap) buffer.value = 0; BSD_vfprintf(&f, fmt, ap); RBASIC_SET_CLASS_RAW(result, klass); - // vfprintf mutates the string without updating coderange - ENC_CODERANGE_CLEAR(result); - rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); + p = RSTRING_PTR(result); + long blen = (char *)f._p - p; + if (scanned < blen) { + rb_str_coderange_scan_restartable(p + scanned, p + blen, rb_enc_get(result), &coderange); + ENC_CODERANGE_SET(result, coderange); + } + rb_str_resize(result, blen); #undef f } From 99116da7f05633697177c09cf4de5080028d77f8 Mon Sep 17 00:00:00 2001 From: Nobuyoshi 
Nakada Date: Wed, 17 Aug 2022 16:10:51 +0900 Subject: [PATCH 157/546] Scan the code range of the last added portion --- sprintf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sprintf.c b/sprintf.c index 04cead77317621..b16ab3f581c196 100644 --- a/sprintf.c +++ b/sprintf.c @@ -927,6 +927,10 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) flags = FNONE; } + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { + scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); + ENC_CODERANGE_SET(result, coderange); + } sprint_exit: rb_str_tmp_frozen_release(orig, fmt); /* XXX - We cannot validate the number of arguments if (digit)$ style used. @@ -937,8 +941,6 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) if (RTEST(ruby_verbose)) rb_warn("%s", mesg); } rb_str_resize(result, blen); - // rb_str_format mutates the string without updating coderange - ENC_CODERANGE_CLEAR(result); return result; } From 35c794b26d406c39f90e188e3884003fe6aca532 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Aug 2022 18:31:34 +0900 Subject: [PATCH 158/546] Fixed by [Bug #18964] --- file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/file.c b/file.c index 92338bb7549b1e..5265d3a3a5d141 100644 --- a/file.c +++ b/file.c @@ -4090,9 +4090,6 @@ static VALUE str_shrink(VALUE str) { rb_str_resize(str, RSTRING_LEN(str)); - // expand_path on Windows can sometimes mutate the string - // without clearing its coderange - ENC_CODERANGE_CLEAR(str); return str; } From f09536175853d883130c2e54a1d418b497890462 Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Wed, 29 Jun 2022 21:59:39 +0900 Subject: [PATCH 159/546] Repalce to NIL_P macro --- parse.y | 2 +- variable.c | 2 +- vm_insnhelper.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/parse.y b/parse.y index d0f37a18ccb1ac..cdb7b4b93c8dc2 100644 --- a/parse.y +++ b/parse.y @@ -13612,7 +13612,7 @@ ripper_validate_object(VALUE self, VALUE x) { 
if (x == Qfalse) return x; if (x == Qtrue) return x; - if (x == Qnil) return x; + if (NIL_P(x)) return x; if (x == Qundef) rb_raise(rb_eArgError, "Qundef given"); if (FIXNUM_P(x)) return x; diff --git a/variable.c b/variable.c index a3512adc99b976..056a1000b8edee 100644 --- a/variable.c +++ b/variable.c @@ -2605,7 +2605,7 @@ autoload_load_needed(VALUE _arguments) return Qfalse; } - if (autoload_data->mutex == Qnil) { + if (NIL_P(autoload_data->mutex)) { autoload_data->mutex = rb_mutex_new(); autoload_data->fork_gen = GET_VM()->fork_gen; } diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 1812f7ce71aead..837c71ffd5ee14 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -4841,7 +4841,7 @@ vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd) // going to use this string for interpolation, it's fine to use the // frozen string. VALUE val = rb_mod_name(recv); - if (val == Qnil) { + if (NIL_P(val)) { val = rb_mod_to_s(recv); } return val; From 3541f32951fecdfea76ef6de028d50ba58cb07b9 Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Mon, 6 Jun 2022 23:13:55 +0900 Subject: [PATCH 160/546] Reuse opt_nl rule --- parse.y | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parse.y b/parse.y index cdb7b4b93c8dc2..426e4df772c74c 100644 --- a/parse.y +++ b/parse.y @@ -5779,8 +5779,7 @@ rbracket : opt_nl ']' rbrace : opt_nl '}' ; -trailer : /* none */ - | '\n' +trailer : opt_nl | ',' ; From a50df1ab0eb312e5cdcf010d2c1b362ec41f3c59 Mon Sep 17 00:00:00 2001 From: schneems Date: Tue, 26 Jul 2022 15:16:21 -0500 Subject: [PATCH 161/546] Setup SyntaxSuggest as default gem Adds the `syntax_suggest` syntax error display tool to Ruby through the same mechanism as `error_highlight` and `did_you_mean`. Reference ticket: https://bugs.ruby-lang.org/issues/18159 close #4845 ## What is syntax_suggest? When a syntax error is raised by requiring a file, dead_end will use a combination of indentation and lexing to identify the problem. 
> Note: Previously this tool was named `dead_end`. ## Known issues - SyntaxSearch's approach of showing syntax errors only works through integration with `require`, `load`, `autoload`, and `require_relative` (since it monkeypatches them to detect syntax errors). It does not work with direct Ruby file invocations https://github.com/zombocom/dead_end/issues/31. - This causes failure in the test suite (test_expected_backtrace_location_when_inheriting_from_basic_object_and_including_kernel) and confusion when inspecting backtraces if there's a different error when trying to require a file such as measuring memory (https://github.com/zombocom/syntax_suggest/issues/124#issuecomment-1006705016). - Discussed fix. We previously talked about opening up `SyntaxError` to be monkeypatched in the same way that other gems hook into `NoMethodError`. This is currently not possible and requires development work. When we last talked about it at RubyKaigi Nobu expressed an ability to make such a change. --- gem_prelude.rb | 7 +++++++ ruby.c | 6 ++++++ tool/sync_default_gems.rb | 1 + 3 files changed, 14 insertions(+) diff --git a/gem_prelude.rb b/gem_prelude.rb index 94ada316aa8bff..f382021ca3d933 100644 --- a/gem_prelude.rb +++ b/gem_prelude.rb @@ -17,3 +17,10 @@ rescue LoadError warn "`did_you_mean' was not loaded." end if defined?(DidYouMean) + +begin + require 'syntax_suggest/core_ext' +rescue LoadError + warn "`syntax_suggest' was not loaded." 
+end if defined?(SyntaxSuggest) + diff --git a/ruby.c b/ruby.c index 66feeb797e6cf6..26d263a1b12360 100644 --- a/ruby.c +++ b/ruby.c @@ -94,6 +94,8 @@ void rb_warning_category_update(unsigned int mask, unsigned int bits); SEP \ X(did_you_mean) \ SEP \ + X(syntax_suggest) \ + SEP \ X(rubyopt) \ SEP \ X(frozen_string_literal) \ @@ -307,6 +309,7 @@ usage(const char *name, int help, int highlight, int columns) M("gems", "", "rubygems (only for debugging, default: "DEFAULT_RUBYGEMS_ENABLED")"), M("error_highlight", "", "error_highlight (default: "DEFAULT_RUBYGEMS_ENABLED")"), M("did_you_mean", "", "did_you_mean (default: "DEFAULT_RUBYGEMS_ENABLED")"), + M("syntax_suggest", "", "syntax_suggest (default: "DEFAULT_RUBYGEMS_ENABLED")"), M("rubyopt", "", "RUBYOPT environment variable (default: enabled)"), M("frozen-string-literal", "", "freeze all string literals (default: disabled)"), #if USE_MJIT @@ -1553,6 +1556,9 @@ ruby_opt_init(ruby_cmdline_options_t *opt) if (opt->features.set & FEATURE_BIT(did_you_mean)) { rb_define_module("DidYouMean"); } + if (opt->features.set & FEATURE_BIT(syntax_suggest)) { + rb_define_module("SyntaxSuggest"); + } } rb_warning_category_update(opt->warn.mask, opt->warn.set); diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index 78620e15084e8b..ae3fcbce6152f1 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -73,6 +73,7 @@ pathname: "ruby/pathname", digest: "ruby/digest", error_highlight: "ruby/error_highlight", + syntax_suggest: "zombocom/syntax_suggest", un: "ruby/un", win32ole: "ruby/win32ole", } From 490af8dbdb66263f29d0b4e43752fbb298b94862 Mon Sep 17 00:00:00 2001 From: schneems Date: Tue, 26 Jul 2022 15:21:09 -0500 Subject: [PATCH 162/546] Sync SyntaxSuggest ``` $ tool/sync_default_gems.rb syntax_suggest ``` --- lib/syntax_suggest.rb | 3 + lib/syntax_suggest/api.rb | 199 ++++++++++++ lib/syntax_suggest/around_block_scan.rb | 224 +++++++++++++ lib/syntax_suggest/block_expand.rb | 74 +++++ 
lib/syntax_suggest/capture_code_context.rb | 233 ++++++++++++++ lib/syntax_suggest/clean_document.rb | 304 ++++++++++++++++++ lib/syntax_suggest/cli.rb | 129 ++++++++ lib/syntax_suggest/code_block.rb | 100 ++++++ lib/syntax_suggest/code_frontier.rb | 178 ++++++++++ lib/syntax_suggest/code_line.rb | 239 ++++++++++++++ lib/syntax_suggest/code_search.rb | 139 ++++++++ lib/syntax_suggest/core_ext.rb | 101 ++++++ .../display_code_with_line_numbers.rb | 70 ++++ lib/syntax_suggest/display_invalid_blocks.rb | 84 +++++ lib/syntax_suggest/explain_syntax.rb | 103 ++++++ lib/syntax_suggest/left_right_lex_count.rb | 168 ++++++++++ lib/syntax_suggest/lex_all.rb | 55 ++++ lib/syntax_suggest/lex_value.rb | 70 ++++ .../parse_blocks_from_indent_line.rb | 60 ++++ lib/syntax_suggest/pathname_from_message.rb | 59 ++++ lib/syntax_suggest/priority_engulf_queue.rb | 63 ++++ lib/syntax_suggest/priority_queue.rb | 105 ++++++ lib/syntax_suggest/ripper_errors.rb | 36 +++ lib/syntax_suggest/syntax_suggest.gemspec | 32 ++ lib/syntax_suggest/unvisited_lines.rb | 36 +++ lib/syntax_suggest/version.rb | 5 + 26 files changed, 2869 insertions(+) create mode 100644 lib/syntax_suggest.rb create mode 100644 lib/syntax_suggest/api.rb create mode 100644 lib/syntax_suggest/around_block_scan.rb create mode 100644 lib/syntax_suggest/block_expand.rb create mode 100644 lib/syntax_suggest/capture_code_context.rb create mode 100644 lib/syntax_suggest/clean_document.rb create mode 100644 lib/syntax_suggest/cli.rb create mode 100644 lib/syntax_suggest/code_block.rb create mode 100644 lib/syntax_suggest/code_frontier.rb create mode 100644 lib/syntax_suggest/code_line.rb create mode 100644 lib/syntax_suggest/code_search.rb create mode 100644 lib/syntax_suggest/core_ext.rb create mode 100644 lib/syntax_suggest/display_code_with_line_numbers.rb create mode 100644 lib/syntax_suggest/display_invalid_blocks.rb create mode 100644 lib/syntax_suggest/explain_syntax.rb create mode 100644 
lib/syntax_suggest/left_right_lex_count.rb create mode 100644 lib/syntax_suggest/lex_all.rb create mode 100644 lib/syntax_suggest/lex_value.rb create mode 100644 lib/syntax_suggest/parse_blocks_from_indent_line.rb create mode 100644 lib/syntax_suggest/pathname_from_message.rb create mode 100644 lib/syntax_suggest/priority_engulf_queue.rb create mode 100644 lib/syntax_suggest/priority_queue.rb create mode 100644 lib/syntax_suggest/ripper_errors.rb create mode 100644 lib/syntax_suggest/syntax_suggest.gemspec create mode 100644 lib/syntax_suggest/unvisited_lines.rb create mode 100644 lib/syntax_suggest/version.rb diff --git a/lib/syntax_suggest.rb b/lib/syntax_suggest.rb new file mode 100644 index 00000000000000..1a45dfa6760a48 --- /dev/null +++ b/lib/syntax_suggest.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require_relative "syntax_suggest/core_ext" diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb new file mode 100644 index 00000000000000..5b725e13d71972 --- /dev/null +++ b/lib/syntax_suggest/api.rb @@ -0,0 +1,199 @@ +# frozen_string_literal: true + +require_relative "version" + +require "tmpdir" +require "stringio" +require "pathname" +require "ripper" +require "timeout" + +module SyntaxSuggest + # Used to indicate a default value that cannot + # be confused with another input. + DEFAULT_VALUE = Object.new.freeze + + class Error < StandardError; end + TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i + + # SyntaxSuggest.handle_error [Public] + # + # Takes a `SyntaxError` exception, uses the + # error message to locate the file. Then the file + # will be analyzed to find the location of the syntax + # error and emit that location to stderr. + # + # Example: + # + # begin + # require 'bad_file' + # rescue => e + # SyntaxSuggest.handle_error(e) + # end + # + # By default it will re-raise the exception unless + # `re_raise: false`. The message output location + # can be configured using the `io: $stderr` input. 
+ # + # If a valid filename cannot be determined, the original + # exception will be re-raised (even with + # `re_raise: false`). + def self.handle_error(e, re_raise: true, io: $stderr) + unless e.is_a?(SyntaxError) + io.puts("SyntaxSuggest: Must pass a SyntaxError, got: #{e.class}") + raise e + end + + file = PathnameFromMessage.new(e.message, io: io).call.name + raise e unless file + + io.sync = true + + call( + io: io, + source: file.read, + filename: file + ) + + raise e if re_raise + end + + # SyntaxSuggest.call [Private] + # + # Main private interface + def self.call(source:, filename: DEFAULT_VALUE, terminal: DEFAULT_VALUE, record_dir: DEFAULT_VALUE, timeout: TIMEOUT_DEFAULT, io: $stderr) + search = nil + filename = nil if filename == DEFAULT_VALUE + Timeout.timeout(timeout) do + record_dir ||= ENV["DEBUG"] ? "tmp" : nil + search = CodeSearch.new(source, record_dir: record_dir).call + end + + blocks = search.invalid_blocks + DisplayInvalidBlocks.new( + io: io, + blocks: blocks, + filename: filename, + terminal: terminal, + code_lines: search.code_lines + ).call + rescue Timeout::Error => e + io.puts "Search timed out SYNTAX_SUGGEST_TIMEOUT=#{timeout}, run with DEBUG=1 for more info" + io.puts e.backtrace.first(3).join($/) + end + + # SyntaxSuggest.record_dir [Private] + # + # Used to generate a unique directory to record + # search steps for debugging + def self.record_dir(dir) + time = Time.now.strftime("%Y-%m-%d-%H-%M-%s-%N") + dir = Pathname(dir) + dir.join(time).tap { |path| + path.mkpath + FileUtils.ln_sf(time, dir.join("last")) + } + end + + # SyntaxSuggest.valid_without? [Private] + # + # This will tell you if the `code_lines` would be valid + # if you removed the `without_lines`. In short it's a + # way to detect if we've found the lines with syntax errors + # in our document yet. 
+ # + # code_lines = [ + # CodeLine.new(line: "def foo\n", index: 0) + # CodeLine.new(line: " def bar\n", index: 1) + # CodeLine.new(line: "end\n", index: 2) + # ] + # + # SyntaxSuggest.valid_without?( + # without_lines: code_lines[1], + # code_lines: code_lines + # ) # => true + # + # SyntaxSuggest.valid?(code_lines) # => false + def self.valid_without?(without_lines:, code_lines:) + lines = code_lines - Array(without_lines).flatten + + if lines.empty? + true + else + valid?(lines) + end + end + + # SyntaxSuggest.invalid? [Private] + # + # Opposite of `SyntaxSuggest.valid?` + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s + + Ripper.new(source).tap(&:parse).error? + end + + # SyntaxSuggest.valid? [Private] + # + # Returns truthy if a given input source is valid syntax + # + # SyntaxSuggest.valid?(<<~EOM) # => true + # def foo + # end + # EOM + # + # SyntaxSuggest.valid?(<<~EOM) # => false + # def foo + # def bar # Syntax error here + # end + # EOM + # + # You can also pass in an array of lines and they'll be + # joined before evaluating + # + # SyntaxSuggest.valid?( + # [ + # "def foo\n", + # "end\n" + # ] + # ) # => true + # + # SyntaxSuggest.valid?( + # [ + # "def foo\n", + # " def bar\n", # Syntax error here + # "end\n" + # ] + # ) # => false + # + # As an FYI the CodeLine class instances respond to `to_s` + # so passing a CodeLine in as an object or as an array + # will convert it to it's code representation. 
+ def self.valid?(source) + !invalid?(source) + end +end + +# Integration +require_relative "cli" + +# Core logic +require_relative "code_search" +require_relative "code_frontier" +require_relative "explain_syntax" +require_relative "clean_document" + +# Helpers +require_relative "lex_all" +require_relative "code_line" +require_relative "code_block" +require_relative "block_expand" +require_relative "ripper_errors" +require_relative "priority_queue" +require_relative "unvisited_lines" +require_relative "around_block_scan" +require_relative "priority_engulf_queue" +require_relative "pathname_from_message" +require_relative "display_invalid_blocks" +require_relative "parse_blocks_from_indent_line" diff --git a/lib/syntax_suggest/around_block_scan.rb b/lib/syntax_suggest/around_block_scan.rb new file mode 100644 index 00000000000000..2a57d1b19e6b7e --- /dev/null +++ b/lib/syntax_suggest/around_block_scan.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # This class is useful for exploring contents before and after + # a block + # + # It searches above and below the passed in block to match for + # whatever criteria you give it: + # + # Example: + # + # def dog # 1 + # puts "bark" # 2 + # puts "bark" # 3 + # end # 4 + # + # scan = AroundBlockScan.new( + # code_lines: code_lines + # block: CodeBlock.new(lines: code_lines[1]) + # ) + # + # scan.scan_while { true } + # + # puts scan.before_index # => 0 + # puts scan.after_index # => 3 + # + # Contents can also be filtered using AroundBlockScan#skip + # + # To grab the next surrounding indentation use AroundBlockScan#scan_adjacent_indent + class AroundBlockScan + def initialize(code_lines:, block:) + @code_lines = code_lines + @orig_before_index = block.lines.first.index + @orig_after_index = block.lines.last.index + @orig_indent = block.current_indent + @skip_array = [] + @after_array = [] + @before_array = [] + @stop_after_kw = false + + @skip_hidden = false + @skip_empty = false + end + + 
def skip(name) + case name + when :hidden? + @skip_hidden = true + when :empty? + @skip_empty = true + else + raise "Unsupported skip #{name}" + end + self + end + + def stop_after_kw + @stop_after_kw = true + self + end + + def scan_while + stop_next = false + + kw_count = 0 + end_count = 0 + index = before_lines.reverse_each.take_while do |line| + next false if stop_next + next true if @skip_hidden && line.hidden? + next true if @skip_empty && line.empty? + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if @stop_after_kw && kw_count > end_count + stop_next = true + end + + yield line + end.last&.index + + if index && index < before_index + @before_index = index + end + + stop_next = false + kw_count = 0 + end_count = 0 + index = after_lines.take_while do |line| + next false if stop_next + next true if @skip_hidden && line.hidden? + next true if @skip_empty && line.empty? + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if @stop_after_kw && end_count > kw_count + stop_next = true + end + + yield line + end.last&.index + + if index && index > after_index + @after_index = index + end + self + end + + def capture_neighbor_context + lines = [] + kw_count = 0 + end_count = 0 + before_lines.reverse_each do |line| + next if line.empty? + break if line.indent < @orig_indent + next if line.indent != @orig_indent + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if kw_count != 0 && kw_count == end_count + lines << line + break + end + + lines << line + end + + lines.reverse! + + kw_count = 0 + end_count = 0 + after_lines.each do |line| + next if line.empty? + break if line.indent < @orig_indent + next if line.indent != @orig_indent + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? 
+ if kw_count != 0 && kw_count == end_count + lines << line + break + end + + lines << line + end + + lines + end + + def on_falling_indent + last_indent = @orig_indent + before_lines.reverse_each do |line| + next if line.empty? + if line.indent < last_indent + yield line + last_indent = line.indent + end + end + + last_indent = @orig_indent + after_lines.each do |line| + next if line.empty? + if line.indent < last_indent + yield line + last_indent = line.indent + end + end + end + + def scan_neighbors + scan_while { |line| line.not_empty? && line.indent >= @orig_indent } + end + + def next_up + @code_lines[before_index.pred] + end + + def next_down + @code_lines[after_index.next] + end + + def scan_adjacent_indent + before_after_indent = [] + before_after_indent << (next_up&.indent || 0) + before_after_indent << (next_down&.indent || 0) + + indent = before_after_indent.min + scan_while { |line| line.not_empty? && line.indent >= indent } + + self + end + + def start_at_next_line + before_index + after_index + @before_index -= 1 + @after_index += 1 + self + end + + def code_block + CodeBlock.new(lines: lines) + end + + def lines + @code_lines[before_index..after_index] + end + + def before_index + @before_index ||= @orig_before_index + end + + def after_index + @after_index ||= @orig_after_index + end + + private def before_lines + @code_lines[0...before_index] || [] + end + + private def after_lines + @code_lines[after_index.next..-1] || [] + end + end +end diff --git a/lib/syntax_suggest/block_expand.rb b/lib/syntax_suggest/block_expand.rb new file mode 100644 index 00000000000000..396b2c3a1acc88 --- /dev/null +++ b/lib/syntax_suggest/block_expand.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # This class is responsible for taking a code block that exists + # at a far indentaion and then iteratively increasing the block + # so that it captures everything within the same indentation block. 
+ # + # def dog + # puts "bow" + # puts "wow" + # end + # + # block = BlockExpand.new(code_lines: code_lines) + # .call(CodeBlock.new(lines: code_lines[1])) + # + # puts block.to_s + # # => puts "bow" + # puts "wow" + # + # + # Once a code block has captured everything at a given indentation level + # then it will expand to capture surrounding indentation. + # + # block = BlockExpand.new(code_lines: code_lines) + # .call(block) + # + # block.to_s + # # => def dog + # puts "bow" + # puts "wow" + # end + # + class BlockExpand + def initialize(code_lines:) + @code_lines = code_lines + end + + def call(block) + if (next_block = expand_neighbors(block)) + return next_block + end + + expand_indent(block) + end + + def expand_indent(block) + AroundBlockScan.new(code_lines: @code_lines, block: block) + .skip(:hidden?) + .stop_after_kw + .scan_adjacent_indent + .code_block + end + + def expand_neighbors(block) + expanded_lines = AroundBlockScan.new(code_lines: @code_lines, block: block) + .skip(:hidden?) + .stop_after_kw + .scan_neighbors + .scan_while { |line| line.empty? } # Slurp up empties + .lines + + if block.lines == expanded_lines + nil + else + CodeBlock.new(lines: expanded_lines) + end + end + + # Manageable rspec errors + def inspect + "#" + end + end +end diff --git a/lib/syntax_suggest/capture_code_context.rb b/lib/syntax_suggest/capture_code_context.rb new file mode 100644 index 00000000000000..c74a366a258d3f --- /dev/null +++ b/lib/syntax_suggest/capture_code_context.rb @@ -0,0 +1,233 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Turns a "invalid block(s)" into useful context + # + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaningful + # + # This class handles the third part.
+ # + # The algorithm is very good at capturing all of a syntax + # error in a single block in number 2, however the results + # can contain ambiguities. Humans are good at pattern matching + # and filtering and can mentally remove extraneous data, but + # they can't add extra data that's not present. + # + # In the case of known ambiguous cases, this class adds context + # back to the ambiguity so the programmer has full information. + # + # Beyond handling these ambiguities, it also captures surrounding + # code context information: + # + # puts block.to_s # => "def bark" + # + # context = CaptureCodeContext.new( + # blocks: block, + # code_lines: code_lines + # ) + # + # lines = context.call.map(&:original) + # puts lines.join + # # => + # class Dog + # def bark + # end + # + class CaptureCodeContext + attr_reader :code_lines + + def initialize(blocks:, code_lines:) + @blocks = Array(blocks) + @code_lines = code_lines + @visible_lines = @blocks.map(&:visible_lines).flatten + @lines_to_output = @visible_lines.dup + end + + def call + @blocks.each do |block| + capture_first_kw_end_same_indent(block) + capture_last_end_same_indent(block) + capture_before_after_kws(block) + capture_falling_indent(block) + end + + @lines_to_output.select!(&:not_empty?) + @lines_to_output.uniq! + @lines_to_output.sort! + + @lines_to_output + end + + # Shows the context around code provided by "falling" indentation + # + # Converts: + # + # it "foo" do + # + # into: + # + # class OH + # def hello + # it "foo" do + # end + # end + # + # + def capture_falling_indent(block) + AroundBlockScan.new( + block: block, + code_lines: @code_lines + ).on_falling_indent do |line| + @lines_to_output << line + end + end + + # Shows surrounding kw/end pairs + # + # The purpose of showing these extra pairs is due to cases + # of ambiguity when only one visible line is matched.
+ # + # For example: + # + # 1 class Dog + # 2 def bark + # 4 def eat + # 5 end + # 6 end + # + # In this case either line 2 could be missing an `end` or + # line 4 was an extra line added by mistake (it happens). + # + # When we detect the above problem it shows the issue + # as only being on line 2 + # + # 2 def bark + # + # Showing "neighbor" keyword pairs gives extra context: + # + # 2 def bark + # 4 def eat + # 5 end + # + def capture_before_after_kws(block) + return unless block.visible_lines.count == 1 + + around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block) + .start_at_next_line + .capture_neighbor_context + + around_lines -= block.lines + + @lines_to_output.concat(around_lines) + end + + # When there is an invalid block with a keyword + # missing an end right before another end, + # it is unclear where which keyword is missing the + # end + # + # Take this example: + # + # class Dog # 1 + # def bark # 2 + # puts "woof" # 3 + # end # 4 + # + # However due to https://github.com/zombocom/syntax_suggest/issues/32 + # the problem line will be identified as: + # + # ❯ class Dog # 1 + # + # Because lines 2, 3, and 4 are technically valid code and are expanded + # first, deemed valid, and hidden. We need to un-hide the matching end + # line 4. Also work backwards and if there's a mis-matched keyword, show it + # too + def capture_last_end_same_indent(block) + return if block.visible_lines.length != 1 + return unless block.visible_lines.first.is_kw? + + visible_line = block.visible_lines.first + lines = @code_lines[visible_line.index..block.lines.last.index] + + # Find first end with same indent + # (this would return line 4) + # + # end # 4 + matching_end = lines.detect { |line| line.indent == block.current_indent && line.is_end? 
} + return unless matching_end + + @lines_to_output << matching_end + + # Work backwards from the end to + # see if there are mis-matched + # keyword/end pairs + # + # Return the first mis-matched keyword + # this would find line 2 + # + # def bark # 2 + # puts "woof" # 3 + # end # 4 + end_count = 0 + kw_count = 0 + kw_line = @code_lines[visible_line.index..matching_end.index].reverse.detect do |line| + end_count += 1 if line.is_end? + kw_count += 1 if line.is_kw? + + !kw_count.zero? && kw_count >= end_count + end + return unless kw_line + @lines_to_output << kw_line + end + + # The logical inverse of `capture_last_end_same_indent` + # + # When there is an invalid block with an `end` + # missing a keyword right after another `end`, + # it is unclear where which end is missing the + # keyword. + # + # Take this example: + # + # class Dog # 1 + # puts "woof" # 2 + # end # 3 + # end # 4 + # + # the problem line will be identified as: + # + # ❯ end # 4 + # + # This happens because lines 1, 2, and 3 are technically valid code and are expanded + # first, deemed valid, and hidden. We need to un-hide the matching keyword on + # line 1. Also work backwards and if there's a mis-matched end, show it + # too + def capture_first_kw_end_same_indent(block) + return if block.visible_lines.length != 1 + return unless block.visible_lines.first.is_end? + + visible_line = block.visible_lines.first + lines = @code_lines[block.lines.first.index..visible_line.index] + matching_kw = lines.reverse.detect { |line| line.indent == block.current_indent && line.is_kw? } + return unless matching_kw + + @lines_to_output << matching_kw + + kw_count = 0 + end_count = 0 + orphan_end = @code_lines[matching_kw.index..visible_line.index].detect do |line| + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? 
+ + end_count >= kw_count + end + + return unless orphan_end + @lines_to_output << orphan_end + end + end +end diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb new file mode 100644 index 00000000000000..b572189259e53f --- /dev/null +++ b/lib/syntax_suggest/clean_document.rb @@ -0,0 +1,304 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Parses and sanitizes source into a lexically aware document + # + # Internally the document is represented by an array with each + # index containing a CodeLine correlating to a line from the source code. + # + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaninful + # + # This class handles the first part. + # + # The reason this class exists is to format input source + # for better/easier/cleaner exploration. + # + # The CodeSearch class operates at the line level so + # we must be careful to not introduce lines that look + # valid by themselves, but when removed will trigger syntax errors + # or strange behavior. + # + # ## Join Trailing slashes + # + # Code with a trailing slash is logically treated as a single line: + # + # 1 it "code can be split" \ + # 2 "across multiple lines" do + # + # In this case removing line 2 would add a syntax error. We get around + # this by internally joining the two lines into a single "line" object + # + # ## Logically Consecutive lines + # + # Code that can be broken over multiple + # lines such as method calls are on different lines: + # + # 1 User. + # 2 where(name: "schneems"). + # 3 first + # + # Removing line 2 can introduce a syntax error. To fix this, all lines + # are joined into one. + # + # ## Heredocs + # + # A heredoc is an way of defining a multi-line string. They can cause many + # problems. If left as a single line, Ripper would try to parse the contents + # as ruby code rather than as a string. 
Even without this problem, we still + # hit an issue with indentation + # + # 1 foo = <<~HEREDOC + # 2 "Be yourself; everyone else is already taken."" + # 3 ― Oscar Wilde + # 4 puts "I look like ruby code" # but i'm still a heredoc + # 5 HEREDOC + # + # If we didn't join these lines then our algorithm would think that line 4 + # is separate from the rest, has a higher indentation, then look at it first + # and remove it. + # + # If the code evaluates line 5 by itself it will think line 5 is a constant, + # remove it, and introduce a syntax errror. + # + # All of these problems are fixed by joining the whole heredoc into a single + # line. + # + # ## Comments and whitespace + # + # Comments can throw off the way the lexer tells us that the line + # logically belongs with the next line. This is valid ruby but + # results in a different lex output than before: + # + # 1 User. + # 2 where(name: "schneems"). + # 3 # Comment here + # 4 first + # + # To handle this we can replace comment lines with empty lines + # and then re-lex the source. This removal and re-lexing preserves + # line index and document size, but generates an easier to work with + # document. + # + class CleanDocument + def initialize(source:) + lines = clean_sweep(source: source) + @document = CodeLine.from_source(lines.join, lines: lines) + end + + # Call all of the document "cleaners" + # and return self + def call + join_trailing_slash! + join_consecutive! + join_heredoc! 
+ + self + end + + # Return an array of CodeLines in the + # document + def lines + @document + end + + # Renders the document back to a string + def to_s + @document.join + end + + # Remove comments and whitespace only lines + # + # replace with empty newlines + # + # source = <<~'EOM' + # # Comment 1 + # puts "hello" + # # Comment 2 + # puts "world" + # EOM + # + # lines = CleanDocument.new(source: source).lines + # expect(lines[0].to_s).to eq("\n") + # expect(lines[1].to_s).to eq("puts "hello") + # expect(lines[2].to_s).to eq("\n") + # expect(lines[3].to_s).to eq("puts "world") + # + # Important: This must be done before lexing. + # + # After this change is made, we lex the document because + # removing comments can change how the doc is parsed. + # + # For example: + # + # values = LexAll.new(source: <<~EOM)) + # User. + # # comment + # where(name: 'schneems') + # EOM + # expect( + # values.count {|v| v.type == :on_ignored_nl} + # ).to eq(1) + # + # After the comment is removed: + # + # values = LexAll.new(source: <<~EOM)) + # User. + # + # where(name: 'schneems') + # EOM + # expect( + # values.count {|v| v.type == :on_ignored_nl} + # ).to eq(2) + # + def clean_sweep(source:) + source.lines.map do |line| + if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs + $/ + else + line + end + end + end + + # Smushes all heredoc lines into one line + # + # source = <<~'EOM' + # foo = <<~HEREDOC + # lol + # hehehe + # HEREDOC + # EOM + # + # lines = CleanDocument.new(source: source).join_heredoc!.lines + # expect(lines[0].to_s).to eq(source) + # expect(lines[1].to_s).to eq("") + def join_heredoc! 
+ start_index_stack = [] + heredoc_beg_end_index = [] + lines.each do |line| + line.lex.each do |lex_value| + case lex_value.type + when :on_heredoc_beg + start_index_stack << line.index + when :on_heredoc_end + start_index = start_index_stack.pop + end_index = line.index + heredoc_beg_end_index << [start_index, end_index] + end + end + end + + heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] } + + join_groups(heredoc_groups) + self + end + + # Smushes logically "consecutive" lines + # + # source = <<~'EOM' + # User. + # where(name: 'schneems'). + # first + # EOM + # + # lines = CleanDocument.new(source: source).join_consecutive!.lines + # expect(lines[0].to_s).to eq(source) + # expect(lines[1].to_s).to eq("") + # + # The one known case this doesn't handle is: + # + # Ripper.lex <<~EOM + # a && + # b || + # c + # EOM + # + # For some reason this introduces `on_ignore_newline` but with BEG type + # + def join_consecutive! + consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| + take_while_including(code_line.index..-1) do |line| + line.ignore_newline_not_beg? + end + end + + join_groups(consecutive_groups) + self + end + + # Join lines with a trailing slash + # + # source = <<~'EOM' + # it "code can be split" \ + # "across multiple lines" do + # EOM + # + # lines = CleanDocument.new(source: source).join_consecutive!.lines + # expect(lines[0].to_s).to eq(source) + # expect(lines[1].to_s).to eq("") + def join_trailing_slash! + trailing_groups = @document.select(&:trailing_slash?).map do |code_line| + take_while_including(code_line.index..-1) { |x| x.trailing_slash? } + end + join_groups(trailing_groups) + self + end + + # Helper method for joining "groups" of lines + # + # Input is expected to be type Array> + # + # The outer array holds the various "groups" while the + # inner array holds code lines. + # + # All code lines are "joined" into the first line in + # their group. 
+ # + # To preserve document size, empty lines are placed + # in the place of the lines that were "joined" + def join_groups(groups) + groups.each do |lines| + line = lines.first + + # Handle the case of multiple groups in a row + # if one is already replaced, move on + next if @document[line.index].empty? + + # Join group into the first line + @document[line.index] = CodeLine.new( + lex: lines.map(&:lex).flatten, + line: lines.join, + index: line.index + ) + + # Hide the rest of the lines + lines[1..-1].each do |line| + # The above lines already have newlines in them, if add more + # then there will be double newline, use an empty line instead + @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + end + end + self + end + + # Helper method for grabbing elements from document + # + # Like `take_while` except when it stops + # iterating, it also returns the line + # that caused it to stop + def take_while_including(range = 0..-1) + take_next_and_stop = false + @document[range].take_while do |line| + next if take_next_and_stop + + take_next_and_stop = !(yield line) + true + end + end + end +end diff --git a/lib/syntax_suggest/cli.rb b/lib/syntax_suggest/cli.rb new file mode 100644 index 00000000000000..b89fa5d0138577 --- /dev/null +++ b/lib/syntax_suggest/cli.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require "pathname" +require "optparse" + +module SyntaxSuggest + # All the logic of the exe/syntax_suggest CLI in one handy spot + # + # Cli.new(argv: ["--help"]).call + # Cli.new(argv: [".rb"]).call + # Cli.new(argv: [".rb", "--record=tmp"]).call + # Cli.new(argv: [".rb", "--terminal"]).call + # + class Cli + attr_accessor :options + + # ARGV is Everything passed to the executable, does not include executable name + # + # All other inputs are dependency injection for testing + def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV) + @options = {} + @parser = nil + options[:record_dir] = env["SYNTAX_SUGGEST_RECORD_DIR"] +
options[:record_dir] = "tmp" if env["DEBUG"] + options[:terminal] = SyntaxSuggest::DEFAULT_VALUE + + @io = io + @argv = argv + @exit_obj = exit_obj + end + + def call + if @argv.empty? + # Display help if raw command + parser.parse! %w[--help] + return + else + # Mutates @argv + parse + return if options[:exit] + end + + file_name = @argv.first + if file_name.nil? + @io.puts "No file given" + @exit_obj.exit(1) + return + end + + file = Pathname(file_name) + if !file.exist? + @io.puts "file not found: #{file.expand_path} " + @exit_obj.exit(1) + return + end + + @io.puts "Record dir: #{options[:record_dir]}" if options[:record_dir] + + display = SyntaxSuggest.call( + io: @io, + source: file.read, + filename: file.expand_path, + terminal: options.fetch(:terminal, SyntaxSuggest::DEFAULT_VALUE), + record_dir: options[:record_dir] + ) + + if display.document_ok? + @exit_obj.exit(0) + else + @exit_obj.exit(1) + end + end + + def parse + parser.parse!(@argv) + + self + end + + def parser + @parser ||= OptionParser.new do |opts| + opts.banner = <<~EOM + Usage: syntax_suggest [options] + + Parses a ruby source file and searches for syntax error(s) such as + unexpected `end', expecting end-of-input. + + Example: + + $ syntax_suggest dog.rb + + # ... 
+ + ❯ 10 defdog + ❯ 15 end + + ENV options: + + SYNTAX_SUGGEST_RECORD_DIR= + + Records the steps used to search for a syntax error + to the given directory + + Options: + EOM + + opts.version = SyntaxSuggest::VERSION + + opts.on("--help", "Help - displays this message") do |v| + @io.puts opts + options[:exit] = true + @exit_obj.exit + end + + opts.on("--record ", "Records the steps used to search for a syntax error to the given directory") do |v| + options[:record_dir] = v + end + + opts.on("--terminal", "Enable terminal highlighting") do |v| + options[:terminal] = true + end + + opts.on("--no-terminal", "Disable terminal highlighting") do |v| + options[:terminal] = false + end + end + end + end +end diff --git a/lib/syntax_suggest/code_block.rb b/lib/syntax_suggest/code_block.rb new file mode 100644 index 00000000000000..61e7986da47146 --- /dev/null +++ b/lib/syntax_suggest/code_block.rb @@ -0,0 +1,100 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Multiple lines form a singular CodeBlock + # + # Source code is made of multiple CodeBlocks. + # + # Example: + # + # code_block.to_s # => + # # def foo + # # puts "foo" + # # end + # + # code_block.valid? # => true + # code_block.in_valid? # => false + # + # + class CodeBlock + UNSET = Object.new.freeze + attr_reader :lines, :starts_at, :ends_at + + def initialize(lines: []) + @lines = Array(lines) + @valid = UNSET + @deleted = false + @starts_at = @lines.first.number + @ends_at = @lines.last.number + end + + def delete + @deleted = true + end + + def deleted? + @deleted + end + + def visible_lines + @lines.select(&:visible?).select(&:not_empty?) + end + + def mark_invisible + @lines.map(&:mark_invisible) + end + + def is_end? + to_s.strip == "end" + end + + def hidden? + @lines.all?(&:hidden?) + end + + # This is used for frontier ordering, we are searching from + # the largest indentation to the smallest. 
This allows us to + # populate an array with multiple code blocks then call `sort!` + # on it without having to specify the sorting criteria + def <=>(other) + out = current_indent <=> other.current_indent + return out if out != 0 + + # Stable sort + starts_at <=> other.starts_at + end + + def current_indent + @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0 + end + + def invalid? + !valid? + end + + def valid? + if @valid == UNSET + # Performance optimization + # + # If all the lines were previously hidden + # and we expand to capture additional empty + # lines then the result cannot be invalid + # + # That means there's no reason to re-check all + # lines with ripper (which is expensive). + # Benchmark in commit message + @valid = if lines.all? { |l| l.hidden? || l.empty? } + true + else + SyntaxSuggest.valid?(lines.map(&:original).join) + end + else + @valid + end + end + + def to_s + @lines.join + end + end +end diff --git a/lib/syntax_suggest/code_frontier.rb b/lib/syntax_suggest/code_frontier.rb new file mode 100644 index 00000000000000..8e93b32514e0a9 --- /dev/null +++ b/lib/syntax_suggest/code_frontier.rb @@ -0,0 +1,178 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # The main function of the frontier is to hold the edges of our search and to + # evaluate when we can stop searching. + + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaninful + # + # The Code frontier is a critical part of the second step + # + # ## Knowing where we've been + # + # Once a code block is generated it is added onto the frontier. Then it will be + # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a + # smaller block will cause the smaller block to be evicted. 
+ # + # CodeFrontier#<<(block) # Adds block to frontier + # CodeFrontier#pop # Removes block from frontier + # + # ## Knowing where we can go + # + # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line` + # when called, this method returns, a line of code with the highest indentation. + # + # The returned line of code can be used to build a CodeBlock and then that code block + # is added back to the frontier. Then, the lines are removed from the + # "unvisited" so we don't double-create the same block. + # + # CodeFrontier#next_indent_line # Shows next line + # CodeFrontier#register_indent_block(block) # Removes lines from unvisited + # + # ## Knowing when to stop + # + # The frontier knows how to check the entire document for a syntax error. When blocks + # are added onto the frontier, they're removed from the document. When all code containing + # syntax errors has been added to the frontier, the document will be parsable without a + # syntax error and the search can stop. + # + # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors + # + # ## Filtering false positives + # + # Once the search is completed, the frontier may have multiple blocks that do not contain + # the syntax error. To limit the result to the smallest subset of "invalid blocks" call: + # + # CodeFrontier#detect_invalid_blocks + # + class CodeFrontier + def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines)) + @code_lines = code_lines + @unvisited = unvisited + @queue = PriorityEngulfQueue.new + + @check_next = true + end + + def count + @queue.length + end + + # Performance optimization + # + # Parsing with ripper is expensive + # If we know we don't have any blocks with invalid + # syntax, then we know we cannot have found + # the incorrect syntax yet. + # + # When an invalid block is added onto the frontier + # check document state + private def can_skip_check? 
+ check_next = @check_next + @check_next = false + + if check_next + false + else + true + end + end + + # Returns true if the document is valid with all lines + # removed. By default it checks all blocks in present in + # the frontier array, but can be used for arbitrary arrays + # of codeblocks as well + def holds_all_syntax_errors?(block_array = @queue, can_cache: true) + return false if can_cache && can_skip_check? + + without_lines = block_array.to_a.flat_map do |block| + block.lines + end + + SyntaxSuggest.valid_without?( + without_lines: without_lines, + code_lines: @code_lines + ) + end + + # Returns a code block with the largest indentation possible + def pop + @queue.pop + end + + def next_indent_line + @unvisited.peek + end + + def expand? + return false if @queue.empty? + return true if @unvisited.empty? + + frontier_indent = @queue.peek.current_indent + unvisited_indent = next_indent_line.indent + + if ENV["SYNTAX_SUGGEST_DEBUG"] + puts "```" + puts @queue.peek.to_s + puts "```" + puts " @frontier indent: #{frontier_indent}" + puts " @unvisited indent: #{unvisited_indent}" + end + + # Expand all blocks before moving to unvisited lines + frontier_indent >= unvisited_indent + end + + # Keeps track of what lines have been added to blocks and which are not yet + # visited. + def register_indent_block(block) + @unvisited.visit_block(block) + self + end + + # When one element fully encapsulates another we remove the smaller + # block from the frontier. This prevents double expansions and all-around + # weird behavior. However this guarantee is quite expensive to maintain + def register_engulf_block(block) + end + + # Add a block to the frontier + # + # This method ensures the frontier always remains sorted (in indentation order) + # and that each code block's lines are removed from the indentation hash so we + # don't re-evaluate the same line multiple times. 
+ def <<(block) + @unvisited.visit_block(block) + + @queue.push(block) + + @check_next = true if block.invalid? + + self + end + + # Example: + # + # combination([:a, :b, :c, :d]) + # # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]] + def self.combination(array) + guesses = [] + 1.upto(array.length).each do |size| + guesses.concat(array.combination(size).to_a) + end + guesses + end + + # Given that we know our syntax error exists somewhere in our frontier, we want to find + # the smallest possible set of blocks that contain all the syntax errors + def detect_invalid_blocks + self.class.combination(@queue.to_a.select(&:invalid?)).detect do |block_array| + holds_all_syntax_errors?(block_array, can_cache: false) + end || [] + end + end +end diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb new file mode 100644 index 00000000000000..dc738ab12890da --- /dev/null +++ b/lib/syntax_suggest/code_line.rb @@ -0,0 +1,239 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Represents a single line of code of a given source file + # + # This object contains metadata about the line such as + # amount of indentation, if it is empty or not, and + # lexical data, such as if it has an `end` or a keyword + # in it. + # + # Visibility of lines can be toggled off. Marking a line as invisible + # indicates that it should not be used for syntax checks. + # It's functionally the same as commenting it out. + # + # Example: + # + # line = CodeLine.from_source("def foo\n").first + # line.number => 1 + # line.empty? # => false + # line.visible? # => true + # line.mark_invisible + # line.visible? 
# => false + # + class CodeLine + TRAILING_SLASH = ("\\" + $/).freeze + + # Returns an array of CodeLine objects + # from the source string + def self.from_source(source, lines: nil) + lines ||= source.lines + lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } + lines.map.with_index do |line, index| + CodeLine.new( + line: line, + index: index, + lex: lex_array_for_line[index + 1] + ) + end + end + + attr_reader :line, :index, :lex, :line_number, :indent + def initialize(line:, index:, lex:) + @lex = lex + @line = line + @index = index + @original = line + @line_number = @index + 1 + strip_line = line.dup + strip_line.lstrip! + + if strip_line.empty? + @empty = true + @indent = 0 + else + @empty = false + @indent = line.length - strip_line.length + end + + set_kw_end + end + + # Used for stable sort via indentation level + # + # Ruby's sort is not "stable" meaning that when + # multiple elements have the same value, they are + # not guaranteed to return in the same order they + # were put in. + # + # So when multiple code lines have the same indentation + # level, they're sorted by their index value which is unique + # and consistent. + # + # This is mostly needed for consistency of the test suite + def indent_index + @indent_index ||= [indent, index] + end + alias_method :number, :line_number + + # Returns true if the code line is determined + # to contain a keyword that matches with an `end` + # + # For example: `def`, `do`, `begin`, `ensure`, etc. + def is_kw? + @is_kw + end + + # Returns true if the code line is determined + # to contain an `end` keyword + def is_end? 
+ @is_end + end + + # Used to hide lines + # + # The search algorithm will group lines into blocks + # then if those blocks are determined to represent + # valid code they will be hidden + def mark_invisible + @line = "" + end + + # Means the line was marked as "invisible" + # Confusingly, "empty" lines are visible...they + # just don't contain any source code other than a newline ("\n"). + def visible? + !line.empty? + end + + # Opposite of `visible?` (note: different than `empty?`) + def hidden? + !visible? + end + + # An `empty?` line is one that was originally left + # empty in the source code, while a "hidden" line + # is one that we've since marked as "invisible" + def empty? + @empty + end + + # Opposite of `empty?` (note: different than `visible?`) + def not_empty? + !empty? + end + + # Renders the given line + # + # Also allows us to represent source code as + # an array of code lines. + # + # When we have an array of code line elements + # calling `join` on the array will call `to_s` + # on each element, which essentially converts + # it back into its original source string. + def to_s + line + end + + # When the code line is marked invisible + # we retain the original value of its line + # this is useful for debugging and for + # showing extra context + # + # DisplayCodeWithLineNumbers will render + # all lines given to it, not just visible + # lines, it uses the original method to + # obtain them. + attr_reader :original + + # Comparison operator, needed for equality + # and sorting + def <=>(other) + index <=> other.index + end + + # [Not stable API] + # + # Lines that have a `on_ignored_nl` type token and NOT + # a `BEG` type seem to be a good proxy for the ability + # to join multiple lines into one. + # + # This predicate method is used to determine when those + # two criteria have been met. 
+ # + # The one known case this doesn't handle is: + # + # Ripper.lex <<~EOM + # a && + # b || + # c + # EOM + # + # For some reason this introduces `on_ignore_newline` but with BEG type + def ignore_newline_not_beg? + @ignore_newline_not_beg + end + + # Determines if the given line has a trailing slash + # + # lines = CodeLine.from_source(<<~EOM) + # it "foo" \ + # EOM + # expect(lines.first.trailing_slash?).to eq(true) + # + def trailing_slash? + last = @lex.last + return false unless last + return false unless last.type == :on_sp + + last.token == TRAILING_SLASH + end + + # Endless method detection + # + # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab + # Detecting a "oneliner" seems to need a state machine. + # This can be done by looking mostly at the "state" (last value): + # + # ENDFN -> BEG (token = '=' ) -> END + # + private def set_kw_end + oneliner_count = 0 + in_oneliner_def = nil + + kw_count = 0 + end_count = 0 + + @ignore_newline_not_beg = false + @lex.each do |lex| + kw_count += 1 if lex.is_kw? + end_count += 1 if lex.is_end? + + if lex.type == :on_ignored_nl + @ignore_newline_not_beg = !lex.expr_beg? + end + + if in_oneliner_def.nil? 
+ in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) + elsif lex.state.allbits?(Ripper::EXPR_ENDFN) + # Continue + elsif lex.state.allbits?(Ripper::EXPR_BEG) + in_oneliner_def = :BODY if lex.token == "=" + elsif lex.state.allbits?(Ripper::EXPR_END) + # We found an endless method, count it + oneliner_count += 1 if in_oneliner_def == :BODY + + in_oneliner_def = nil + else + in_oneliner_def = nil + end + end + + kw_count -= oneliner_count + + @is_kw = (kw_count - end_count) > 0 + @is_end = (end_count - kw_count) > 0 + end + end +end diff --git a/lib/syntax_suggest/code_search.rb b/lib/syntax_suggest/code_search.rb new file mode 100644 index 00000000000000..2a86dfea90eb18 --- /dev/null +++ b/lib/syntax_suggest/code_search.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Searches code for a syntax error + # + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaningful + # + # This class handles the second part. + # + # The bulk of the heavy lifting is done in: + # + # - CodeFrontier (Holds information for generating blocks and determining if we can stop searching) + # - ParseBlocksFromIndentLine (Creates blocks into the frontier) + # - BlockExpand (Expands existing blocks to search more code) + # + # ## Syntax error detection + # + # When the frontier holds the syntax error, we can stop searching + # + # search = CodeSearch.new(<<~EOM) + # def dog + # def lol + # end + # EOM + # + # search.call + # + # search.invalid_blocks.map(&:to_s) # => + # # => ["def lol\n"] + # + class CodeSearch + private + + attr_reader :frontier + + public + + attr_reader :invalid_blocks, :record_dir, :code_lines + + def initialize(source, record_dir: DEFAULT_VALUE) + record_dir = if record_dir == DEFAULT_VALUE + ENV["SYNTAX_SUGGEST_RECORD_DIR"] || ENV["SYNTAX_SUGGEST_DEBUG"] ? 
"tmp" : nil + else + record_dir + end + + if record_dir + @record_dir = SyntaxSuggest.record_dir(record_dir) + @write_count = 0 + end + + @tick = 0 + @source = source + @name_tick = Hash.new { |hash, k| hash[k] = 0 } + @invalid_blocks = [] + + @code_lines = CleanDocument.new(source: source).call.lines + + @frontier = CodeFrontier.new(code_lines: @code_lines) + @block_expand = BlockExpand.new(code_lines: @code_lines) + @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines) + end + + # Used for debugging + def record(block:, name: "record") + return unless @record_dir + @name_tick[name] += 1 + filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}-(#{block.starts_at}__#{block.ends_at}).txt" + if ENV["SYNTAX_SUGGEST_DEBUG"] + puts "\n\n==== #{filename} ====" + puts "\n```#{block.starts_at}..#{block.ends_at}" + puts block.to_s + puts "```" + puts " block indent: #{block.current_indent}" + end + @record_dir.join(filename).open(mode: "a") do |f| + document = DisplayCodeWithLineNumbers.new( + lines: @code_lines.select(&:visible?), + terminal: false, + highlight_lines: block.lines + ).call + + f.write(" Block lines: #{block.starts_at..block.ends_at} (#{name}) \n\n#{document}") + end + end + + def push(block, name:) + record(block: block, name: name) + + block.mark_invisible if block.valid? 
+ frontier << block + end + + # Parses the most indented lines into blocks that are marked + # and added to the frontier + def create_blocks_from_untracked_lines + max_indent = frontier.next_indent_line&.indent + + while (line = frontier.next_indent_line) && (line.indent == max_indent) + @parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block| + push(block, name: "add") + end + end + end + + # Given an already existing block in the frontier, expand it to see + # if it contains our invalid syntax + def expand_existing + block = frontier.pop + return unless block + + record(block: block, name: "before-expand") + + block = @block_expand.call(block) + push(block, name: "expand") + end + + # Main search loop + def call + until frontier.holds_all_syntax_errors? + @tick += 1 + + if frontier.expand? + expand_existing + else + create_blocks_from_untracked_lines + end + end + + @invalid_blocks.concat(frontier.detect_invalid_blocks) + @invalid_blocks.sort_by! { |block| block.starts_at } + self + end + end +end diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb new file mode 100644 index 00000000000000..40f5fe13759c77 --- /dev/null +++ b/lib/syntax_suggest/core_ext.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require` +if SyntaxError.method_defined?(:detailed_message) + module SyntaxSuggest + class MiniStringIO + def initialize(isatty: $stderr.isatty) + @string = +"" + @isatty = isatty + end + + attr_reader :isatty + def puts(value = $/, **) + @string << value + end + + attr_reader :string + end + end + + SyntaxError.prepend Module.new { + def detailed_message(highlight: true, syntax_suggest: true, **kwargs) + return super unless syntax_suggest + + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + message = super + file = if highlight + SyntaxSuggest::PathnameFromMessage.new(super(highlight: false, 
**kwargs)).call.name + else + SyntaxSuggest::PathnameFromMessage.new(message).call.name + end + + io = SyntaxSuggest::MiniStringIO.new + + if file + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file, + terminal: highlight + ) + annotation = io.string + + annotation + message + else + message + end + rescue => e + if ENV["SYNTAX_SUGGEST_DEBUG"] + $stderr.warn(e.message) + $stderr.warn(e.backtrace) + end + + # Ignore internal errors + message + end + } +else + autoload :Pathname, "pathname" + + # Monkey patch kernel to ensure that all `require` calls call the same + # method + module Kernel + module_function + + alias_method :syntax_suggest_original_require, :require + alias_method :syntax_suggest_original_require_relative, :require_relative + alias_method :syntax_suggest_original_load, :load + + def load(file, wrap = false) + syntax_suggest_original_load(file, wrap) + rescue SyntaxError => e + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + SyntaxSuggest.handle_error(e) + end + + def require(file) + syntax_suggest_original_require(file) + rescue SyntaxError => e + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + SyntaxSuggest.handle_error(e) + end + + def require_relative(file) + if Pathname.new(file).absolute? 
+ syntax_suggest_original_require file + else + relative_from = caller_locations(1..1).first + relative_from_path = relative_from.absolute_path || relative_from.path + syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path) + end + rescue SyntaxError => e + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + SyntaxSuggest.handle_error(e) + end + end +end diff --git a/lib/syntax_suggest/display_code_with_line_numbers.rb b/lib/syntax_suggest/display_code_with_line_numbers.rb new file mode 100644 index 00000000000000..23f4b2d1eeace7 --- /dev/null +++ b/lib/syntax_suggest/display_code_with_line_numbers.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Outputs code with highlighted lines + # + # Whatever is passed to this class will be rendered + # even if it is "marked invisible" any filtering of + # output should be done before calling this class. + # + # DisplayCodeWithLineNumbers.new( + # lines: lines, + # highlight_lines: [lines[2], lines[3]] + # ).call + # # => + # 1 + # 2 def cat + # ❯ 3 Dir.chdir + # ❯ 4 end + # 5 end + # 6 + class DisplayCodeWithLineNumbers + TERMINAL_HIGHLIGHT = "\e[1;3m" # Bold, italics + TERMINAL_END = "\e[0m" + + def initialize(lines:, highlight_lines: [], terminal: false) + @lines = Array(lines).sort + @terminal = terminal + @highlight_line_hash = Array(highlight_lines).each_with_object({}) { |line, h| h[line] = true } + @digit_count = @lines.last&.line_number.to_s.length + end + + def call + @lines.map do |line| + format_line(line) + end.join + end + + private def format_line(code_line) + # Handle trailing slash lines + code_line.original.lines.map.with_index do |contents, i| + format( + empty: code_line.empty?, + number: (code_line.number + i).to_s, + contents: contents, + highlight: @highlight_line_hash[code_line] + ) + end.join + end + + private def format(contents:, number:, empty:, highlight: false) + string = +"" + string << if highlight + "❯ " + else 
+ " " + end + + string << number.rjust(@digit_count).to_s + if empty + string << contents + else + string << " " + string << TERMINAL_HIGHLIGHT if @terminal && highlight + string << contents + string << TERMINAL_END if @terminal + end + string + end + end +end diff --git a/lib/syntax_suggest/display_invalid_blocks.rb b/lib/syntax_suggest/display_invalid_blocks.rb new file mode 100644 index 00000000000000..bc1143f4b0d5da --- /dev/null +++ b/lib/syntax_suggest/display_invalid_blocks.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require_relative "capture_code_context" +require_relative "display_code_with_line_numbers" + +module SyntaxSuggest + # Used for formatting invalid blocks + class DisplayInvalidBlocks + attr_reader :filename + + def initialize(code_lines:, blocks:, io: $stderr, filename: nil, terminal: DEFAULT_VALUE) + @io = io + @blocks = Array(blocks) + @filename = filename + @code_lines = code_lines + + @terminal = terminal == DEFAULT_VALUE ? io.isatty : terminal + end + + def document_ok? + @blocks.none? { |b| !b.hidden? } + end + + def call + if document_ok? 
+ @io.puts "Syntax OK" + return self + end + + if filename + @io.puts("--> #{filename}") + @io.puts + end + @blocks.each do |block| + display_block(block) + end + + self + end + + private def display_block(block) + # Build explanation + explain = ExplainSyntax.new( + code_lines: block.lines + ).call + + # Enhance code output + # Also handles several ambiguious cases + lines = CaptureCodeContext.new( + blocks: block, + code_lines: @code_lines + ).call + + # Build code output + document = DisplayCodeWithLineNumbers.new( + lines: lines, + terminal: @terminal, + highlight_lines: block.lines + ).call + + # Output syntax error explanation + explain.errors.each do |e| + @io.puts e + end + @io.puts + + # Output code + @io.puts(document) + end + + private def code_with_context + lines = CaptureCodeContext.new( + blocks: @blocks, + code_lines: @code_lines + ).call + + DisplayCodeWithLineNumbers.new( + lines: lines, + terminal: @terminal, + highlight_lines: @invalid_lines + ).call + end + end +end diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb new file mode 100644 index 00000000000000..142ed2e269ea14 --- /dev/null +++ b/lib/syntax_suggest/explain_syntax.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require_relative "left_right_lex_count" + +module SyntaxSuggest + # Explains syntax errors based on their source + # + # example: + # + # source = "def foo; puts 'lol'" # Note missing end + # explain ExplainSyntax.new( + # code_lines: CodeLine.from_source(source) + # ).call + # explain.errors.first + # # => "Unmatched keyword, missing `end' ?" + # + # When the error cannot be determined by lexical counting + # then ripper is run against the input and the raw ripper + # errors returned. 
+ # + # Example: + # + # source = "1 * " # Note missing a second number + # explain ExplainSyntax.new( + # code_lines: CodeLine.from_source(source) + # ).call + # explain.errors.first + # # => "syntax error, unexpected end-of-input" + class ExplainSyntax + INVERSE = { + "{" => "}", + "}" => "{", + "[" => "]", + "]" => "[", + "(" => ")", + ")" => "(", + "|" => "|" + }.freeze + + def initialize(code_lines:) + @code_lines = code_lines + @left_right = LeftRightLexCount.new + @missing = nil + end + + def call + @code_lines.each do |line| + line.lex.each do |lex| + @left_right.count_lex(lex) + end + end + + self + end + + # Returns an array of missing elements + # + # For example this: + # + # ExplainSyntax.new(code_lines: lines).missing + # # => ["}"] + # + # Would indicate that the source is missing + # a `}` character in the source code + def missing + @missing ||= @left_right.missing + end + + # Converts a missing string to + # an human understandable explanation. + # + # Example: + # + # explain.why("}") + # # => "Unmatched `{', missing `}' ?" + # + def why(miss) + case miss + when "keyword" + "Unmatched `end', missing keyword (`do', `def`, `if`, etc.) ?" + when "end" + "Unmatched keyword, missing `end' ?" + else + inverse = INVERSE.fetch(miss) { + raise "Unknown explain syntax char or key: #{miss.inspect}" + } + "Unmatched `#{inverse}', missing `#{miss}' ?" + end + end + + # Returns an array of syntax error messages + # + # If no missing pairs are found it falls back + # on the original ripper error messages + def errors + if missing.empty? 
+ return RipperErrors.new(@code_lines.map(&:original).join).call.errors + end + + missing.map { |miss| why(miss) } + end + end +end diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_lex_count.rb new file mode 100644 index 00000000000000..6fcae7482b83b0 --- /dev/null +++ b/lib/syntax_suggest/left_right_lex_count.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Find mis-matched syntax based on lexical count + # + # Used for detecting missing pairs of elements + # each keyword needs an end, each '{' needs a '}' + # etc. + # + # Example: + # + # left_right = LeftRightLexCount.new + # left_right.count_kw + # left_right.missing.first + # # => "end" + # + # left_right = LeftRightLexCount.new + # source = "{ a: b, c: d" # Note missing '}' + # LexAll.new(source: source).each do |lex| + # left_right.count_lex(lex) + # end + # left_right.missing.first + # # => "}" + class LeftRightLexCount + def initialize + @kw_count = 0 + @end_count = 0 + + @count_for_char = { + "{" => 0, + "}" => 0, + "[" => 0, + "]" => 0, + "(" => 0, + ")" => 0, + "|" => 0 + } + end + + def count_kw + @kw_count += 1 + end + + def count_end + @end_count += 1 + end + + # Count source code characters + # + # Example: + # + # left_right = LeftRightLexCount.new + # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # left_right.count_for_char("{") + # # => 1 + # left_right.count_for_char("}") + # # => 0 + def count_lex(lex) + case lex.type + when :on_tstring_content + # ^^^ + # Means it's a string or a symbol `"{"` rather than being + # part of a data structure (like a hash) `{ a: b }` + # ignore it. + when :on_words_beg, :on_symbols_beg, :on_qwords_beg, + :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg + # ^^^ + # Handle shorthand syntaxes like `%Q{ i am a string }` + # + # The start token will be the full thing `%Q{` but we + # need to count it as if it's a `{`. 
Any token + # can be used + char = lex.token[-1] + @count_for_char[char] += 1 if @count_for_char.key?(char) + when :on_embexpr_beg + # ^^^ + # Embedded string expressions like `"#{foo} <-embed"` + # are parsed with chars: + # + # `#{` as :on_embexpr_beg + # `}` as :on_embexpr_end + # + # We cannot ignore both :on_embexpr_beg and :on_embexpr_end + # because sometimes the lexer thinks something is an embed + # string end, when it is not like `lol = }` (no clue why). + # + # When we see `#{` count it as a `{` or we will + # have a mis-match count. + # + case lex.token + when "\#{" + @count_for_char["{"] += 1 + end + else + @end_count += 1 if lex.is_end? + @kw_count += 1 if lex.is_kw? + @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token) + end + end + + def count_for_char(char) + @count_for_char[char] + end + + # Returns an array of missing syntax characters + # or `"end"` or `"keyword"` + # + # left_right.missing + # # => ["}"] + def missing + out = missing_pairs + out << missing_pipe + out << missing_keyword_end + out.compact! + out + end + + PAIRS = { + "{" => "}", + "[" => "]", + "(" => ")" + }.freeze + + # Opening characters like `{` need closing characters like `}`. + # + # When a mis-match count is detected, suggest the + # missing member. + # + # For example if there are 3 `}` and only two `{` + # return `"{"` + private def missing_pairs + PAIRS.map do |(left, right)| + case @count_for_char[left] <=> @count_for_char[right] + when 1 + right + when 0 + nil + when -1 + left + end + end + end + + # Keywords need ends and ends need keywords + # + # If we have more keywords, there's a missing `end` + # if we have more `end`-s, there's a missing keyword + private def missing_keyword_end + case @kw_count <=> @end_count + when 1 + "end" + when 0 + nil + when -1 + "keyword" + end + end + + # Pipes come in pairs. + # If there's an odd number of pipes then we + # are missing one + private def missing_pipe + if @count_for_char["|"].odd? 
+ "|" + end + end + end +end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb new file mode 100644 index 00000000000000..132cba9f5d3b4c --- /dev/null +++ b/lib/syntax_suggest/lex_all.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Ripper.lex is not guaranteed to lex the entire source document + # + # lex = LexAll.new(source: source) + # lex.each do |value| + # puts value.line + # end + class LexAll + include Enumerable + + def initialize(source:, source_lines: nil) + @lex = Ripper::Lexer.new(source, "-", 1).parse.sort_by(&:pos) + lineno = @lex.last.pos.first + 1 + source_lines ||= source.lines + last_lineno = source_lines.length + + until lineno >= last_lineno + lines = source_lines[lineno..-1] + + @lex.concat( + Ripper::Lexer.new(lines.join, "-", lineno + 1).parse.sort_by(&:pos) + ) + lineno = @lex.last.pos.first + 1 + end + + last_lex = nil + @lex.map! { |elem| + last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex) + } + end + + def to_a + @lex + end + + def each + return @lex.each unless block_given? 
+ @lex.each do |x| + yield x + end + end + + def [](index) + @lex[index] + end + + def last + @lex.last + end + end +end + +require_relative "lex_value" diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb new file mode 100644 index 00000000000000..008cc105b5398c --- /dev/null +++ b/lib/syntax_suggest/lex_value.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Value object for accessing lex values + # + # This lex: + # + # [1, 0], :on_ident, "describe", CMDARG + # + # Would translate into: + # + # lex.line # => 1 + # lex.type # => :on_ident + # lex.token # => "describe" + class LexValue + attr_reader :line, :type, :token, :state + + def initialize(line, type, token, state, last_lex = nil) + @line = line + @type = type + @token = token + @state = state + + set_kw_end(last_lex) + end + + private def set_kw_end(last_lex) + @is_end = false + @is_kw = false + return if type != :on_kw + # + return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 + + case token + when "if", "unless", "while", "until" + # Only count if/unless when it's not a "trailing" if/unless + # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 + @is_kw = true unless expr_label? + when "def", "case", "for", "begin", "class", "module", "do" + @is_kw = true + when "end" + @is_end = true + end + end + + def fname? + state.allbits?(Ripper::EXPR_FNAME) + end + + def ignore_newline? + type == :on_ignored_nl + end + + def is_end? + @is_end + end + + def is_kw? + @is_kw + end + + def expr_beg? + state.anybits?(Ripper::EXPR_BEG) + end + + def expr_label?
+ state.allbits?(Ripper::EXPR_LABEL) + end + end +end diff --git a/lib/syntax_suggest/parse_blocks_from_indent_line.rb b/lib/syntax_suggest/parse_blocks_from_indent_line.rb new file mode 100644 index 00000000000000..d1071732fe2a7a --- /dev/null +++ b/lib/syntax_suggest/parse_blocks_from_indent_line.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # This class is responsible for generating initial code blocks + # that will then later be expanded. + # + # The biggest concern when guessing code blocks, is accidentally + # grabbing one that contains only an "end". In this example: + # + # def dog + # begonn # misspelled `begin` + # puts "bark" + # end + # end + # + # The following lines would be matched (from bottom to top): + # + # 1) end + # + # 2) puts "bark" + # end + # + # 3) begonn + # puts "bark" + # end + # + # At this point it has nowhere else to expand, and it will yield this inner + # code as a block + class ParseBlocksFromIndentLine + attr_reader :code_lines + + def initialize(code_lines:) + @code_lines = code_lines + end + + # Builds blocks from bottom up + def each_neighbor_block(target_line) + scan = AroundBlockScan.new(code_lines: code_lines, block: CodeBlock.new(lines: target_line)) + .skip(:empty?) + .skip(:hidden?) + .scan_while { |line| line.indent >= target_line.indent } + + neighbors = scan.code_block.lines + + block = CodeBlock.new(lines: neighbors) + if neighbors.length <= 2 || block.valid? + yield block + else + until neighbors.empty? + lines = [neighbors.pop] + while (block = CodeBlock.new(lines: lines)) && block.invalid? && neighbors.any?
+ lines.prepend neighbors.pop + end + + yield block if block + end + end + end + end +end diff --git a/lib/syntax_suggest/pathname_from_message.rb b/lib/syntax_suggest/pathname_from_message.rb new file mode 100644 index 00000000000000..ea1a90856e90ed --- /dev/null +++ b/lib/syntax_suggest/pathname_from_message.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Converts a SyntaxError message to a path + # + # Handles the case where the filename has a colon in it + # such as on a windows file system: https://github.com/zombocom/syntax_suggest/issues/111 + # + # Example: + # + # message = "/tmp/scratch:2:in `require_relative': /private/tmp/bad.rb:1: syntax error, unexpected `end' (SyntaxError)" + # puts PathnameFromMessage.new(message).call.name + # # => "/tmp/scratch.rb" + # + class PathnameFromMessage + EVAL_RE = /^\(eval\):\d+/ + STREAMING_RE = /^-:\d+/ + attr_reader :name + + def initialize(message, io: $stderr) + @line = message.lines.first + @parts = @line.split(":") + @guess = [] + @name = nil + @io = io + end + + def call + if skip_missing_file_name? + if ENV["SYNTAX_SUGGEST_DEBUG"] + @io.puts "SyntaxSuggest: Could not find filename from #{@line.inspect}" + end + else + until stop? + @guess << @parts.shift + @name = Pathname(@guess.join(":")) + end + + if @parts.empty? + @io.puts "SyntaxSuggest: Could not find filename from #{@line.inspect}" + @name = nil + end + end + + self + end + + def stop? + return true if @parts.empty? + return false if @guess.empty? + + @name&.exist? + end + + def skip_missing_file_name? 
+ @line.match?(EVAL_RE) || @line.match?(STREAMING_RE) + end + end +end diff --git a/lib/syntax_suggest/priority_engulf_queue.rb b/lib/syntax_suggest/priority_engulf_queue.rb new file mode 100644 index 00000000000000..2d1e9b1b631b4e --- /dev/null +++ b/lib/syntax_suggest/priority_engulf_queue.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Keeps track of what elements are in the queue in + # priority and also ensures that when one element + # engulfs/covers/eats another that the larger element + # evicts the smaller element + class PriorityEngulfQueue + def initialize + @queue = PriorityQueue.new + end + + def to_a + @queue.to_a + end + + def empty? + @queue.empty? + end + + def length + @queue.length + end + + def peek + @queue.peek + end + + def pop + @queue.pop + end + + def push(block) + prune_engulf(block) + @queue << block + flush_deleted + + self + end + + private def flush_deleted + while @queue&.peek&.deleted? + @queue.pop + end + end + + private def prune_engulf(block) + # If we're about to pop off the same block, we can skip deleting + # things from the frontier this iteration since we'll get it + # on the next iteration + return if @queue.peek && (block <=> @queue.peek) == 1 + + if block.starts_at != block.ends_at # A block of size 1 cannot engulf another + @queue.to_a.each { |b| + if b.starts_at >= block.starts_at && b.ends_at <= block.ends_at + b.delete + true + end + } + end + end + end +end diff --git a/lib/syntax_suggest/priority_queue.rb b/lib/syntax_suggest/priority_queue.rb new file mode 100644 index 00000000000000..1abda2a444d0cd --- /dev/null +++ b/lib/syntax_suggest/priority_queue.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Holds elements in a priority heap on insert + # + # Instead of constantly calling `sort!`, put + # the element where it belongs the first time + # around + # + # Example: + # + # queue = PriorityQueue.new + # queue << 33 + # queue << 44 + # queue << 1 + # + 
# puts queue.peek # => 44 + # + class PriorityQueue + attr_reader :elements + + def initialize + @elements = [] + end + + def <<(element) + @elements << element + bubble_up(last_index, element) + end + + def pop + exchange(0, last_index) + max = @elements.pop + bubble_down(0) + max + end + + def length + @elements.length + end + + def empty? + @elements.empty? + end + + def peek + @elements.first + end + + def to_a + @elements + end + + # Used for testing, extremely not performant + def sorted + out = [] + elements = @elements.dup + while (element = pop) + out << element + end + @elements = elements + out.reverse + end + + private def last_index + @elements.size - 1 + end + + private def bubble_up(index, element) + return if index <= 0 + + parent_index = (index - 1) / 2 + parent = @elements[parent_index] + + return if (parent <=> element) >= 0 + + exchange(index, parent_index) + bubble_up(parent_index, element) + end + + private def bubble_down(index) + child_index = (index * 2) + 1 + + return if child_index > last_index + + not_the_last_element = child_index < last_index + left_element = @elements[child_index] + right_element = @elements[child_index + 1] + + child_index += 1 if not_the_last_element && (right_element <=> left_element) == 1 + + return if (@elements[index] <=> @elements[child_index]) >= 0 + + exchange(index, child_index) + bubble_down(child_index) + end + + def exchange(source, target) + a = @elements[source] + b = @elements[target] + @elements[source] = b + @elements[target] = a + end + end +end diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb new file mode 100644 index 00000000000000..48eb206e4874b7 --- /dev/null +++ b/lib/syntax_suggest/ripper_errors.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Capture parse errors from ripper + # + # Example: + # + # puts RipperErrors.new(" def foo").call.errors + # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"] + 
class RipperErrors < Ripper + attr_reader :errors + + # Comes from ripper, called + # on every parse error, msg + # is a string + def on_parse_error(msg) + @errors ||= [] + @errors << msg + end + + alias_method :on_alias_error, :on_parse_error + alias_method :on_assign_error, :on_parse_error + alias_method :on_class_name_error, :on_parse_error + alias_method :on_param_error, :on_parse_error + alias_method :compile_error, :on_parse_error + + def call + @run_once ||= begin + @errors = [] + parse + true + end + self + end + end +end diff --git a/lib/syntax_suggest/syntax_suggest.gemspec b/lib/syntax_suggest/syntax_suggest.gemspec new file mode 100644 index 00000000000000..acf9be7710150e --- /dev/null +++ b/lib/syntax_suggest/syntax_suggest.gemspec @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +begin + require_relative "lib/syntax_suggest/version" +rescue LoadError # Fallback to load version file in ruby core repository + require_relative "version" +end + +Gem::Specification.new do |spec| + spec.name = "syntax_suggest" + spec.version = SyntaxSuggest::VERSION + spec.authors = ["schneems"] + spec.email = ["richard.schneeman+foo@gmail.com"] + + spec.summary = "Find syntax errors in your source in a snap" + spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it' + spec.homepage = "https://github.com/zombocom/syntax_suggest.git" + spec.license = "MIT" + spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0") + + spec.metadata["homepage_uri"] = spec.homepage + spec.metadata["source_code_uri"] = "https://github.com/zombocom/syntax_suggest.git" + + # Specify which files should be added to the gem when it is released. + # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do + `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|assets)/}) } + end + spec.bindir = "exe" + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.require_paths = ["lib"] +end diff --git a/lib/syntax_suggest/unvisited_lines.rb b/lib/syntax_suggest/unvisited_lines.rb new file mode 100644 index 00000000000000..32808db63402ae --- /dev/null +++ b/lib/syntax_suggest/unvisited_lines.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Tracks which lines various code blocks have expanded to + # and which are still unexplored + class UnvisitedLines + def initialize(code_lines:) + @unvisited = code_lines.sort_by(&:indent_index) + @visited_lines = {} + @visited_lines.compare_by_identity + end + + def empty? + @unvisited.empty? + end + + def peek + @unvisited.last + end + + def pop + @unvisited.pop + end + + def visit_block(block) + block.lines.each do |line| + next if @visited_lines[line] + @visited_lines[line] = true + end + + while @visited_lines[@unvisited.last] + @unvisited.pop + end + end + end +end diff --git a/lib/syntax_suggest/version.rb b/lib/syntax_suggest/version.rb new file mode 100644 index 00000000000000..a5176dcf2e732e --- /dev/null +++ b/lib/syntax_suggest/version.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +module SyntaxSuggest + VERSION = "0.0.1" +end From 61ab06fe8ade93624c0628bd595df69c3c9987e0 Mon Sep 17 00:00:00 2001 From: git Date: Fri, 19 Aug 2022 01:03:07 +0000 Subject: [PATCH 163/546] Update default gems list at 490af8dbdb66263f29d0b4e43752fb [ci skip] --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 5c4fadc7de295e..d7ce50f399e1a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -177,6 +177,7 @@ Note: We're only listing outstanding class updates. 
* reline 0.3.1 * securerandom 0.2.0 * stringio 3.0.3 + * syntax_suggest 0.0.1 * timeout 0.3.0 * The following bundled gems are updated. * minitest 5.16.3 From bd0fe26b81a62f7ee79c3b125c118d54ee81728a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 19 Aug 2022 10:20:13 +0900 Subject: [PATCH 164/546] [DOC] Fix "military timezone" Not only `J` is called military timezone. --- doc/time/in.rdoc | 2 +- doc/time/zone_and_in.rdoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/time/in.rdoc b/doc/time/in.rdoc index f47db76a353f4b..870982b0c2e4d7 100644 --- a/doc/time/in.rdoc +++ b/doc/time/in.rdoc @@ -1,7 +1,7 @@ - in: zone: a timezone _zone_, which may be: - A string offset from UTC. - A single letter offset from UTC, in the range 'A'..'Z', - 'J' (the so-called military timezone) excluded. + 'J' excluded (the so-called military timezone). - An integer number of seconds. - A timezone object; see {Timezone Argument}[#class-Time-label-Timezone+Argument] for details. diff --git a/doc/time/zone_and_in.rdoc b/doc/time/zone_and_in.rdoc index e09e22874beca1..5bdfaacd4c4bb3 100644 --- a/doc/time/zone_and_in.rdoc +++ b/doc/time/zone_and_in.rdoc @@ -1,7 +1,7 @@ - +zone+: a timezone, which may be: - A string offset from UTC. - A single letter offset from UTC, in the range 'A'..'Z', - 'J' (the so-called military timezone) excluded. + 'J' excluded (the so-called military timezone). - An integer number of seconds. - A timezone object; see {Timezone Argument}[#class-Time-label-Timezone+Argument] for details. From 17d0e5bee7f15ad4e32266afc194cf6c65976236 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 19 Aug 2022 13:20:17 +0900 Subject: [PATCH 165/546] syntax_suggest moved to under the ruby organization. 
--- tool/sync_default_gems.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index ae3fcbce6152f1..7a5e190c2d000e 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -73,7 +73,7 @@ pathname: "ruby/pathname", digest: "ruby/digest", error_highlight: "ruby/error_highlight", - syntax_suggest: "zombocom/syntax_suggest", + syntax_suggest: "ruby/syntax_suggest", un: "ruby/un", win32ole: "ruby/win32ole", } From 6bcb473d9cf00c113587a9fbb7605a2fe110862c Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 19 Aug 2022 18:33:33 +0900 Subject: [PATCH 166/546] [ruby/error_highlight] Apply ErrorHighlight::CoreExt to TypeError and ArgumentError https://github.com/ruby/error_highlight/commit/defcaf1beb --- lib/error_highlight/core_ext.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/error_highlight/core_ext.rb b/lib/error_highlight/core_ext.rb index 130f9ef832e478..00d567164846c8 100644 --- a/lib/error_highlight/core_ext.rb +++ b/lib/error_highlight/core_ext.rb @@ -37,9 +37,6 @@ def to_s end NameError.prepend(CoreExt) - - # The extension for TypeError/ArgumentError is temporarily disabled due to many test failures - - #TypeError.prepend(CoreExt) - #ArgumentError.prepend(CoreExt) + TypeError.prepend(CoreExt) + ArgumentError.prepend(CoreExt) end From 4177f60eedd71b846d9a86889fd46071ecdb0158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ka=C3=ADque=20Kandy=20Koga?= Date: Thu, 18 Aug 2022 14:54:36 -0300 Subject: [PATCH 167/546] Write interface instead of interfact --- misc/lldb_rb/commands/command_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/lldb_rb/commands/command_template.py b/misc/lldb_rb/commands/command_template.py index 8d46f141591f2e..21014a993e4d71 100644 --- a/misc/lldb_rb/commands/command_template.py +++ b/misc/lldb_rb/commands/command_template.py @@ -4,7 +4,7 @@ # 2. 
Rename the class to something descriptive that ends with Command. # 3. Change the program variable to be a descriptive command name # 4. Ensure you are inheriting from RbBaseCommand or another command that -# implements the same interfact +# implements the same interface import lldb From ce384ef5a95b809f248e089c1608e60753dabe45 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 20 Aug 2022 00:16:43 +0900 Subject: [PATCH 168/546] [Bug #18955] Check length of argument for `%c` in proper encoding --- sprintf.c | 5 +++-- test/ruby/test_m17n.rb | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sprintf.c b/sprintf.c index b16ab3f581c196..5f7227e6197422 100644 --- a/sprintf.c +++ b/sprintf.c @@ -441,10 +441,11 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { - if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { + rb_encoding *valenc = rb_enc_get(tmp); + if (rb_enc_strlen(RSTRING_PTR(tmp), RSTRING_END(tmp), valenc) != 1) { rb_raise(rb_eArgError, "%%c requires a character"); } - c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); + c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, valenc); RB_GC_GUARD(tmp); } else { diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index c00bf59e1875c3..2c6fcee00420fb 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -892,6 +892,8 @@ def test_sprintf_c assert_raise(Encoding::CompatibilityError) { "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")] } + + "%c" % "\u3042".encode('Windows-31J') end def test_sprintf_p From 1ef49de83483e6f78bfe9c795a473ccfb29db150 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 20 Aug 2022 01:04:02 +0900 Subject: [PATCH 169/546] [Bug #18955] format single character for `%c` --- spec/ruby/core/kernel/shared/sprintf.rb | 28 ++++++++++++++++++------- spec/ruby/core/string/modulo_spec.rb | 12 +++++++++-- sprintf.c | 11 +++++----- test/ruby/test_m17n.rb | 2 
+- test/ruby/test_sprintf.rb | 3 ++- 5 files changed, 38 insertions(+), 18 deletions(-) diff --git a/spec/ruby/core/kernel/shared/sprintf.rb b/spec/ruby/core/kernel/shared/sprintf.rb index 84d472b0d1adaa..59f5ab003620a9 100644 --- a/spec/ruby/core/kernel/shared/sprintf.rb +++ b/spec/ruby/core/kernel/shared/sprintf.rb @@ -289,16 +289,28 @@ def obj.to_i; 10; end @method.call("%c", "a").should == "a" end - it "raises ArgumentError if argument is a string of several characters" do - -> { - @method.call("%c", "abc") - }.should raise_error(ArgumentError) + ruby_version_is ""..."3.2" do + it "raises ArgumentError if argument is a string of several characters" do + -> { + @method.call("%c", "abc") + }.should raise_error(ArgumentError) + end + + it "raises ArgumentError if argument is an empty string" do + -> { + @method.call("%c", "") + }.should raise_error(ArgumentError) + end end - it "raises ArgumentError if argument is an empty string" do - -> { - @method.call("%c", "") - }.should raise_error(ArgumentError) + ruby_version_is "3.2" do + it "displays only the first character if argument is a string of several characters" do + @method.call("%c", "abc").should == "a" + end + + it "displays no characters if argument is an empty string" do + @method.call("%c", "").should == "" + end end it "supports Unicode characters" do diff --git a/spec/ruby/core/string/modulo_spec.rb b/spec/ruby/core/string/modulo_spec.rb index 99c1694417de3a..bf96a8287462dc 100644 --- a/spec/ruby/core/string/modulo_spec.rb +++ b/spec/ruby/core/string/modulo_spec.rb @@ -368,8 +368,16 @@ def universal.to_f() 0.0 end ("%c" % 'A').should == "A" end - it "raises an exception for multiple character strings as argument for %c" do - -> { "%c" % 'AA' }.should raise_error(ArgumentError) + ruby_version_is ""..."3.2" do + it "raises an exception for multiple character strings as argument for %c" do + -> { "%c" % 'AA' }.should raise_error(ArgumentError) + end + end + + ruby_version_is "3.2" do + it "supports only 
the first character as argument for %c" do + ("%c" % 'AA').should == "A" + end end it "calls to_str on argument for %c formats" do diff --git a/sprintf.c b/sprintf.c index 5f7227e6197422..22edf398febabf 100644 --- a/sprintf.c +++ b/sprintf.c @@ -441,12 +441,10 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { - rb_encoding *valenc = rb_enc_get(tmp); - if (rb_enc_strlen(RSTRING_PTR(tmp), RSTRING_END(tmp), valenc) != 1) { - rb_raise(rb_eArgError, "%%c requires a character"); - } - c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, valenc); - RB_GC_GUARD(tmp); + flags |= FPREC; + prec = 1; + str = tmp; + goto format_s1; } else { c = NUM2INT(val); @@ -488,6 +486,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) else { str = rb_obj_as_string(arg); } + format_s1: len = RSTRING_LEN(str); rb_str_set_len(result, blen); if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 2c6fcee00420fb..a50507a528b6c1 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -893,7 +893,7 @@ def test_sprintf_c "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")] } - "%c" % "\u3042".encode('Windows-31J') + assert_equal("\u3042".encode('Windows-31J'), "%c" % "\u3042\u3044".encode('Windows-31J')) end def test_sprintf_p diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb index b05f4f3e448506..618e67264a7bc9 100644 --- a/test/ruby/test_sprintf.rb +++ b/test/ruby/test_sprintf.rb @@ -362,7 +362,8 @@ def test_skip def test_char assert_equal("a", sprintf("%c", 97)) assert_equal("a", sprintf("%c", ?a)) - assert_raise(ArgumentError) { sprintf("%c", sprintf("%c%c", ?a, ?a)) } + assert_equal("a", sprintf("%c", "a")) + assert_equal("a", sprintf("%c", sprintf("%c%c", ?a, ?a))) assert_equal(" " * (BSIZ - 1) + "a", sprintf(" " * (BSIZ - 1) + "%c", ?a)) assert_equal(" " * (BSIZ - 1) + "a", sprintf(" " * (BSIZ - 1) + "%-1c", ?a)) assert_equal(" 
" * BSIZ + "a", sprintf("%#{ BSIZ + 1 }c", ?a)) From 745de85a7eebed9fef1f2f951d378490dd331952 Mon Sep 17 00:00:00 2001 From: git Date: Sat, 20 Aug 2022 03:57:35 +0900 Subject: [PATCH 170/546] * 2022-08-20 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index b551f340ceb21f..b5ed9d1d253fa3 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 19 +#define RUBY_RELEASE_DAY 20 #include "ruby/version.h" #include "ruby/internal/abi.h" From fc4acf8cae82e5196186d3278d831f2438479d91 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 19 Aug 2022 14:42:50 -0700 Subject: [PATCH 171/546] Make benchmark indentation consistent Related to https://github.com/Shopify/yjit-bench/pull/109 --- benchmark/so_nbody.rb | 58 +++++++++++++-------------- benchmark/vm_ivar_set_on_instance.yml | 52 ++++++++++++------------ 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/benchmark/so_nbody.rb b/benchmark/so_nbody.rb index d6c5bb9e61f12a..9884fc4edc6585 100644 --- a/benchmark/so_nbody.rb +++ b/benchmark/so_nbody.rb @@ -12,38 +12,38 @@ def _puts *args end class Planet - attr_accessor :x, :y, :z, :vx, :vy, :vz, :mass + attr_accessor :x, :y, :z, :vx, :vy, :vz, :mass - def initialize(x, y, z, vx, vy, vz, mass) - @x, @y, @z = x, y, z - @vx, @vy, @vz = vx * DAYS_PER_YEAR, vy * DAYS_PER_YEAR, vz * DAYS_PER_YEAR - @mass = mass * SOLAR_MASS - end - - def move_from_i(bodies, nbodies, dt, i) - while i < nbodies - b2 = bodies[i] - dx = @x - b2.x - dy = @y - b2.y - dz = @z - b2.z - - distance = Math.sqrt(dx * dx + dy * dy + dz * dz) - mag = dt / (distance * distance * distance) - b_mass_mag, b2_mass_mag = @mass * mag, b2.mass * mag - - @vx -= dx * b2_mass_mag - @vy -= dy * b2_mass_mag - @vz -= dz * b2_mass_mag - b2.vx += dx * b_mass_mag - b2.vy += dy * b_mass_mag - b2.vz += dz * b_mass_mag - i += 1 + def initialize(x, y, z, vx, vy, vz, 
mass) + @x, @y, @z = x, y, z + @vx, @vy, @vz = vx * DAYS_PER_YEAR, vy * DAYS_PER_YEAR, vz * DAYS_PER_YEAR + @mass = mass * SOLAR_MASS end - @x += dt * @vx - @y += dt * @vy - @z += dt * @vz - end + def move_from_i(bodies, nbodies, dt, i) + while i < nbodies + b2 = bodies[i] + dx = @x - b2.x + dy = @y - b2.y + dz = @z - b2.z + + distance = Math.sqrt(dx * dx + dy * dy + dz * dz) + mag = dt / (distance * distance * distance) + b_mass_mag, b2_mass_mag = @mass * mag, b2.mass * mag + + @vx -= dx * b2_mass_mag + @vy -= dy * b2_mass_mag + @vz -= dz * b2_mass_mag + b2.vx += dx * b_mass_mag + b2.vy += dy * b_mass_mag + b2.vz += dz * b_mass_mag + i += 1 + end + + @x += dt * @vx + @y += dt * @vy + @z += dt * @vz + end end def energy(bodies) diff --git a/benchmark/vm_ivar_set_on_instance.yml b/benchmark/vm_ivar_set_on_instance.yml index f0d5c169648af0..91857b7742e0f2 100644 --- a/benchmark/vm_ivar_set_on_instance.yml +++ b/benchmark/vm_ivar_set_on_instance.yml @@ -1,33 +1,33 @@ prelude: | - class TheClass - def initialize - @v0 = 1 - @v1 = 2 - @v3 = 3 - @levar = 1 - end + class TheClass + def initialize + @v0 = 1 + @v1 = 2 + @v3 = 3 + @levar = 1 + end - def set_value_loop - # 1M - i = 0 - while i < 1000000 - # 10 times to de-emphasize loop overhead - @levar = i - @levar = i - @levar = i - @levar = i - @levar = i - @levar = i - @levar = i - @levar = i - @levar = i - @levar = i - i += 1 - end - end + def set_value_loop + # 1M + i = 0 + while i < 1000000 + # 10 times to de-emphasize loop overhead + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + i += 1 + end end + end - obj = TheClass.new + obj = TheClass.new benchmark: vm_ivar_set_on_instance: | From 8f4a53d0517a99e57060211a3efe2d7a9d5d06b2 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 19 Aug 2022 14:45:51 -0700 Subject: [PATCH 172/546] Ignore fc4acf8cae on git blame --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff 
--git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index c05a98e306c73d..6c5eac5a0f8026 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -18,3 +18,6 @@ d7ffd3fea402239b16833cc434404a7af82d44f3 f28287d34c03f472ffe90ea262bdde9affd4b965 0d842fecb4f75ab3b1d4097ebdb8e88f51558041 4ba2c66761d6a293abdfba409241d31063cefd62 + +# Make benchmark indentation consistent +fc4acf8cae82e5196186d3278d831f2438479d91 From d2483393cbcb4dcfa0000fa8166bb7fa7ed9f7b4 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 20 Aug 2022 01:43:04 +0900 Subject: [PATCH 173/546] [Bug #18956] Negative codepoints are invalid characters --- sprintf.c | 4 ++-- test/ruby/test_sprintf.rb | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sprintf.c b/sprintf.c index 22edf398febabf..f7ebe82e2cb3d6 100644 --- a/sprintf.c +++ b/sprintf.c @@ -447,8 +447,8 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) goto format_s1; } else { - c = NUM2INT(val); - n = rb_enc_codelen(c, enc); + n = NUM2INT(val); + if (n >= 0) n = rb_enc_codelen((c = n), enc); } if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb index 618e67264a7bc9..803399fdb3c940 100644 --- a/test/ruby/test_sprintf.rb +++ b/test/ruby/test_sprintf.rb @@ -368,6 +368,7 @@ def test_char assert_equal(" " * (BSIZ - 1) + "a", sprintf(" " * (BSIZ - 1) + "%-1c", ?a)) assert_equal(" " * BSIZ + "a", sprintf("%#{ BSIZ + 1 }c", ?a)) assert_equal("a" + " " * BSIZ, sprintf("%-#{ BSIZ + 1 }c", ?a)) + assert_raise(ArgumentError) { sprintf("%c", -1) } end def test_string From 43e8d9a0509e1961c406fedb2c168a6ec2359fdc Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 19 Aug 2022 23:39:52 +0900 Subject: [PATCH 174/546] Check if encoding capable object before check if ASCII compatible --- string.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/string.c b/string.c index 6f211b200524bc..cd4a59385653c6 100644 --- a/string.c +++ b/string.c 
@@ -2526,6 +2526,9 @@ void rb_must_asciicompat(VALUE str) { rb_encoding *enc = rb_enc_get(str); + if (!enc) { + rb_raise(rb_eTypeError, "not encoding capable object"); + } if (!rb_enc_asciicompat(enc)) { rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc)); } From 1a2f99275be28fb0e8ff4cfc0165966e99898d70 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 19 Aug 2022 23:53:33 +0900 Subject: [PATCH 175/546] [Bug #18958] format string must be ASCII compatible --- sprintf.c | 1 + test/ruby/test_m17n.rb | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/sprintf.c b/sprintf.c index f7ebe82e2cb3d6..22323265b3e56d 100644 --- a/sprintf.c +++ b/sprintf.c @@ -250,6 +250,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) --argv; StringValue(fmt); enc = rb_enc_get(fmt); + rb_must_asciicompat(fmt); orig = fmt; fmt = rb_str_tmp_frozen_acquire(fmt); p = RSTRING_PTR(fmt); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index a50507a528b6c1..da04ae7fa72029 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -898,6 +898,16 @@ def test_sprintf_c def test_sprintf_p Encoding.list.each do |e| + unless e.ascii_compatible? + format = e.dummy? ? "%p".force_encoding(e) : "%p".encode(e) + assert_raise(Encoding::CompatibilityError) do + sprintf(format, nil) + end + assert_raise(Encoding::CompatibilityError) do + format % nil + end + next + end format = "%p".force_encoding(e) ['', 'a', "\xC2\xA1", "\x00"].each do |s| s.force_encoding(e) From 6f3857f6a7b3cd6bd7e62e4efdbb1b841544e053 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 18 Jun 2021 16:05:15 -0700 Subject: [PATCH 176/546] Support Encoding::Converter newline: :lf and :lf_newline options Previously, newline: :lf was accepted but ignored. Where it should have been used was commented out code that didn't work, but unlike all other invalid values, using newline: :lf did not raise an error. 
This adds support for newline: :lf and :lf_newline, for consistency with newline: :cr and :cr_newline. This is basically the same as universal_newline, except that it only affects writing and not reading due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK. Add tests for the File.open :newline option while here. Fixes [Bug #12436] --- enc/trans/newline.trans | 20 +++++++++++ include/ruby/internal/encoding/transcode.h | 18 ++++++---- test/ruby/test_file.rb | 42 ++++++++++++++++++++++ test/ruby/test_transcode.rb | 2 ++ transcode.c | 30 +++++++++++++++- 5 files changed, 104 insertions(+), 8 deletions(-) diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 9e763407f9ebce..95e082f5bd0b96 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -17,10 +17,16 @@ map_cr["0a"] = "0d" transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") + + map_normalize = {} + map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline") %> <%= transcode_generated_code %> +#define lf_newline universal_newline #define STATE (sp[0]) #define NORMAL 0 #define JUST_AFTER_CR 1 @@ -126,10 +132,24 @@ rb_cr_newline = { 0, 0, 0, 0 }; +static const rb_transcoder +rb_lf_newline = { + "", "lf_newline", lf_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + 0, 0, 0, fun_so_universal_newline, + universal_newline_finish +}; + void Init_newline(void) { rb_register_transcoder(&rb_universal_newline); rb_register_transcoder(&rb_crlf_newline); rb_register_transcoder(&rb_cr_newline); + rb_register_transcoder(&rb_lf_newline); } diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h index 60c96a41c9f9bb..7f26d2eae98b03 100644 --- a/include/ruby/internal/encoding/transcode.h +++ 
b/include/ruby/internal/encoding/transcode.h @@ -476,16 +476,16 @@ enum ruby_econv_flag_type { RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, /** Decorators are there. */ - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, /** Newline converters are there. */ - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, /** (Unclear; seems unused). */ RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, /** Universal newline mode. */ RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, @@ -496,11 +496,14 @@ enum ruby_econv_flag_type { /** CRLF to CR conversion shall happen. */ RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + /** Texts shall be XML-escaped. */ - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, /** Texts shall be AttrValue escaped */ - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, /** (Unclear; seems unused). 
*/ RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, @@ -529,6 +532,7 @@ enum ruby_econv_flag_type { #define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ #define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ #define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ #define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ #define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ #define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ @@ -543,10 +547,10 @@ enum ruby_econv_flag_type { */ /** Indicates the input is a part of much larger one. */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, /** Instructs the converter to stop after output. 
*/ - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, #define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ #define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 905416911a96e8..669b004b83317e 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -460,6 +460,48 @@ def test_long_unc end end + def test_file_open_newline_option + Dir.mktmpdir(__method__.to_s) do |tmpdir| + path = File.join(tmpdir, "foo") + test = lambda do |newline| + File.open(path, "wt", newline: newline) do |f| + f.write "a\n" + f.puts "b" + end + File.binread(path) + end + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\r\nb\r\n", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + + test = lambda do |newline| + File.open(path, "rt", newline: newline) do |f| + f.read + end + end + + File.binwrite(path, "a\nb\n") + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\nb\n", test.(:crlf)) + assert_equal("a\nb\n", test.(:cr)) + + File.binwrite(path, "a\r\nb\r\n") + assert_equal("a\r\nb\r\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + # Work on both Windows and non-Windows + assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf)) + assert_equal("a\r\nb\r\n", test.(:cr)) + + File.binwrite(path, "a\rb\r") + assert_equal("a\rb\r", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\rb\r", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + end + end + def test_open_nul Dir.mktmpdir(__method__.to_s) do |tmpdir| path = File.join(tmpdir, "foo") diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index c8b0034e060075..73737be0ad7457 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2305,5 +2305,7 @@ def test_newline_options assert_equal("A\rB\r\rC", 
s.encode(usascii, newline: :cr)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf)) + assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true)) + assert_equal("A\nB\nC", s.encode(usascii, newline: :lf)) end end diff --git a/transcode.c b/transcode.c index 5fafad398fb68c..535e436b039a87 100644 --- a/transcode.c +++ b/transcode.c @@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr; static VALUE sym_universal_newline; static VALUE sym_crlf_newline; static VALUE sym_cr_newline; +static VALUE sym_lf_newline; #ifdef ENABLE_ECONV_NEWLINE_OPTION static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf; #endif @@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret) case ECONV_UNIVERSAL_NEWLINE_DECORATOR: case ECONV_CRLF_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR: + case ECONV_LF_NEWLINE_DECORATOR: case 0: break; default: @@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret) decorators_ret[num_decorators++] = "crlf_newline"; if (ecflags & ECONV_CR_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "cr_newline"; + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "lf_newline"; if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "universal_newline"; @@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec) case ECONV_CR_NEWLINE_DECORATOR: dname = "cr_newline"; break; + case ECONV_LF_NEWLINE_DECORATOR: + dname = "lf_newline"; + break; } if (dname) { @@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg) rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "cr_newline"); } + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "lf_newline"); + } if (ecflags & ECONV_XML_TEXT_DECORATOR) { rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "xml_text"); @@ -2515,7 
+2526,7 @@ econv_opts(VALUE opt, int ecflags) ecflags |= ECONV_CR_NEWLINE_DECORATOR; } else if (v == sym_lf) { - /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */ + ecflags |= ECONV_LF_NEWLINE_DECORATOR; } else if (SYMBOL_P(v)) { rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE, @@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags) setflags |= ECONV_CR_NEWLINE_DECORATOR; newlineflag |= !NIL_P(v); + v = rb_hash_aref(opt, sym_lf_newline); + if (RTEST(v)) + setflags |= ECONV_LF_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); + switch (newlineflag) { case 1: ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; @@ -3281,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * :undef => :replace # replace undefined conversion * :replace => string # replacement string ("?" or "\uFFFD" if not specified) * :newline => :universal # decorator for converting CRLF and CR to LF + * :newline => :lf # decorator for converting CRLF and CR to LF when writing * :newline => :crlf # decorator for converting LF to CRLF * :newline => :cr # decorator for converting LF to CR * :universal_newline => true # decorator for converting CRLF and CR to LF * :crlf_newline => true # decorator for converting LF to CRLF * :cr_newline => true # decorator for converting LF to CR + * :lf_newline => true # decorator for converting CRLF and CR to LF when writing * :xml => :text # escape as XML CharData. * :xml => :attr # escape as XML AttValue * integer form: @@ -3293,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Encoding::Converter::UNDEF_REPLACE * Encoding::Converter::UNDEF_HEX_CHARREF * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR + * Encoding::Converter::LF_NEWLINE_DECORATOR * Encoding::Converter::CRLF_NEWLINE_DECORATOR * Encoding::Converter::CR_NEWLINE_DECORATOR * Encoding::Converter::XML_TEXT_DECORATOR @@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Convert LF to CRLF. * [:cr_newline => true] * Convert LF to CR. 
+ * [:lf_newline => true] + * Convert CRLF and CR to LF (when writing). * [:xml => :text] * Escape as XML CharData. * This form can be used as an HTML 4.0 #PCDATA. @@ -4437,6 +4458,7 @@ Init_transcode(void) sym_universal_newline = ID2SYM(rb_intern_const("universal_newline")); sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline")); sym_cr_newline = ID2SYM(rb_intern_const("cr_newline")); + sym_lf_newline = ID2SYM(rb_intern("lf_newline")); sym_partial_input = ID2SYM(rb_intern_const("partial_input")); #ifdef ENABLE_ECONV_NEWLINE_OPTION @@ -4533,6 +4555,12 @@ InitVM_transcode(void) */ rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR)); + /* Document-const: LF_NEWLINE_DECORATOR + * + * Decorator for converting CRLF and CR to LF when writing + */ + rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR)); + /* Document-const: CRLF_NEWLINE_DECORATOR * * Decorator for converting LF to CRLF From 01e8d393bc06d8658ce04a042766cc8c0b5c108b Mon Sep 17 00:00:00 2001 From: nick evans Date: Sun, 3 Apr 2022 09:54:18 -0400 Subject: [PATCH 177/546] Fix gdb incompatibilies in rp_class, rb_ps_vm Other changes are needed to bring .gdbinit up-to-date with current ruby. It looks like lldb is the preferred approach now, and that config *is* being kept up-to-date. Still, this might be helpful to someone? 
--- .gdbinit | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.gdbinit b/.gdbinit index 8979e8b47c5ab1..34d044caf6317f 100644 --- a/.gdbinit +++ b/.gdbinit @@ -544,13 +544,13 @@ end define rp_class printf "(struct RClass *) %p", (void*)$arg0 - if ((struct RClass *)($arg0))->ptr.origin_ != $arg0 - printf " -> %p", ((struct RClass *)($arg0))->ptr.origin_ + if RCLASS_ORIGIN((struct RClass *)($arg0)) != $arg0 + printf " -> %p", RCLASS_ORIGIN((struct RClass *)($arg0)) end printf "\n" rb_classname $arg0 print/x *(struct RClass *)($arg0) - print *((struct RClass *)($arg0))->ptr + print *RCLASS_EXT((struct RClass *)($arg0)) end document rp_class Print the content of a Class/Module. @@ -979,8 +979,8 @@ end define rb_ps_vm print $ps_vm = (rb_vm_t*)$arg0 - set $ps_thread_ln = $ps_vm->living_threads.n.next - set $ps_thread_ln_last = $ps_vm->living_threads.n.prev + set $ps_thread_ln = $ps_vm->ractor.main_ractor.threads.set.n.next + set $ps_thread_ln_last = $ps_vm->ractor.main_ractor.threads.set.n.prev while 1 set $ps_thread_th = (rb_thread_t *)$ps_thread_ln set $ps_thread = (VALUE)($ps_thread_th->self) From fc5382d46524bdf901efc8d15ef9faf14bea3ad1 Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Mon, 15 Aug 2022 19:40:45 +0900 Subject: [PATCH 178/546] Reuse rb_class_new_instance_kw function --- object.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/object.c b/object.c index d1743b554ba4eb..eb54d849675cc9 100644 --- a/object.c +++ b/object.c @@ -1982,13 +1982,7 @@ rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_splat) VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass) { - VALUE obj; - Check_Type(klass, T_CLASS); - - obj = rb_class_alloc(klass); - rb_obj_call_init_kw(obj, argc, argv, RB_NO_KEYWORDS); - - return obj; + return rb_class_new_instance_kw(argc, argv, klass, RB_NO_KEYWORDS); } /** From 485019c2bd02794b484500c78919b0d1230e4a84 Mon Sep 17 00:00:00 2001 From: Takashi 
Kokubun Date: Fri, 19 Aug 2022 23:57:17 -0700 Subject: [PATCH 179/546] Rename mjit_exec to jit_exec (#6262) * Rename mjit_exec to jit_exec * Rename mjit_exec_slowpath to mjit_check_iseq * Remove mjit_exec references from comments --- .../lib/benchmark_driver/runner/mjit_exec.rb | 6 ++-- debug_counter.h | 4 +-- mjit.h | 2 +- mjit_compile.c | 2 +- test/ruby/test_mjit.rb | 2 +- tool/ruby_vm/views/_mjit_compile_send.erb | 2 +- vm.c | 28 +++++++++---------- vm_core.h | 2 +- vm_eval.c | 2 +- vm_insnhelper.c | 11 ++++---- yjit/src/yjit.rs | 2 +- 11 files changed, 31 insertions(+), 32 deletions(-) diff --git a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb b/benchmark/lib/benchmark_driver/runner/mjit_exec.rb index eac3dfba8485bd..121791eb2bf13f 100644 --- a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb +++ b/benchmark/lib/benchmark_driver/runner/mjit_exec.rb @@ -2,7 +2,7 @@ require 'benchmark_driver/metric' require 'erb' -# A special runner dedicated for measuring mjit_exec overhead. +# A special runner dedicated for measuring jit_exec overhead. 
class BenchmarkDriver::Runner::MjitExec METRIC = BenchmarkDriver::Metric.new(name: 'Iteration per second', unit: 'i/s') @@ -12,8 +12,8 @@ class BenchmarkDriver::Runner::MjitExec :metrics, # @param [Array] :num_methods, # @param [Integer] num_methods - The number of methods to be defined :loop_count, # @param [Integer] loop_count - :from_jit, # @param [TrueClass,FalseClass] from_jit - Whether the mjit_exec() is from JIT or not - :to_jit, # @param [TrueClass,FalseClass] to_jit - Whether the mjit_exec() is to JIT or not + :from_jit, # @param [TrueClass,FalseClass] from_jit - Whether the jit_exec() is from JIT or not + :to_jit, # @param [TrueClass,FalseClass] to_jit - Whether the jit_exec() is to JIT or not ) # Dynamically fetched and used by `BenchmarkDriver::JobParser.parse` class << JobParser = Module.new diff --git a/debug_counter.h b/debug_counter.h index 3f0dec948fa26a..c6f4176e9752de 100644 --- a/debug_counter.h +++ b/debug_counter.h @@ -347,8 +347,8 @@ RB_DEBUG_COUNTER(vm_sync_lock_enter_nb) RB_DEBUG_COUNTER(vm_sync_lock_enter_cr) RB_DEBUG_COUNTER(vm_sync_barrier) -/* mjit_exec() counts */ -RB_DEBUG_COUNTER(mjit_exec) +/* jit_exec() counts */ +RB_DEBUG_COUNTER(jit_exec) RB_DEBUG_COUNTER(mjit_exec_not_added) RB_DEBUG_COUNTER(mjit_exec_not_ready) RB_DEBUG_COUNTER(mjit_exec_not_compiled) diff --git a/mjit.h b/mjit.h index 344b20b9013501..045612d7be92a0 100644 --- a/mjit.h +++ b/mjit.h @@ -120,7 +120,7 @@ static inline struct mjit_cont *mjit_cont_new(rb_execution_context_t *ec){return static inline void mjit_cont_free(struct mjit_cont *cont){} static inline void mjit_free_iseq(const rb_iseq_t *iseq){} static inline void mjit_mark(void){} -static inline VALUE mjit_exec(rb_execution_context_t *ec) { return Qundef; /* unreachable */ } +static inline VALUE jit_exec(rb_execution_context_t *ec) { return Qundef; /* unreachable */ } static inline void mjit_child_after_fork(void){} #define mjit_enabled false diff --git a/mjit_compile.c b/mjit_compile.c index 
390e3d2850ad97..1bf5beb6a32b84 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -370,7 +370,7 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status) } // Simulate `opt_pc` in setup_parameters_complex. Other PCs which may be passed by catch tables - // are not considered since vm_exec doesn't call mjit_exec for catch tables. + // are not considered since vm_exec doesn't call jit_exec for catch tables. if (body->param.flags.has_opt) { int i; fprintf(f, "\n"); diff --git a/test/ruby/test_mjit.rb b/test/ruby/test_mjit.rb index 02be88aa322687..3a1dcf7f09ad24 100644 --- a/test/ruby/test_mjit.rb +++ b/test/ruby/test_mjit.rb @@ -749,7 +749,7 @@ def b end def a - # Calling #b should be vm_exec, not direct mjit_exec. + # Calling #b should be vm_exec, not direct jit_exec. # Otherwise `1` on local variable would be purged. 1 + b end diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb index 8900ee64256c95..316974a7e667c3 100644 --- a/tool/ruby_vm/views/_mjit_compile_send.erb +++ b/tool/ruby_vm/views/_mjit_compile_send.erb @@ -90,7 +90,7 @@ fprintf(f, " val = vm_exec(ec, true);\n"); } else { - fprintf(f, " if ((val = mjit_exec(ec)) == Qundef) {\n"); + fprintf(f, " if ((val = jit_exec(ec)) == Qundef) {\n"); fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup fprintf(f, " val = vm_exec(ec, false);\n"); fprintf(f, " }\n"); diff --git a/vm.c b/vm.c index 394fa333885cd3..8cab8b9b57b534 100644 --- a/vm.c +++ b/vm.c @@ -379,12 +379,12 @@ static VALUE vm_invoke_proc(rb_execution_context_t *ec, rb_proc_t *proc, VALUE s #if USE_MJIT # ifdef MJIT_HEADER -NOINLINE(static COLDFUNC VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body)); +NOINLINE(static COLDFUNC VALUE mjit_check_iseq(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body)); # else 
-static inline VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body); +static inline VALUE mjit_check_iseq(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body); # endif static VALUE -mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body) +mjit_check_iseq(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body) { uintptr_t func_i = (uintptr_t)(body->jit_func); ASSUME(func_i <= LAST_JIT_ISEQ_FUNC); @@ -414,7 +414,7 @@ mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_ // If it is not, add ISEQ to the compilation queue and return Qundef for MJIT. // YJIT compiles on the thread running the iseq. static inline VALUE -mjit_exec(rb_execution_context_t *ec) +jit_exec(rb_execution_context_t *ec) { const rb_iseq_t *iseq = ec->cfp->iseq; struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); @@ -442,7 +442,7 @@ mjit_exec(rb_execution_context_t *ec) if (!(mjit_call_p || yjit_enabled)) return Qundef; - RB_DEBUG_COUNTER_INC(mjit_exec); + RB_DEBUG_COUNTER_INC(jit_exec); mjit_func_t func = body->jit_func; @@ -458,7 +458,7 @@ mjit_exec(rb_execution_context_t *ec) # else RB_DEBUG_COUNTER_INC(mjit_frame_VM2VM); # endif - return mjit_exec_slowpath(ec, iseq, body); + return mjit_check_iseq(ec, iseq, body); } # ifdef MJIT_HEADER @@ -2286,8 +2286,8 @@ hook_before_rewind(rb_execution_context_t *ec, const rb_control_frame_t *cfp, void *code; // }; - If mjit_exec is already called before calling vm_exec, `mjit_enable_p` should - be FALSE to avoid calling `mjit_exec` twice. + If jit_exec is already called before calling vm_exec, `jit_enable_p` should + be FALSE to avoid calling `jit_exec` twice. 
*/ static inline VALUE @@ -2303,7 +2303,7 @@ struct rb_vm_exec_context { VALUE initial; VALUE result; enum ruby_tag_type state; - bool mjit_enable_p; + bool jit_enable_p; }; static void @@ -2332,7 +2332,7 @@ vm_exec_bottom_main(void *context) struct rb_vm_exec_context *ctx = (struct rb_vm_exec_context *)context; ctx->state = TAG_NONE; - if (!ctx->mjit_enable_p || (ctx->result = mjit_exec(ctx->ec)) == Qundef) { + if (!ctx->jit_enable_p || (ctx->result = jit_exec(ctx->ec)) == Qundef) { ctx->result = vm_exec_core(ctx->ec, ctx->initial); } vm_exec_enter_vm_loop(ctx->ec, ctx, ctx->tag, true); @@ -2347,12 +2347,12 @@ vm_exec_bottom_rescue(void *context) } VALUE -vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) +vm_exec(rb_execution_context_t *ec, bool jit_enable_p) { struct rb_vm_exec_context ctx = { .ec = ec, .initial = 0, .result = Qundef, - .mjit_enable_p = mjit_enable_p, + .jit_enable_p = jit_enable_p, }; struct rb_wasm_try_catch try_catch; @@ -2374,7 +2374,7 @@ vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) #else VALUE -vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) +vm_exec(rb_execution_context_t *ec, bool jit_enable_p) { enum ruby_tag_type state; VALUE result = Qundef; @@ -2384,7 +2384,7 @@ vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) _tag.retval = Qnil; if ((state = EC_EXEC_TAG()) == TAG_NONE) { - if (!mjit_enable_p || (result = mjit_exec(ec)) == Qundef) { + if (!jit_enable_p || (result = jit_exec(ec)) == Qundef) { result = vm_exec_core(ec, initial); } goto vm_loop_start; /* fallback to the VM */ diff --git a/vm_core.h b/vm_core.h index 717f1168006ab3..45ec1111559b9b 100644 --- a/vm_core.h +++ b/vm_core.h @@ -489,7 +489,7 @@ struct rb_iseq_constant_body { /* The following fields are MJIT related info. 
*/ VALUE (*jit_func)(struct rb_execution_context_struct *, struct rb_control_frame_struct *); /* function pointer for loaded native code */ - long unsigned total_calls; /* number of total calls with `mjit_exec()` */ + long unsigned total_calls; /* number of total calls with `jit_exec()` */ struct rb_mjit_unit *jit_unit; #endif diff --git a/vm_eval.c b/vm_eval.c index c7669cbb858e2a..db8ca455d94ec6 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -20,7 +20,7 @@ static inline VALUE vm_yield_with_cref(rb_execution_context_t *ec, int argc, con static inline VALUE vm_yield(rb_execution_context_t *ec, int argc, const VALUE *argv, int kw_splat); static inline VALUE vm_yield_with_block(rb_execution_context_t *ec, int argc, const VALUE *argv, VALUE block_handler, int kw_splat); static inline VALUE vm_yield_force_blockarg(rb_execution_context_t *ec, VALUE args); -VALUE vm_exec(rb_execution_context_t *ec, bool mjit_enable_p); +VALUE vm_exec(rb_execution_context_t *ec, bool jit_enable_p); static void vm_set_eval_stack(rb_execution_context_t * th, const rb_iseq_t *iseq, const rb_cref_t *cref, const struct rb_block *base_block); static int vm_collect_local_variables_in_heap(const VALUE *dfp, const struct local_var_list *vars); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 837c71ffd5ee14..68362ddf60cea9 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -4783,16 +4783,16 @@ vm_sendish( #ifdef MJIT_HEADER /* When calling ISeq which may catch an exception from JIT-ed - code, we should not call mjit_exec directly to prevent the + code, we should not call jit_exec directly to prevent the caller frame from being canceled. That's because the caller frame may have stack values in the local variables and the cancelling the caller frame will purge them. But directly - calling mjit_exec is faster... */ + calling jit_exec is faster... 
*/ if (ISEQ_BODY(GET_ISEQ())->catch_except_p) { VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH); return vm_exec(ec, true); } - else if ((val = mjit_exec(ec)) == Qundef) { + else if ((val = jit_exec(ec)) == Qundef) { VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH); return vm_exec(ec, false); } @@ -4801,9 +4801,8 @@ vm_sendish( } #else /* When calling from VM, longjmp in the callee won't purge any - JIT-ed caller frames. So it's safe to directly call - mjit_exec. */ - return mjit_exec(ec); + JIT-ed caller frames. So it's safe to directly call jit_exec. */ + return jit_exec(ec); #endif } diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index 192e9753d976d6..bfa9188d3e9bb2 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -22,7 +22,7 @@ pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool { } /// Is YJIT on? The interpreter uses this function to decide whether to increment -/// ISEQ call counters. See mjit_exec(). +/// ISEQ call counters. See jit_exec(). /// This is used frequently since it's used on every method call in the interpreter. 
#[no_mangle] pub extern "C" fn rb_yjit_enabled_p() -> raw::c_int { From b32a3f1275a8c7748f2134492ce3c532f277d261 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 20 Aug 2022 16:25:30 +0900 Subject: [PATCH 180/546] [Bug #18964] Add test for `rb_econv_append` --- test/-ext-/econv/test_append.rb | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 test/-ext-/econv/test_append.rb diff --git a/test/-ext-/econv/test_append.rb b/test/-ext-/econv/test_append.rb new file mode 100644 index 00000000000000..f8c1d2add690a2 --- /dev/null +++ b/test/-ext-/econv/test_append.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: false +require 'test/unit' +require "-test-/econv" + +class Test_EConvAppend < Test::Unit::TestCase + def test_econv_str_append_valid + ec = Bug::EConv.new("utf-8", "cp932") + dst = "\u3044".encode("cp932") + ret = ec.append("\u3042"*30, dst) + assert_same(dst, ret) + assert_not_predicate(dst, :ascii_only?) + assert_predicate(dst, :valid_encoding?) + end + + def test_econv_str_append_broken + ec = Bug::EConv.new("utf-8", "cp932") + dst = "" + ret = ec.append("\u3042"*30, dst) + assert_same(dst, ret) + assert_not_predicate(dst, :ascii_only?) + assert_not_predicate(dst, :valid_encoding?) + end +end From 8212aab81a77a2a91fb7c1681b4968171193b48f Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Mon, 27 Dec 2021 09:39:15 -0800 Subject: [PATCH 181/546] Make Object#method and Module#instance_method not skip ZSUPER methods Based on https://github.com/jeremyevans/ruby/commit/c95e7e5329140f640b6497905485761f3336d967 Among other things, this fixes calling visibility methods (public?, protected?, and private?) on them. It also fixes #owner to show the class the zsuper method entry is defined in, instead of the original class it references. For some backwards compatibility, adjust #parameters and #source_location, to show the parameters and source location of the method originally defined. 
Also have the parameters and source location still be shown by #inspect. Clarify documentation of {Method,UnboundMethod}#owner. Add tests based on the description of https://bugs.ruby-lang.org/issues/18435 and based on https://github.com/ruby/ruby/pull/5356#issuecomment-1005298809 Fixes [Bug #18435] [Bug #18729] Co-authored-by: Benoit Daloze --- proc.c | 63 +++++++++++++++++++++++++++++----------- test/ruby/test_method.rb | 59 +++++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 22 deletions(-) diff --git a/proc.c b/proc.c index 3c52fb06a775d0..dbf28aa55e0f95 100644 --- a/proc.c +++ b/proc.c @@ -1684,7 +1684,6 @@ mnew_internal(const rb_method_entry_t *me, VALUE klass, VALUE iclass, VALUE method; rb_method_visibility_t visi = METHOD_VISI_UNDEF; - again: if (UNDEFINED_METHOD_ENTRY_P(me)) { if (respond_to_missing_p(klass, obj, ID2SYM(id), scope)) { return mnew_missing(klass, obj, id, mclass); @@ -1700,19 +1699,6 @@ mnew_internal(const rb_method_entry_t *me, VALUE klass, VALUE iclass, rb_print_inaccessible(klass, id, visi); } } - if (me->def->type == VM_METHOD_TYPE_ZSUPER) { - if (me->defined_class) { - VALUE klass = RCLASS_SUPER(RCLASS_ORIGIN(me->defined_class)); - id = me->def->original_id; - me = (rb_method_entry_t *)rb_callable_method_entry_with_refinements(klass, id, &iclass); - } - else { - VALUE klass = RCLASS_SUPER(RCLASS_ORIGIN(me->owner)); - id = me->def->original_id; - me = rb_method_entry_without_refinements(klass, id, &iclass); - } - goto again; - } method = TypedData_Make_Struct(mclass, struct METHOD, &method_data_type, data); @@ -1934,7 +1920,15 @@ method_original_name(VALUE obj) * call-seq: * meth.owner -> class_or_module * - * Returns the class or module that defines the method. + * Returns the class or module on which this method is defined. 
+ * In other words, + * + * meth.owner.instance_methods(false).include?(meth.name) # => true + * + * holds as long as the method is not removed/undefined/replaced, + * (with private_instance_methods instead of instance_methods if the method + * is private). + * * See also Method#receiver. * * (1..3).method(:map).owner #=> Enumerable @@ -2951,6 +2945,24 @@ rb_method_entry_location(const rb_method_entry_t *me) return method_def_location(me->def); } +static VALUE method_super_method(VALUE method); + +static const rb_method_definition_t * +zsuper_ref_method_def(VALUE method) +{ + const rb_method_definition_t *def = rb_method_def(method); + VALUE super_method; + while (def->type == VM_METHOD_TYPE_ZSUPER) { + super_method = method_super_method(method); + if (NIL_P(super_method)) { + break; + } + method = super_method; + def = rb_method_def(method); + } + return def; +} + /* * call-seq: * meth.source_location -> [String, Integer] @@ -2962,7 +2974,7 @@ rb_method_entry_location(const rb_method_entry_t *me) VALUE rb_method_location(VALUE method) { - return method_def_location(rb_method_def(method)); + return method_def_location(zsuper_ref_method_def(method)); } static const rb_method_definition_t * @@ -3050,7 +3062,7 @@ method_def_parameters(const rb_method_definition_t *def) static VALUE rb_method_parameters(VALUE method) { - return method_def_parameters(rb_method_def(method)); + return method_def_parameters(zsuper_ref_method_def(method)); } /* @@ -3112,6 +3124,23 @@ method_inspect(VALUE method) if (data->me->def->type == VM_METHOD_TYPE_ALIAS) { defined_class = data->me->def->body.alias.original_me->owner; } + else if (data->me->def->type == VM_METHOD_TYPE_ZSUPER) { + const rb_method_definition_t *zsuper_ref_def = data->me->def; + struct METHOD *zsuper_ref_data; + VALUE super_method; + + do { + super_method = method_super_method(method); + if (NIL_P(super_method)) { + break; + } + method = super_method; + zsuper_ref_def = rb_method_def(method); + } while 
(zsuper_ref_def->type == VM_METHOD_TYPE_ZSUPER); + + TypedData_Get_Struct(method, struct METHOD, &method_data_type, zsuper_ref_data); + defined_class = method_entry_defined_class(zsuper_ref_data->me); + } else { defined_class = method_entry_defined_class(data->me); } diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb index 56e94493d9860c..5f689c3d4f8ece 100644 --- a/test/ruby/test_method.rb +++ b/test/ruby/test_method.rb @@ -1056,20 +1056,28 @@ def foo; end assert_equal(sm, im.clone.bind(o).super_method) end - def test_super_method_removed + def test_super_method_removed_public c1 = Class.new {private def foo; end} c2 = Class.new(c1) {public :foo} c3 = Class.new(c2) {def foo; end} c1.class_eval {undef foo} m = c3.instance_method(:foo) m = assert_nothing_raised(NameError, Feature9781) {break m.super_method} - assert_nil(m, Feature9781) + assert_equal c2, m.owner + end + + def test_super_method_removed_regular + c1 = Class.new { def foo; end } + c2 = Class.new(c1) { def foo; end } + assert_equal c1.instance_method(:foo), c2.instance_method(:foo).super_method + c1.remove_method :foo + assert_equal nil, c2.instance_method(:foo).super_method end def test_prepended_public_zsuper mod = EnvUtil.labeled_module("Mod") {private def foo; :ok end} - mods = [mod] obj = Object.new.extend(mod) + mods = [obj.singleton_class] class << obj public :foo end @@ -1079,7 +1087,7 @@ class << obj end m = obj.method(:foo) assert_equal(mods, mods.map {m.owner.tap {m = m.super_method}}) - assert_nil(m) + assert_nil(m.super_method) end def test_super_method_with_prepended_module @@ -1192,6 +1200,47 @@ def foo assert_nil(super_method) end + # Bug 18435 + def test_instance_methods_owner_consistency + a = Module.new { def method1; end } + + b = Class.new do + include a + protected :method1 + end + + assert_equal [:method1], b.instance_methods(false) + assert_equal b, b.instance_method(:method1).owner + end + + def test_zsuper_method_removed + a = EnvUtil.labeled_class('A') do + 
private + def foo(arg = nil) + 1 + end + end + line = __LINE__ - 4 + + b = EnvUtil.labeled_class('B', a) do + public :foo + end + + unbound = b.instance_method(:foo) + + assert_equal unbound, b.public_instance_method(:foo) + assert_equal "#", unbound.inspect + assert_equal [[:opt, :arg]], unbound.parameters + + a.remove_method(:foo) + + assert_equal [[:rest]], unbound.parameters + assert_equal "#", unbound.inspect + + obj = b.new + assert_raise_with_message(NoMethodError, /super: no superclass method `foo'/) { unbound.bind_call(obj) } + end + def rest_parameter(*rest) rest end @@ -1310,7 +1359,7 @@ module M2 ::Object.prepend(M2) m = Object.instance_method(:x) - assert_equal M, m.owner + assert_equal M2, m.owner end; end From 209631a45f9682dedf718f4b4a140efe7d21a6fc Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 15 Aug 2022 16:01:33 +0200 Subject: [PATCH 182/546] Consider resolved-through-zsuper methods equal for compatibility * Fixes https://bugs.ruby-lang.org/issues/18751 --- proc.c | 65 +++++++++---------- .../core/unboundmethod/equal_value_spec.rb | 37 +++++++++++ test/ruby/test_method.rb | 18 +++++ 3 files changed, 86 insertions(+), 34 deletions(-) diff --git a/proc.c b/proc.c index dbf28aa55e0f95..f9bd469618a1c8 100644 --- a/proc.c +++ b/proc.c @@ -1738,6 +1738,27 @@ mnew_unbound(VALUE klass, ID id, VALUE mclass, int scope) return mnew_from_me(me, klass, iclass, Qundef, id, mclass, scope); } +static const rb_method_entry_t* +zsuper_resolve(const rb_method_entry_t *me) +{ + const rb_method_entry_t *super_me; + while (me->def->type == VM_METHOD_TYPE_ZSUPER) { + VALUE defined_class = me->defined_class ? 
me->defined_class : me->owner; + VALUE super_class = RCLASS_SUPER(RCLASS_ORIGIN(defined_class)); + if (!super_class) { + break; + } + ID id = me->def->original_id; + VALUE iclass; + super_me = (rb_method_entry_t *)rb_callable_method_entry_with_refinements(super_class, id, &iclass); + if (!super_me) { + break; + } + me = super_me; + } + return me; +} + static inline VALUE method_entry_defined_class(const rb_method_entry_t *me) { @@ -1798,10 +1819,13 @@ method_eq(VALUE method, VALUE other) m1 = (struct METHOD *)DATA_PTR(method); m2 = (struct METHOD *)DATA_PTR(other); - klass1 = method_entry_defined_class(m1->me); - klass2 = method_entry_defined_class(m2->me); + const rb_method_entry_t *m1_me = zsuper_resolve(m1->me); + const rb_method_entry_t *m2_me = zsuper_resolve(m2->me); - if (!rb_method_entry_eq(m1->me, m2->me) || + klass1 = method_entry_defined_class(m1_me); + klass2 = method_entry_defined_class(m2_me); + + if (!rb_method_entry_eq(m1_me, m2_me) || klass1 != klass2 || m1->klass != m2->klass || m1->recv != m2->recv) { @@ -2945,22 +2969,12 @@ rb_method_entry_location(const rb_method_entry_t *me) return method_def_location(me->def); } -static VALUE method_super_method(VALUE method); - static const rb_method_definition_t * zsuper_ref_method_def(VALUE method) { - const rb_method_definition_t *def = rb_method_def(method); - VALUE super_method; - while (def->type == VM_METHOD_TYPE_ZSUPER) { - super_method = method_super_method(method); - if (NIL_P(super_method)) { - break; - } - method = super_method; - def = rb_method_def(method); - } - return def; + const struct METHOD *data; + TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); + return zsuper_resolve(data->me)->def; } /* @@ -3124,25 +3138,8 @@ method_inspect(VALUE method) if (data->me->def->type == VM_METHOD_TYPE_ALIAS) { defined_class = data->me->def->body.alias.original_me->owner; } - else if (data->me->def->type == VM_METHOD_TYPE_ZSUPER) { - const rb_method_definition_t *zsuper_ref_def = 
data->me->def; - struct METHOD *zsuper_ref_data; - VALUE super_method; - - do { - super_method = method_super_method(method); - if (NIL_P(super_method)) { - break; - } - method = super_method; - zsuper_ref_def = rb_method_def(method); - } while (zsuper_ref_def->type == VM_METHOD_TYPE_ZSUPER); - - TypedData_Get_Struct(method, struct METHOD, &method_data_type, zsuper_ref_data); - defined_class = method_entry_defined_class(zsuper_ref_data->me); - } else { - defined_class = method_entry_defined_class(data->me); + defined_class = method_entry_defined_class(zsuper_resolve(data->me)); } if (RB_TYPE_P(defined_class, T_ICLASS)) { diff --git a/spec/ruby/core/unboundmethod/equal_value_spec.rb b/spec/ruby/core/unboundmethod/equal_value_spec.rb index 6242b048840d4b..b21677687ec655 100644 --- a/spec/ruby/core/unboundmethod/equal_value_spec.rb +++ b/spec/ruby/core/unboundmethod/equal_value_spec.rb @@ -98,4 +98,41 @@ def discard_1; :discard; end (@discard_1 == UnboundMethodSpecs::Methods.instance_method(:discard_1)).should == false end + + it "considers methods through aliasing equal" do + c = Class.new do + class << self + alias_method :n, :new + end + end + + c.method(:new).should == c.method(:n) + c.method(:n).should == Class.instance_method(:new).bind(c) + end + + # On CRuby < 3.2, the 2 specs below pass due to method/instance_method skipping zsuper methods. + # We are interested in the general pattern working, i.e. the combination of method/instance_method + # and #== exposes the wanted behavior. 
+ it "considers methods through visibility change equal" do + c = Class.new do + class << self + private :new + end + end + + c.method(:new).should == Class.instance_method(:new).bind(c) + end + + it "considers methods through aliasing and visibility change equal" do + c = Class.new do + class << self + alias_method :n, :new + private :new + end + end + + c.method(:new).should == c.method(:n) + c.method(:n).should == Class.instance_method(:new).bind(c) + c.method(:new).should == Class.instance_method(:new).bind(c) + end end diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb index 5f689c3d4f8ece..7e440095c8d879 100644 --- a/test/ruby/test_method.rb +++ b/test/ruby/test_method.rb @@ -1241,6 +1241,24 @@ def foo(arg = nil) assert_raise_with_message(NoMethodError, /super: no superclass method `foo'/) { unbound.bind_call(obj) } end + # Bug #18751 + def method_equality_visbility_alias + c = Class.new do + class << self + alias_method :n, :new + private :new + end + end + + assert_equal c.method(:n), c.method(:new) + + assert_not_equal c.method(:n), Class.method(:new) + assert_equal c.method(:n) == Class.instance_method(:new).bind(c) + + assert_not_equal c.method(:new), Class.method(:new) + assert_equal c.method(:new), Class.instance_method(:new).bind(c) + end + def rest_parameter(*rest) rest end From dc8d70e4615cdf12378322fbcd4396486270ddbe Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 20 Aug 2022 16:33:03 -0700 Subject: [PATCH 183/546] Execute MJIT in a forked Ruby process (#6264) [Misc #18968] --- mjit.c | 85 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 23 deletions(-) diff --git a/mjit.c b/mjit.c index 98f4af3d18fa71..7912f8fb8d5004 100644 --- a/mjit.c +++ b/mjit.c @@ -431,10 +431,12 @@ CRITICAL_SECTION_FINISH(int level, const char *msg) rb_native_mutex_unlock(&mjit_engine_mutex); } +static pid_t mjit_pid = 0; + static int sprint_uniq_filename(char *str, size_t size, unsigned long id, const char 
*prefix, const char *suffix) { - return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, getpid(), id, suffix); + return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, mjit_pid, id, suffix); } // Return time in milliseconds as a double. @@ -798,8 +800,8 @@ make_pch(void) } } -static pid_t -start_compiling_c_to_so(const char *c_file, const char *so_file) +static int +compile_c_to_so(const char *c_file, const char *so_file) { const char *so_args[] = { "-o", so_file, @@ -821,18 +823,14 @@ start_compiling_c_to_so(const char *c_file, const char *so_file) char **args = form_args(8, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, cc_added_args, so_args, loader_args, CC_LIBS, CC_DLDFLAGS_ARGS, CC_LINKER_ARGS); - if (args == NULL) return -1; - - rb_vm_t *vm = GET_VM(); - rb_native_mutex_lock(&vm->waitpid_lock); + if (args == NULL) return 1; - pid_t pid = start_process(cc_path, args); - mjit_add_waiting_pid(vm, pid); - - rb_native_mutex_unlock(&vm->waitpid_lock); + int exit_code = exec_process(cc_path, args); + if (!mjit_opts.save_temps) + remove_file(c_file); free(args); - return pid; + return exit_code; } #endif // _MSC_VER @@ -888,8 +886,8 @@ mjit_compact(char* c_file) // Compile all cached .c files and build a single .so file. Reload all JIT func from it. // This improves the code locality for better performance in terms of iTLB and iCache. 
-static pid_t -start_mjit_compact(struct rb_mjit_unit *unit) +static int +mjit_compact_unit(struct rb_mjit_unit *unit) { static const char c_ext[] = ".c"; static const char so_ext[] = DLEXT; @@ -900,9 +898,30 @@ start_mjit_compact(struct rb_mjit_unit *unit) bool success = mjit_compact(c_file); if (success) { - return start_compiling_c_to_so(c_file, so_file); + return compile_c_to_so(c_file, so_file); + } + return 1; +} + +static pid_t +start_mjit_compact(struct rb_mjit_unit *unit) +{ + rb_vm_t *vm = GET_VM(); + rb_native_mutex_lock(&vm->waitpid_lock); + + pid_t pid = fork(); + if (pid == 0) { + rb_native_mutex_unlock(&vm->waitpid_lock); + + int exit_code = mjit_compact_unit(unit); + exit(exit_code); + } + else { + mjit_add_waiting_pid(vm, pid); + rb_native_mutex_unlock(&vm->waitpid_lock); + + return pid; } - return -1; } static void @@ -1005,8 +1024,8 @@ compile_prelude(FILE *f) // Compile ISeq in UNIT and return function pointer of JIT-ed code. // It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong. -static pid_t -start_mjit_compile(struct rb_mjit_unit *unit) +static int +mjit_compile_unit(struct rb_mjit_unit *unit) { static const char c_ext[] = ".c"; static const char so_ext[] = DLEXT; @@ -1022,7 +1041,7 @@ start_mjit_compile(struct rb_mjit_unit *unit) int e = errno; if (fd >= 0) (void)close(fd); verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e)); - return -1; + return 1; } // print #include of MJIT header, etc. 
@@ -1047,10 +1066,31 @@ start_mjit_compile(struct rb_mjit_unit *unit) if (!mjit_opts.save_temps) remove_file(c_file); verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file); - return -1; + return 1; } - return start_compiling_c_to_so(c_file, so_file); + return compile_c_to_so(c_file, so_file); +} + +static pid_t +start_mjit_compile(struct rb_mjit_unit *unit) +{ + rb_vm_t *vm = GET_VM(); + rb_native_mutex_lock(&vm->waitpid_lock); + + pid_t pid = fork(); + if (pid == 0) { + rb_native_mutex_unlock(&vm->waitpid_lock); + + int exit_code = mjit_compile_unit(unit); + exit(exit_code); + } + else { + mjit_add_waiting_pid(vm, pid); + rb_native_mutex_unlock(&vm->waitpid_lock); + + return pid; + } } #ifdef _WIN32 @@ -1556,8 +1596,6 @@ mjit_notify_waitpid(int status) // Delete .c file char c_file[MAXPATHLEN]; sprint_uniq_filename(c_file, (int)sizeof(c_file), current_cc_unit->id, MJIT_TMP_PREFIX, ".c"); - if (!mjit_opts.save_temps) - remove_file(c_file); // Check the result bool success = false; @@ -2092,6 +2130,7 @@ mjit_init(const struct mjit_options *opts) mjit_opts = *opts; mjit_enabled = true; mjit_call_p = true; + mjit_pid = getpid(); // Normalize options if (mjit_opts.min_calls == 0) From e85db849590201972b93f8188d9c0ad232b5055f Mon Sep 17 00:00:00 2001 From: git Date: Sun, 21 Aug 2022 08:33:24 +0900 Subject: [PATCH 184/546] * 2022-08-21 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index b5ed9d1d253fa3..ee9df020b00463 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 20 +#define RUBY_RELEASE_DAY 21 #include "ruby/version.h" #include "ruby/internal/abi.h" From ddf96b7693639e354e95b4d0c6021586968a5a5f Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 20 Aug 2022 18:35:36 -0700 Subject: [PATCH 185/546] Drop mswin support of MJIT (#6265) The current MJIT relies on SIGCHLD and 
fork(2) to be performant, and it's something mswin can't offer. You could run Linux MJIT on WSL instead. [Misc #18968] --- mjit.c | 334 +--------------------------------- mjit_compile.c | 3 - test/ruby/test_rubyoptions.rb | 3 +- tool/transform_mjit_header.rb | 14 +- win32/Makefile.sub | 111 +---------- win32/configure.bat | 12 -- win32/setup.mak | 1 - 7 files changed, 13 insertions(+), 465 deletions(-) diff --git a/mjit.c b/mjit.c index 7912f8fb8d5004..e519a3091da8e7 100644 --- a/mjit.c +++ b/mjit.c @@ -98,14 +98,9 @@ #include "insns_info.inc" #include "internal/compile.h" -#ifdef _WIN32 -#include -#include -#else #include #include #include -#endif #include #ifdef HAVE_FCNTL_H #include @@ -122,34 +117,11 @@ # define MAXPATHLEN 1024 #endif -#ifdef _WIN32 -#define dlopen(name,flag) ((void*)LoadLibrary(name)) -#define dlerror() strerror(rb_w32_map_errno(GetLastError())) -#define dlsym(handle,name) ((void*)GetProcAddress((handle),(name))) -#define dlclose(handle) (!FreeLibrary(handle)) -#define RTLD_NOW -1 - -#define waitpid(pid,stat_loc,options) (WaitForSingleObject((HANDLE)(pid), INFINITE), GetExitCodeProcess((HANDLE)(pid), (LPDWORD)(stat_loc)), CloseHandle((HANDLE)pid), (pid)) -#define WIFEXITED(S) ((S) != STILL_ACTIVE) -#define WEXITSTATUS(S) (S) -#define WIFSIGNALED(S) (0) -typedef intptr_t pid_t; -#endif - // Atomically set function pointer if possible. #define MJIT_ATOMIC_SET(var, val) (void)ATOMIC_PTR_EXCHANGE(var, val) #define MJIT_TMP_PREFIX "_ruby_mjit_" -// JIT compaction requires the header transformation because linking multiple .o files -// doesn't work without having `static` in the same function definitions. We currently -// don't support transforming the MJIT header on Windows. -#ifdef _WIN32 -# define USE_JIT_COMPACTION 0 -#else -# define USE_JIT_COMPACTION 1 -#endif - // Linked list of struct rb_mjit_unit. 
struct rb_mjit_unit_list { struct ccan_list_head head; @@ -237,15 +209,8 @@ static struct rb_mjit_unit *current_cc_unit = NULL; // PID of currently running C compiler process. 0 if nothing is running. static pid_t current_cc_pid = 0; // TODO: make this part of unit? -#ifndef _MSC_VER // Name of the header file. static char *header_file; -#endif - -#ifdef _WIN32 -// Linker option to enable libruby. -static char *libruby_pathflag; -#endif #include "mjit_config.h" @@ -261,7 +226,7 @@ static char *libruby_pathflag; // Use `-nodefaultlibs -nostdlib` for GCC where possible, which does not work on cygwin, AIX, and OpenBSD. // This seems to improve MJIT performance on GCC. -#if defined __GNUC__ && !defined __clang__ && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__) +#if defined __GNUC__ && !defined __clang__ && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__) # define GCC_NOSTDLIB_FLAGS "-nodefaultlibs", "-nostdlib", #else # define GCC_NOSTDLIB_FLAGS // empty @@ -286,7 +251,7 @@ static const char *const CC_LINKER_ARGS[] = { }; static const char *const CC_LIBS[] = { -#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(__CYGWIN__) MJIT_LIBS // mswin, cygwin #endif #if defined __GNUC__ && !defined __clang__ @@ -371,22 +336,6 @@ remove_file(const char *filename) } } -// Lazily delete .so files. -static void -clean_temp_files(struct rb_mjit_unit *unit) -{ -#if defined(_WIN32) - if (unit->so_file) { - char *so_file = unit->so_file; - - unit->so_file = NULL; - // unit->so_file is set only when mjit_opts.save_temps is false. - remove_file(so_file); - free(so_file); - } -#endif -} - // This is called in the following situations: // 1) On dequeue or `unload_units()`, associated ISeq is already GCed. // 2) The unit is not called often and unloaded by `unload_units()`. 
@@ -409,7 +358,6 @@ free_unit(struct rb_mjit_unit *unit) if (unit->handle && dlclose(unit->handle)) { // handle is NULL if it's in queue mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror()); } - clean_temp_files(unit); free(unit); } @@ -556,22 +504,6 @@ start_process(const char *abspath, char *const *argv) } pid_t pid; -#ifdef _WIN32 - extern HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd); - int out_fd = 0; - if (mjit_opts.verbose <= 1) { - // Discard cl.exe's outputs like: - // _ruby_mjit_p12u3.c - // Creating library C:.../_ruby_mjit_p12u3.lib and object C:.../_ruby_mjit_p12u3.exp - out_fd = dev_null; - } - - pid = (pid_t)rb_w32_start_process(abspath, argv, out_fd); - if (pid == 0) { - verbose(1, "MJIT: Failed to create process: %s", dlerror()); - return -1; - } -#else if ((pid = vfork()) == 0) { /* TODO: reuse some function in process.c */ umask(0077); if (mjit_opts.verbose == 0) { @@ -589,7 +521,6 @@ start_process(const char *abspath, char *const *argv) verbose(1, "MJIT: Error in execv: %s", abspath); _exit(1); } -#endif (void)close(dev_null); return pid; } @@ -629,14 +560,7 @@ exec_process(const char *path, char *const argv[]) static void remove_so_file(const char *so_file, struct rb_mjit_unit *unit) { -#if defined(_WIN32) - // Windows can't remove files while it's used. - unit->so_file = strdup(so_file); // lazily delete on `clean_temp_files()` - if (unit->so_file == NULL) - mjit_warning("failed to allocate memory to lazily remove '%s': %s", so_file, strerror(errno)); -#else remove_file(so_file); -#endif } // Print _mjitX, but make a human-readable funcname when --mjit-debug is used @@ -683,87 +607,6 @@ static const int c_file_access_mode = #define append_str(p, str) append_str2(p, str, sizeof(str)-1) #define append_lit(p, str) append_str2(p, str, rb_strlen_lit(str)) -#ifdef _MSC_VER -// Compile C file to so. It returns true if it succeeds. 
(mswin) -static bool -compile_c_to_so(const char *c_file, const char *so_file) -{ - const char *files[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, "-link", libruby_pathflag, NULL }; - char *p; - - // files[0] = "-Fe*.dll" - files[0] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fe") + strlen(so_file) + 1)); - p = append_lit(p, "-Fe"); - p = append_str2(p, so_file, strlen(so_file)); - *p = '\0'; - - // files[1] = "-Fo*.obj" - // We don't need .obj file, but it's somehow created to cwd without -Fo and we want to control the output directory. - files[1] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fo") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".obj") + 1)); - char *obj_file = p = append_lit(p, "-Fo"); - p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT)); - p = append_lit(p, ".obj"); - *p = '\0'; - - // files[2] = "-Yu*.pch" - files[2] = p = alloca(sizeof(char) * (rb_strlen_lit("-Yu") + strlen(pch_file) + 1)); - p = append_lit(p, "-Yu"); - p = append_str2(p, pch_file, strlen(pch_file)); - *p = '\0'; - - // files[3] = "C:/.../rb_mjit_header-*.obj" - files[3] = p = alloca(sizeof(char) * (strlen(pch_file) + 1)); - p = append_str2(p, pch_file, strlen(pch_file) - strlen(".pch")); - p = append_lit(p, ".obj"); - *p = '\0'; - - // files[4] = "-Tc*.c" - files[4] = p = alloca(sizeof(char) * (rb_strlen_lit("-Tc") + strlen(c_file) + 1)); - p = append_lit(p, "-Tc"); - p = append_str2(p, c_file, strlen(c_file)); - *p = '\0'; - - // files[5] = "-Fd*.pdb" - // Generate .pdb file in temporary directory instead of cwd. - files[5] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fd") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".pdb") + 1)); - p = append_lit(p, "-Fd"); - p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT)); - p = append_lit(p, ".pdb"); - *p = '\0'; - - // files[6] = "-Z7" - // Put this last to override any debug options that came previously. 
- files[6] = p = alloca(sizeof(char) * rb_strlen_lit("-Z7") + 1); - p = append_lit(p, "-Z7"); - *p = '\0'; - - char **args = form_args(5, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, - files, CC_LIBS, CC_DLDFLAGS_ARGS); - if (args == NULL) - return false; - - int exit_code = exec_process(cc_path, args); - free(args); - - if (exit_code == 0) { - // remove never-used files (.obj, .lib, .exp, .pdb). XXX: Is there any way not to generate this? - if (!mjit_opts.save_temps) { - char *before_dot; - remove_file(obj_file); - - before_dot = obj_file + strlen(obj_file) - rb_strlen_lit(".obj"); - append_lit(before_dot, ".lib"); remove_file(obj_file); - append_lit(before_dot, ".exp"); remove_file(obj_file); - append_lit(before_dot, ".pdb"); remove_file(obj_file); - } - } - else { - verbose(2, "compile_c_to_so: compile error: %d", exit_code); - } - return exit_code == 0; -} -#else // _MSC_VER - // The function producing the pre-compiled header. static void make_pch(void) @@ -805,9 +648,6 @@ compile_c_to_so(const char *c_file, const char *so_file) { const char *so_args[] = { "-o", so_file, -# ifdef _WIN32 - libruby_pathflag, -# endif # ifdef __clang__ "-include-pch", pch_file, # endif @@ -832,9 +672,7 @@ compile_c_to_so(const char *c_file, const char *so_file) free(args); return exit_code; } -#endif // _MSC_VER -#if USE_JIT_COMPACTION static void compile_prelude(FILE *f); // Compile all JIT code into a single .c file @@ -961,7 +799,6 @@ load_compact_funcs_from_so(struct rb_mjit_unit *unit, char *c_file, char *so_fil } verbose(1, "JIT compaction (%.1fms): Compacted %d methods %s -> %s", end_time - current_cc_ms, active_units.length, c_file, so_file); } -#endif // USE_JIT_COMPACTION static void * load_func_from_so(const char *so_file, const char *funcname, struct rb_mjit_unit *unit) @@ -1015,11 +852,6 @@ compile_prelude(FILE *f) } fprintf(f, "\"\n"); #endif - -#ifdef _WIN32 - fprintf(f, "void _pei386_runtime_relocator(void){}\n"); - fprintf(f, "int __stdcall DllMainCRTStartup(void* 
hinstDLL, unsigned int fdwReason, void* lpvReserved) { return 1; }\n"); -#endif } // Compile ISeq in UNIT and return function pointer of JIT-ed code. @@ -1093,77 +925,6 @@ start_mjit_compile(struct rb_mjit_unit *unit) } } -#ifdef _WIN32 -// Compile ISeq in UNIT and return function pointer of JIT-ed code. -// It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong. -static mjit_func_t -convert_unit_to_func(struct rb_mjit_unit *unit) -{ - static const char c_ext[] = ".c"; - static const char so_ext[] = DLEXT; - char c_file[MAXPATHLEN], so_file[MAXPATHLEN], funcname[MAXPATHLEN]; - - sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext); - sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext); - sprint_funcname(funcname, unit); - - FILE *f; - int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600); - if (fd < 0 || (f = fdopen(fd, "w")) == NULL) { - int e = errno; - if (fd >= 0) (void)close(fd); - verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e)); - return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC; - } - - // print #include of MJIT header, etc. - compile_prelude(f); - - // To make MJIT worker thread-safe against GC.compact, copy ISeq values while `in_jit` is true. - long iseq_lineno = 0; - if (FIXNUM_P(ISEQ_BODY(unit->iseq)->location.first_lineno)) - // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG. 
- iseq_lineno = FIX2LONG(ISEQ_BODY(unit->iseq)->location.first_lineno); - char *iseq_label = alloca(RSTRING_LEN(ISEQ_BODY(unit->iseq)->location.label) + 1); - char *iseq_path = alloca(RSTRING_LEN(rb_iseq_path(unit->iseq)) + 1); - strcpy(iseq_label, RSTRING_PTR(ISEQ_BODY(unit->iseq)->location.label)); - strcpy(iseq_path, RSTRING_PTR(rb_iseq_path(unit->iseq))); - - verbose(2, "start compilation: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file); - fprintf(f, "/* %s@%s:%ld */\n\n", iseq_label, iseq_path, iseq_lineno); - bool success = mjit_compile(f, unit->iseq, funcname, unit->id); - - fclose(f); - if (!success) { - if (!mjit_opts.save_temps) - remove_file(c_file); - verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file); - return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC; - } - - double start_time = real_ms_time(); - success = compile_c_to_so(c_file, so_file); - if (!mjit_opts.save_temps) - remove_file(c_file); - double end_time = real_ms_time(); - - if (!success) { - verbose(2, "Failed to generate so: %s", so_file); - return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC; - } - - void *func = load_func_from_so(so_file, funcname, unit); - if (!mjit_opts.save_temps) - remove_so_file(so_file, unit); - - if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) { - verbose(1, "JIT success (%.1fms): %s@%s:%ld -> %s", - end_time - start_time, iseq_label, iseq_path, iseq_lineno, c_file); - } - return (mjit_func_t)func; -} -#endif - // Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`. // This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc. 
// @@ -1412,7 +1173,6 @@ free_list(struct rb_mjit_unit_list *list, bool close_handle_p) if (unit->handle && dlclose(unit->handle)) { mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror()); } - clean_temp_files(unit); free(unit); } else { @@ -1510,15 +1270,6 @@ check_unit_queue(void) struct rb_mjit_unit *unit = get_from_list(&unit_queue); if (unit == NULL) return; -#ifdef _WIN32 - // Synchronously compile methods on Windows. - // mswin: No SIGCHLD, MinGW: directly compiling .c to .so doesn't work - mjit_func_t func = convert_unit_to_func(unit); - MJIT_ATOMIC_SET(ISEQ_BODY(unit->iseq)->jit_func, func); - if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) { - add_to_list(unit, &active_units); - } -#else current_cc_ms = real_ms_time(); current_cc_unit = unit; current_cc_pid = start_mjit_compile(unit); @@ -1534,7 +1285,6 @@ check_unit_queue(void) if (mjit_opts.wait) { mjit_wait(unit->iseq->body); } -#endif } // Create unit for `iseq`. This function may be called from an MJIT worker. @@ -1561,7 +1311,6 @@ create_unit(const rb_iseq_t *iseq) static void check_compaction(void) { -#if USE_JIT_COMPACTION // Allow only `max_cache_size / 100` times (default: 100) of compaction. // Note: GC of compacted code has not been implemented yet. int max_compact_size = mjit_opts.max_cache_size / 100; @@ -1583,7 +1332,6 @@ check_compaction(void) // TODO: check -1 } } -#endif } // Check the current CC process if any, and start a next C compiler process as needed. 
@@ -1616,12 +1364,8 @@ mjit_notify_waitpid(int status) char so_file[MAXPATHLEN]; sprint_uniq_filename(so_file, (int)sizeof(so_file), current_cc_unit->id, MJIT_TMP_PREFIX, DLEXT); if (current_cc_unit->compact_p) { // Compact unit -#if USE_JIT_COMPACTION load_compact_funcs_from_so(current_cc_unit, c_file, so_file); current_cc_unit = NULL; -#else - RUBY_ASSERT(!current_cc_unit->compact_p); -#endif } else { // Normal unit // Load the function from so @@ -1833,16 +1577,6 @@ init_header_filename(void) const char *basedir = ""; size_t baselen = 0; char *p; -#ifdef _WIN32 - static const char libpathflag[] = -# ifdef _MSC_VER - "-LIBPATH:" -# else - "-L" -# endif - ; - const size_t libpathflag_len = sizeof(libpathflag) - 1; -#endif #ifdef LOAD_RELATIVE basedir_val = ruby_prefix_path; @@ -1884,7 +1618,6 @@ init_header_filename(void) } else #endif -#ifndef _MSC_VER { // A name of the header file included in any C file generated by MJIT for iseqs. static const char header_name[] = MJIT_HEADER_INSTALL_DIR "/" MJIT_MIN_HEADER_NAME; @@ -1904,56 +1637,15 @@ init_header_filename(void) } pch_file = get_uniq_filename(0, MJIT_TMP_PREFIX "h", ".h.gch"); -#else - { - static const char pch_name[] = MJIT_HEADER_INSTALL_DIR "/" MJIT_PRECOMPILED_HEADER_NAME; - const size_t pch_name_len = sizeof(pch_name) - 1; - - pch_file = xmalloc(baselen + pch_name_len + 1); - p = append_str2(pch_file, basedir, baselen); - p = append_str2(p, pch_name, pch_name_len + 1); - if ((fd = rb_cloexec_open(pch_file, O_RDONLY, 0)) < 0) { - verbose(1, "Cannot access precompiled header file: %s", pch_file); - xfree(pch_file); - pch_file = NULL; - return false; - } - (void)close(fd); - } -#endif - -#ifdef _WIN32 - basedir_val = ruby_archlibdir_path; - basedir = StringValuePtr(basedir_val); - baselen = RSTRING_LEN(basedir_val); - libruby_pathflag = p = xmalloc(libpathflag_len + baselen + 1); - p = append_str(p, libpathflag); - p = append_str2(p, basedir, baselen); - *p = '\0'; -#endif return true; } -#ifdef _WIN32 
-UINT rb_w32_system_tmpdir(WCHAR *path, UINT len); -#endif - static char * system_default_tmpdir(void) { // c.f. ext/etc/etc.c:etc_systmpdir() -#ifdef _WIN32 - WCHAR tmppath[_MAX_PATH]; - UINT len = rb_w32_system_tmpdir(tmppath, numberof(tmppath)); - if (len) { - int blen = WideCharToMultiByte(CP_UTF8, 0, tmppath, len, NULL, 0, NULL, NULL); - char *tmpdir = xmalloc(blen + 1); - WideCharToMultiByte(CP_UTF8, 0, tmppath, len, tmpdir, blen, NULL, NULL); - tmpdir[blen] = '\0'; - return tmpdir; - } -#elif defined _CS_DARWIN_USER_TEMP_DIR +#if defined _CS_DARWIN_USER_TEMP_DIR char path[MAXPATHLEN]; size_t len = confstr(_CS_DARWIN_USER_TEMP_DIR, path, sizeof(path)); if (len > 0) { @@ -1981,19 +1673,17 @@ check_tmpdir(const char *dir) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #endif if (!S_ISDIR(st.st_mode)) return FALSE; -#ifndef _WIN32 -# ifndef S_IWOTH +#ifndef S_IWOTH # define S_IWOTH 002 -# endif +#endif if (st.st_mode & S_IWOTH) { -# ifdef S_ISVTX +#ifdef S_ISVTX if (!(st.st_mode & S_ISVTX)) return FALSE; -# else +#else return FALSE; -# endif +#endif } if (access(dir, W_OK)) return FALSE; -#endif return TRUE; } @@ -2141,11 +1831,7 @@ mjit_init(const struct mjit_options *opts) mjit_opts.max_cache_size = MIN_CACHE_SIZE; // Initialize variables for compilation -#ifdef _MSC_VER - pch_status = PCH_SUCCESS; // has prebuilt precompiled header -#else pch_status = PCH_NOT_READY; -#endif cc_path = CC_COMMON_ARGS[0]; verbose(2, "MJIT: CC defaults to %s", cc_path); cc_common_args = xmalloc(sizeof(CC_COMMON_ARGS)); @@ -2189,10 +1875,8 @@ mjit_init(const struct mjit_options *opts) // Initialize worker thread start_worker(); -#ifndef _MSC_VER // TODO: Consider running C compiler asynchronously make_pch(); -#endif } static void @@ -2311,12 +1995,10 @@ mjit_finish(bool close_handle_p) mjit_dump_total_calls(); #endif -#ifndef _MSC_VER // mswin has prebuilt precompiled header if (!mjit_opts.save_temps && getpid() == pch_owner_pid) remove_file(pch_file); xfree(header_file); 
header_file = NULL; -#endif xfree((void *)cc_common_args); cc_common_args = NULL; for (char **flag = cc_added_args; *flag != NULL; flag++) xfree(*flag); diff --git a/mjit_compile.c b/mjit_compile.c index 1bf5beb6a32b84..e85eaaa6cb9058 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -587,9 +587,6 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname, int id) return false; } -#ifdef _WIN32 - fprintf(f, "__declspec(dllexport)\n"); -#endif fprintf(f, "VALUE\n%s(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", funcname); bool success = mjit_compile_body(f, iseq, &status); fprintf(f, "\n} // end of %s\n", funcname); diff --git a/test/ruby/test_rubyoptions.rb b/test/ruby/test_rubyoptions.rb index ed2bc3538c63ba..cf6829cf8800b4 100644 --- a/test/ruby/test_rubyoptions.rb +++ b/test/ruby/test_rubyoptions.rb @@ -1128,8 +1128,7 @@ def test_null_script end def test_mjit_debug - # mswin uses prebuilt precompiled header. Thus it does not show a pch compilation log to check "-O0 -O1". - if JITSupport.supported? && !RUBY_PLATFORM.match?(/mswin/) + if JITSupport.supported? env = { 'MJIT_SEARCH_BUILD_DIR' => 'true' } assert_in_out_err([env, "--disable-yjit", "--mjit-debug=-O0 -O1", "--mjit-verbose=2", "" ], "", [], /-O0 -O1/) end diff --git a/tool/transform_mjit_header.rb b/tool/transform_mjit_header.rb index 8867c556f0dbaf..503e4c3593f8ca 100644 --- a/tool/transform_mjit_header.rb +++ b/tool/transform_mjit_header.rb @@ -169,10 +169,6 @@ def self.windows? RUBY_PLATFORM =~ /mswin|mingw|msys/ end - def self.cl_exe?(cc) - cc =~ /\Acl(\z| |\.exe)/ - end - # If code has macro which only supported compilers predefine, return true. def self.supported_header?(code) SUPPORTED_CC_MACROS.any? { |macro| code =~ /^#\s*define\s+#{Regexp.escape(macro)}\b/ } @@ -220,13 +216,9 @@ def self.with_code(code) cc = ARGV[0] code = File.binread(ARGV[1]) # Current version of the header file. 
outfile = ARGV[2] -if MJITHeader.cl_exe?(cc) - cflags = '-DMJIT_HEADER -Zs' -else - cflags = '-S -DMJIT_HEADER -fsyntax-only -Werror=implicit-function-declaration -Werror=implicit-int -Wfatal-errors' -end +cflags = '-S -DMJIT_HEADER -fsyntax-only -Werror=implicit-function-declaration -Werror=implicit-int -Wfatal-errors' -if !MJITHeader.cl_exe?(cc) && !MJITHeader.supported_header?(code) +if !MJITHeader.supported_header?(code) puts "This compiler (#{cc}) looks not supported for MJIT. Giving up to generate MJIT header." MJITHeader.write("#error MJIT does not support '#{cc}' yet", outfile) exit @@ -234,7 +226,7 @@ def self.with_code(code) MJITHeader.remove_predefined_macros!(code) -if MJITHeader.windows? # transformation is broken with Windows headers for now +if MJITHeader.windows? # transformation is broken on Windows and the platfor is not supported MJITHeader.remove_harmful_macros!(code) MJITHeader.check_code!(code, cc, cflags, 'initial') puts "\nSkipped transforming external functions to static on Windows." diff --git a/win32/Makefile.sub b/win32/Makefile.sub index bea24450ed0390..1aa27a91195211 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -319,33 +319,7 @@ CPPFLAGS = $(DEFS) $(ARCHDEFS) $(CPPFLAGS) CPPFLAGS = -DDISABLE_RUBYGEMS $(CPPFLAGS) !endif !ifndef MJIT_SUPPORT -MJIT_SUPPORT = yes -!endif -!if "$(CPPOUTFLAG)" == ">" -MJIT_HEADER_FLAGS = -!else -MJIT_HEADER_FLAGS = -P -!endif -MJIT_HEADER_SUFFIX = -MJIT_HEADER_ARCH = -MJIT_HEADER_INSTALL_DIR = include/$(RUBY_VERSION_NAME)/$(arch) -MJIT_PRECOMPILED_HEADER_NAME = rb_mjit_header-$(RUBY_PROGRAM_VERSION).pch -MJIT_PRECOMPILED_HEADER = $(MJIT_HEADER_INSTALL_DIR)/$(MJIT_PRECOMPILED_HEADER_NAME) -!ifndef MJIT_CC -MJIT_CC = $(CC) -!endif -!ifndef MJIT_OPTFLAGS -# TODO: Use only $(OPTFLAGS) for performance. It requires to modify flags for precompiled header too. -# For now, using flags used for building precompiled header to make JIT succeed. 
-MJIT_OPTFLAGS = -DMJIT_HEADER $(CFLAGS) $(XCFLAGS) $(CPPFLAGS) -!endif -!ifndef MJIT_DEBUGFLAGS -# TODO: Make this work... Another header for debug build needs to be installed first. -MJIT_DEBUGFLAGS = $(empty) $(DEBUGFLAGS) $(empty) -MJIT_DEBUGFLAGS = $(MJIT_DEBUGFLAGS: -Zi = -Z7 ) -!endif -!ifndef MJIT_LDSHARED -MJIT_LDSHARED = $(MJIT_CC) -LD +MJIT_SUPPORT = no !endif POSTLINK = @@ -918,11 +892,7 @@ $(CONFIG_H): $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub #define RUBY_COREDLL "$(RT)" #define RUBY_PLATFORM "$(arch)" #define RUBY_SITEARCH "$(sitearch)" -!if "$(MJIT_SUPPORT)" == "yes" -#define USE_MJIT 1 -!else #define USE_MJIT 0 -!endif #endif /* $(guard) */ << @@ -1338,42 +1308,6 @@ probes.h: {$(VPATH)}probes.dmyh #include "$(*F).dmyh" < $(NULL) || exit 0 - $(Q)$(RM) $(arch_hdrdir)/rb_mjit_header-*.pch - $(Q)$(RM) $(arch_hdrdir)/rb_mjit_header-*.$(OBJEXT) - -# Non-mswin environment is not using prebuilt precompiled header because upgrading compiler -# or changing compiler options may break MJIT so build (currently only by --mjit-debug though). -# -# But mswin is building precompiled header because cl.exe cannot leave macro after preprocess. -# As a workaround to use macro without installing many source files, it uses precompiled header -# without sufficient guard for a broken build. -# -# TODO: Fix the above issue by including VC version in header name, and create another header -# for --mjit-debug as well. 
-$(TIMESTAMPDIR)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=).time: probes.h vm.$(OBJEXT) - $(ECHO) building $(@F:.time=.pch) - $(Q) $(CC) -DMJIT_HEADER $(CFLAGS: -Zi = -Z7 ) $(XCFLAGS:-DRUBY_EXPORT =) -URUBY_EXPORT $(CPPFLAGS) $(srcdir)/vm.c -c -Yc \ - $(COUTFLAG)$(@F:.time=.)$(OBJEXT) -Fd$(@F:.time=.pdb) -Fp$(@F:.time=.pch).new -Z7 - $(Q) $(IFCHANGE) "--timestamp=$@" $(@F:.time=.pch) $(@F:.time=.pch).new - -$(MJIT_PRECOMPILED_HEADER_NAME): $(TIMESTAMPDIR)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=).time - -$(MJIT_PRECOMPILED_HEADER): $(MJIT_PRECOMPILED_HEADER_NAME) - $(Q) $(MAKEDIRS) $(MJIT_HEADER_INSTALL_DIR) - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME) $@ - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) $(MJIT_HEADER_INSTALL_DIR)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) - $(Q) $(MAKEDIRS) $(arch_hdrdir) - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME) $(arch_hdrdir)/$(MJIT_PRECOMPILED_HEADER_NAME) - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) $(arch_hdrdir)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) - INSNS = opt_sc.inc optinsn.inc optunifs.inc insns.inc insns_info.inc \ vmtc.inc vm.inc mjit_compile.inc @@ -1399,46 +1333,3 @@ loadpath: verconf.h @$(CPP) $(XCFLAGS) $(CPPFLAGS) $(srcdir)/loadpath.c | \ sed -e '1,/^const char ruby_initial_load_paths/d;/;/,$$d' \ -e '/^^ /!d;s/ *"\\\\0"$$//;s/" *"//g' - -mjit_config.h: $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub - @echo making <<$@ -#ifndef RUBY_MJIT_CONFIG_H -#define RUBY_MJIT_CONFIG_H 1 - -#define MJIT_CONFIG_ESCAPED_EQ "=" -#define MJIT_HEADER_INSTALL_DIR "/$(MJIT_HEADER_INSTALL_DIR)" -#define MJIT_MIN_HEADER_NAME "$(MJIT_MIN_HEADER_NAME)" -#define MJIT_PRECOMPILED_HEADER_NAME "$(MJIT_PRECOMPILED_HEADER_NAME)" -<> $@ - @echo /* MJIT_CC_COMMON */>> $@ - @ - @(set sep=#define MJIT_CFLAGS ) & \ - for %I in ($(RUNTIMEFLAG) $(ARCH_FLAG)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_CFLAGS */>> $@ - @ - @(set sep=#define 
MJIT_OPTFLAGS ) & \ - for %I in ($(MJIT_OPTFLAGS:^==" MJIT_CONFIG_ESCAPED_EQ ")) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_OPTFLAGS */>> $@ - @ - @(set sep=#define MJIT_DEBUGFLAGS ) & \ - for %I in ($(MJIT_DEBUGFLAGS:^==" MJIT_CONFIG_ESCAPED_EQ ")) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_DEBUGFLAGS */>> $@ - @ - @(set sep=#define MJIT_LDSHARED ) & \ - for %I in ($(MJIT_LDSHARED)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_LDSHARED */>> $@ - @ - @(set sep=#define MJIT_DLDFLAGS ) & \ - for %I in ($(DLDFLAGS)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_DLDFLAGS */>> $@ - @ - @(set sep=#define MJIT_LIBS ) & \ - for %I in ($(LIBRUBYARG_SHARED)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_LIBS */>> $@ - @ - @echo.>> $@ - @echo #endif /* RUBY_MJIT_CONFIG_H */>> $@ - @$(Q:@=: :) type $@ diff --git a/win32/configure.bat b/win32/configure.bat index 4602b41ec599a2..a3df0bb4eb8092 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -38,8 +38,6 @@ if "%1" == "--enable-devel" goto :enable-devel if "%1" == "--disable-devel" goto :disable-devel if "%1" == "--enable-rubygems" goto :enable-rubygems if "%1" == "--disable-rubygems" goto :disable-rubygems -if "%1" == "--enable-mjit-support" goto :enable-mjit-support -if "%1" == "--disable-mjit-support" goto :disable-mjit-support if "%1" == "--extout" goto :extout if "%1" == "--path" goto :path if "%1" == "--with-baseruby" goto :baseruby @@ -165,16 +163,6 @@ goto :loop ; echo>>confargs.tmp %1 \ shift goto :loop ; -:enable-mjit-support - echo>> ~tmp~.mak "MJIT_SUPPORT=yes" \ - echo>>confargs.tmp %1 \ - shift -goto :loop ; -:disable-mjit-support - echo>> ~tmp~.mak "MJIT_SUPPORT=no" \ - echo>>confargs.tmp %1 \ - shift -goto :loop ; :ntver echo>> ~tmp~.mak "NTVER=%~2" \ echo>>confargs.tmp %1=%2 \ diff --git a/win32/setup.mak b/win32/setup.mak index fd804a84cec6d5..17e321b98427a1 100644 --- a/win32/setup.mak +++ 
b/win32/setup.mak @@ -295,7 +295,6 @@ AS = $(AS) -nologo (echo AS = $(AS:64=) -nologo) || \ (echo AS = $(AS) -nologo) ) >>$(MAKEFILE) !endif - @(for %I in (cl.exe) do @set MJIT_CC=%~$$PATH:I) && (call echo MJIT_CC = "%MJIT_CC:\=/%" -nologo>>$(MAKEFILE)) @type << >>$(MAKEFILE) $(BANG)include $$(srcdir)/win32/Makefile.sub From b93d554be9d7882b7f0b3cfc0a6c925d20733001 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 20 Aug 2022 18:36:51 -0700 Subject: [PATCH 186/546] Fix a typo [ci skip] --- tool/transform_mjit_header.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/transform_mjit_header.rb b/tool/transform_mjit_header.rb index 503e4c3593f8ca..21802368247039 100644 --- a/tool/transform_mjit_header.rb +++ b/tool/transform_mjit_header.rb @@ -226,7 +226,7 @@ def self.with_code(code) MJITHeader.remove_predefined_macros!(code) -if MJITHeader.windows? # transformation is broken on Windows and the platfor is not supported +if MJITHeader.windows? # transformation is broken on Windows and the platform is not supported MJITHeader.remove_harmful_macros!(code) MJITHeader.check_code!(code, cc, cflags, 'initial') puts "\nSkipped transforming external functions to static on Windows." 
From f6d4d73abd986fbd89918fa8bbbb82746ee08371 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 21 Aug 2022 14:04:52 +0900 Subject: [PATCH 187/546] Use `rb_fork` to suppress deprecated-declarations warnings --- common.mk | 1 + mjit.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/common.mk b/common.mk index e34139d4854b72..56d283599b62a3 100644 --- a/common.mk +++ b/common.mk @@ -9477,6 +9477,7 @@ mjit.$(OBJEXT): $(top_srcdir)/internal/file.h mjit.$(OBJEXT): $(top_srcdir)/internal/gc.h mjit.$(OBJEXT): $(top_srcdir)/internal/hash.h mjit.$(OBJEXT): $(top_srcdir)/internal/imemo.h +mjit.$(OBJEXT): $(top_srcdir)/internal/process.h mjit.$(OBJEXT): $(top_srcdir)/internal/serial.h mjit.$(OBJEXT): $(top_srcdir)/internal/static_assert.h mjit.$(OBJEXT): $(top_srcdir)/internal/vm.h diff --git a/mjit.c b/mjit.c index e519a3091da8e7..a873b3d9f84576 100644 --- a/mjit.c +++ b/mjit.c @@ -76,6 +76,7 @@ #include "internal/cont.h" #include "internal/file.h" #include "internal/hash.h" +#include "internal/process.h" #include "internal/warnings.h" #include "vm_sync.h" #include "ractor_core.h" @@ -747,7 +748,7 @@ start_mjit_compact(struct rb_mjit_unit *unit) rb_vm_t *vm = GET_VM(); rb_native_mutex_lock(&vm->waitpid_lock); - pid_t pid = fork(); + pid_t pid = rb_fork(); if (pid == 0) { rb_native_mutex_unlock(&vm->waitpid_lock); @@ -910,7 +911,7 @@ start_mjit_compile(struct rb_mjit_unit *unit) rb_vm_t *vm = GET_VM(); rb_native_mutex_lock(&vm->waitpid_lock); - pid_t pid = fork(); + pid_t pid = rb_fork(); if (pid == 0) { rb_native_mutex_unlock(&vm->waitpid_lock); From b87ddd7538220e9782274281356fd55bfd29078b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Fri, 19 Aug 2022 20:35:26 +0200 Subject: [PATCH 188/546] [rubygems/rubygems] Fix `bundle platform` crash when there's a lockfile with no Ruby locked https://github.com/rubygems/rubygems/commit/49fc54e87d --- lib/bundler/cli/platform.rb | 2 +- spec/bundler/commands/platform_spec.rb | 23 
+++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/bundler/cli/platform.rb b/lib/bundler/cli/platform.rb index 16d4e0145add4c..73da8cf80e45ef 100644 --- a/lib/bundler/cli/platform.rb +++ b/lib/bundler/cli/platform.rb @@ -9,7 +9,7 @@ def initialize(options) def run platforms, ruby_version = Bundler.ui.silence do - locked_ruby_version = Bundler.locked_gems && Bundler.locked_gems.ruby_version.gsub(/p\d+\Z/, "") + locked_ruby_version = Bundler.locked_gems && Bundler.locked_gems.ruby_version&.gsub(/p\d+\Z/, "") gemfile_ruby_version = Bundler.definition.ruby_version && Bundler.definition.ruby_version.single_version_string [Bundler.definition.platforms.map {|p| "* #{p}" }, locked_ruby_version || gemfile_ruby_version] diff --git a/spec/bundler/commands/platform_spec.rb b/spec/bundler/commands/platform_spec.rb index 0b964eac8cd255..4e8e3946fe14cc 100644 --- a/spec/bundler/commands/platform_spec.rb +++ b/spec/bundler/commands/platform_spec.rb @@ -234,6 +234,29 @@ expect(out).to eq("ruby 1.0.0") end + it "handles when there is a lockfile with no requirement" do + gemfile <<-G + source "#{file_uri_for(gem_repo1)}" + G + + lockfile <<-L + GEM + remote: #{file_uri_for(gem_repo1)}/ + specs: + + PLATFORMS + ruby + + DEPENDENCIES + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "platform --ruby" + expect(out).to eq("No ruby version specified") + end + it "handles when there is a requirement in the gemfile" do gemfile <<-G source "#{file_uri_for(gem_repo1)}" From 560941e711bed8e8cdd0183b76d9e2057cce6806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Fri, 19 Aug 2022 14:03:43 +0200 Subject: [PATCH 189/546] [rubygems/rubygems] Fix edge case where `bundler/inline` unintentionally skips install If the application has the `no_install` setting set for `bundle package`, then `bundler/inline` would silently skip installing any gems. 
https://github.com/rubygems/rubygems/commit/7864f49b27 --- lib/bundler/inline.rb | 2 +- spec/bundler/runtime/inline_spec.rb | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/bundler/inline.rb b/lib/bundler/inline.rb index 8ef580f1f00ac4..25e055fbe4cc49 100644 --- a/lib/bundler/inline.rb +++ b/lib/bundler/inline.rb @@ -54,7 +54,7 @@ def definition.lock(*); end Bundler.ui = install ? ui : Bundler::UI::Silent.new if install || definition.missing_specs? - Bundler.settings.temporary(:inline => true) do + Bundler.settings.temporary(:inline => true, :no_install => false) do installer = Bundler::Installer.install(Bundler.root, definition, :system => true) installer.post_install_messages.each do |name, message| Bundler.ui.info "Post-install message from #{name}:\n#{message}" diff --git a/spec/bundler/runtime/inline_spec.rb b/spec/bundler/runtime/inline_spec.rb index dd22c86f90457a..e3cf5020ecdd99 100644 --- a/spec/bundler/runtime/inline_spec.rb +++ b/spec/bundler/runtime/inline_spec.rb @@ -355,6 +355,20 @@ def confirm(msg, newline = nil) expect(err).to be_empty end + it "still installs if the application has `bundle package` no_install config set" do + bundle "config set --local no_install true" + + script <<-RUBY + gemfile do + source "#{file_uri_for(gem_repo1)}" + gem "rack" + end + RUBY + + expect(last_command).to be_success + expect(system_gem_path("gems/rack-1.0.0")).to exist + end + it "preserves previous BUNDLE_GEMFILE value" do ENV["BUNDLE_GEMFILE"] = "" script <<-RUBY From 0cc989696e9a7d0386773d23aa64de8fc04ea9fe Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 1 Jul 2021 06:38:33 +0900 Subject: [PATCH 190/546] Add invalid hexadecimal float tests --- test/ruby/test_float.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/ruby/test_float.rb b/test/ruby/test_float.rb index 57a46fce921b99..fdc5d28ed7b95f 100644 --- a/test/ruby/test_float.rb +++ b/test/ruby/test_float.rb @@ -141,6 +141,9 @@ def test_strtod 
assert_raise(ArgumentError){Float("1__1")} assert_raise(ArgumentError){Float("1.")} assert_raise(ArgumentError){Float("1.e+00")} + assert_raise(ArgumentError){Float("0x.1")} + assert_raise(ArgumentError){Float("0x1.")} + assert_raise(ArgumentError){Float("0x1.0")} assert_raise(ArgumentError){Float("0x1.p+0")} # add expected behaviour here. assert_equal(10, Float("1_0")) From b043dd9c5dd7b5c46580e49ad38b49d5cb5beaf1 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 21 Aug 2022 20:06:39 +0900 Subject: [PATCH 191/546] Check the room including the space to fill --- sprintf.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sprintf.c b/sprintf.c index 22323265b3e56d..32a72439af0b6a 100644 --- a/sprintf.c +++ b/sprintf.c @@ -460,14 +460,16 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) blen += n; } else if ((flags & FMINUS)) { - CHECK(n); + --width; + CHECK(n + (width > 0 ? width : 0)); rb_enc_mbcput(c, &buf[blen], enc); blen += n; - if (width > 1) FILL(' ', width-1); + if (width > 0) FILL_(' ', width); } else { - if (width > 1) FILL(' ', width-1); - CHECK(n); + --width; + CHECK(n + (width > 0 ? 
width : 0)); + if (width > 0) FILL_(' ', width); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } @@ -512,16 +514,16 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { width -= (int)slen; + CHECK(len + width); if (!(flags&FMINUS)) { - FILL(' ', width); + FILL_(' ', width); width = 0; } - CHECK(len); memcpy(&buf[blen], RSTRING_PTR(str), len); RB_GC_GUARD(str); blen += len; if (flags&FMINUS) { - FILL(' ', width); + FILL_(' ', width); } rb_enc_associate(result, enc); break; From 936327a51915d6a39086f65ea1b8e5c5c4ade78f Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Sun, 21 Aug 2022 11:36:36 -0500 Subject: [PATCH 192/546] [DOC] Enhanced RDoc for Time (#6255) Treats: #utc #hash #localtime --- doc/time/in.rdoc | 11 +++---- doc/time/zone_and_in.rdoc | 13 +++----- doc/timezone_specifiers.rdoc | 45 +++++++++++++++++++++++++++ time.c | 60 +++++++++++++++++++----------------- timev.rb | 4 +-- 5 files changed, 87 insertions(+), 46 deletions(-) create mode 100644 doc/timezone_specifiers.rdoc diff --git a/doc/time/in.rdoc b/doc/time/in.rdoc index 870982b0c2e4d7..33178c514af0c1 100644 --- a/doc/time/in.rdoc +++ b/doc/time/in.rdoc @@ -1,7 +1,4 @@ -- in: zone: a timezone _zone_, which may be: - - A string offset from UTC. - - A single letter offset from UTC, in the range 'A'..'Z', - 'J' excluded (the so-called military timezone). - - An integer number of seconds. - - A timezone object; - see {Timezone Argument}[#class-Time-label-Timezone+Argument] for details. +- in: zone: a timezone +zone+. + +For forms of argument +zone+, see +{Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. diff --git a/doc/time/zone_and_in.rdoc b/doc/time/zone_and_in.rdoc index 5bdfaacd4c4bb3..2cf6564c01f2b7 100644 --- a/doc/time/zone_and_in.rdoc +++ b/doc/time/zone_and_in.rdoc @@ -1,8 +1,5 @@ -- +zone+: a timezone, which may be: - - A string offset from UTC. 
- - A single letter offset from UTC, in the range 'A'..'Z', - 'J' excluded (the so-called military timezone). - - An integer number of seconds. - - A timezone object; - see {Timezone Argument}[#class-Time-label-Timezone+Argument] for details. -- in: zone: a timezone _zone_, which may be as above. +- +zone+: a timezone +zone+. +- in: zone: a timezone +zone+. + +For forms of +zone+, see +{Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. diff --git a/doc/timezone_specifiers.rdoc b/doc/timezone_specifiers.rdoc new file mode 100644 index 00000000000000..f1c23372b18890 --- /dev/null +++ b/doc/timezone_specifiers.rdoc @@ -0,0 +1,45 @@ +=== Timezone Specifiers + +Certain methods in class Time accept arguments that specify timezones: + +- Time.at: keyword argument +in:+. +- Time.new: positional argument +zone+ or keyword argument +in:+. +- Time.now: keyword argument +in:+. +- Time#localtime: positional argument +zone+. + +The value given with any of these must be one of the following: + +- A string offset from UTC in the form '+HH:MM' or -HH:MM, + where: + + - +HH+ is the 2-digit hour in the range 0..23. + - +MM+ is the 2-digit minute in the range 0..59. 
+ + Examples: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: '-23:59') # => 1999-12-31 20:16:01 -2359 + Time.at(t, in: '+23:59') # => 2000-01-02 20:14:01 +2359 + +- A letter in the range 'A'..'I' or 'K'..'Z'; + see {List of military time zones}[https://en.wikipedia.org/wiki/List_of_military_time_zones]: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: 'A') # => 2000-01-01 21:15:01 +0100 + Time.at(t, in: 'I') # => 2000-01-02 05:15:01 +0900 + Time.at(t, in: 'K') # => 2000-01-02 06:15:01 +1000 + Time.at(t, in: 'Y') # => 2000-01-01 08:15:01 -1200 + Time.at(t, in: 'Z') # => 2000-01-01 20:15:01 UTC + +- An integer number of seconds in the range -86399..86399: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: -86399) # => 1999-12-31 20:15:02 -235959 + Time.at(t, in: 86399) # => 2000-01-02 20:15:00 +235959 + +-- +TODO: Pull in and revise the text at the link, +then add the example class TZ from the tests. +++ +- A timezone object; + see {Timezone Argument}[rdoc-ref:Time@Timezone+Argument] for details. diff --git a/time.c b/time.c index a7a4c5dc1a393e..e0cb7537ec12c5 100644 --- a/time.c +++ b/time.c @@ -3486,7 +3486,7 @@ time_to_f(VALUE time) /* * call-seq: - * time.to_r -> rational + * to_r -> rational * * Returns the value of +self+ as a Rational number of seconds * since the Epoch, which is exact: @@ -3540,7 +3540,7 @@ time_usec(VALUE time) /* * call-seq: - * time.nsec -> integer + * nsec -> integer * * Returns the number of nanoseconds in the subseconds part of +self+ * in the range (0..999_999_999); @@ -3659,20 +3659,20 @@ time_eql(VALUE time1, VALUE time2) /* * call-seq: - * time.utc? -> true or false - * time.gmt? -> true or false + * utc? -> true or false * - * Returns +true+ if _time_ represents a time in UTC (GMT). + * Returns +true+ if +self+ represents a time in UTC (GMT): * - * t = Time.now #=> 2007-11-19 08:15:23 -0600 - * t.utc? 
#=> false - * t = Time.gm(2000,"jan",1,20,15,1) #=> 2000-01-01 20:15:01 UTC - * t.utc? #=> true + * now = Time.now + * # => 2022-08-18 10:24:13.5398485 -0500 + * now.utc? # => false + * utc = Time.utc(2000, 1, 1, 20, 15, 1) + * # => 2000-01-01 20:15:01 UTC + * utc.utc? # => true * - * t = Time.now #=> 2007-11-19 08:16:03 -0600 - * t.gmt? #=> false - * t = Time.gm(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC - * t.gmt? #=> true + * Time#gmt? is an alias for Time#utc?. + * + * Related: Time.utc. */ static VALUE @@ -3686,11 +3686,11 @@ time_utc_p(VALUE time) /* * call-seq: - * time.hash -> integer + * hash -> integer * - * Returns a hash code for this Time object. + * Returns the integer hash code for +self+. * - * See also Object#hash. + * Related: Object#hash. */ static VALUE @@ -3777,25 +3777,27 @@ time_zonelocal(VALUE time, VALUE off) /* * call-seq: - * time.localtime -> time - * time.localtime(utc_offset) -> time + * localtime -> self or new_time + * localtime(zone) -> new_time * - * Converts _time_ to local time (using the local time zone in - * effect at the creation time of _time_) modifying the receiver. + * With no argument given: * - * If +utc_offset+ is given, it is used instead of the local time. + * - Returns +self+ if +self+ is a local time. + * - Otherwise returns a new \Time in the user's local timezone: + * + * t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + * t.localtime # => 2000-01-01 14:15:01 -0600 * - * t = Time.utc(2000, "jan", 1, 20, 15, 1) #=> 2000-01-01 20:15:01 UTC - * t.utc? #=> true + * With argument +zone+ given, + * returns the new \Time object created by converting + * +self+ to the given time zone: * - * t.localtime #=> 2000-01-01 14:15:01 -0600 - * t.utc? #=> false + * t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + * t.localtime("-09:00") # => 2000-01-01 11:15:01 -0900 * - * t.localtime("+09:00") #=> 2000-01-02 05:15:01 +0900 - * t.utc? 
#=> false + * For forms of argument +zone+, see + * {Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. * - * If +utc_offset+ is not given and _time_ is local time, just returns - * the receiver. */ static VALUE diff --git a/timev.rb b/timev.rb index a7e70b290f4afb..1fd8295d0fb8a6 100644 --- a/timev.rb +++ b/timev.rb @@ -217,8 +217,8 @@ class Time # Time.now # => 2009-06-24 12:39:54 +0900 # Time.now(in: '+04:00') # => 2009-06-24 07:39:54 +0400 # - # Parameter: - # :include: doc/time/in.rdoc + # For forms of argument +zone+, see + # {Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. def self.now(in: nil) Primitive.time_s_now(Primitive.arg!(:in)) end From 73f8027ac68af2cbcc23407605a40025ca07b34e Mon Sep 17 00:00:00 2001 From: git Date: Mon, 22 Aug 2022 01:36:54 +0900 Subject: [PATCH 193/546] * 2022-08-22 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index ee9df020b00463..f8802b235cd6df 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 21 +#define RUBY_RELEASE_DAY 22 #include "ruby/version.h" #include "ruby/internal/abi.h" From a60507f616a7f5beb4aef99aa56f6b964c16880d Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sun, 21 Aug 2022 11:30:32 -0700 Subject: [PATCH 194/546] Rename mjit_compile.c to mjit_compiler.c I'm planning to introduce mjit_compiler.rb, and I want to make this consistent with it. Consistency with compile.c doesn't seem important for MJIT anyway. 
--- .../lib/benchmark_driver/runner/mjit_exec.rb | 2 +- common.mk | 406 +++++++++--------- mjit_compile.c => mjit_compiler.c | 2 +- tool/ruby_vm/views/mjit_compile.inc.erb | 2 +- 4 files changed, 206 insertions(+), 206 deletions(-) rename mjit_compile.c => mjit_compiler.c (99%) diff --git a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb b/benchmark/lib/benchmark_driver/runner/mjit_exec.rb index 121791eb2bf13f..63f5d472b2dd50 100644 --- a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb +++ b/benchmark/lib/benchmark_driver/runner/mjit_exec.rb @@ -210,7 +210,7 @@ def a<%= i %> # You may need to: # * Increase `JIT_ISEQ_SIZE_THRESHOLD` to 10000000 in mjit.h - # * Always return false in `inlinable_iseq_p()` of mjit_compile.c + # * Always return false in `inlinable_iseq_p()` of mjit_compiler.c def jit t = Process.clock_gettime(Process::CLOCK_MONOTONIC) i = 0 diff --git a/common.mk b/common.mk index 56d283599b62a3..c8434b9b7b699c 100644 --- a/common.mk +++ b/common.mk @@ -113,7 +113,7 @@ COMMONOBJS = array.$(OBJEXT) \ math.$(OBJEXT) \ memory_view.$(OBJEXT) \ mjit.$(OBJEXT) \ - mjit_compile.$(OBJEXT) \ + mjit_compiler.$(OBJEXT) \ node.$(OBJEXT) \ numeric.$(OBJEXT) \ object.$(OBJEXT) \ @@ -9686,208 +9686,208 @@ mjit.$(OBJEXT): {$(VPATH)}vm_debug.h mjit.$(OBJEXT): {$(VPATH)}vm_opts.h mjit.$(OBJEXT): {$(VPATH)}vm_sync.h mjit.$(OBJEXT): {$(VPATH)}yjit.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/list/list.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/str/str.h -mjit_compile.$(OBJEXT): $(hdrdir)/ruby.h -mjit_compile.$(OBJEXT): $(hdrdir)/ruby/ruby.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/array.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/class.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/compile.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/compilers.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/gc.h -mjit_compile.$(OBJEXT): 
$(top_srcdir)/internal/hash.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/imemo.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/object.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/serial.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/static_assert.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/variable.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/vm.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/warnings.h -mjit_compile.$(OBJEXT): {$(VPATH)}assert.h -mjit_compile.$(OBJEXT): {$(VPATH)}atomic.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/assume.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/attributes.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/bool.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/gcc_version_since.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/inttypes.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/limits.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/long_long.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h -mjit_compile.$(OBJEXT): {$(VPATH)}builtin.h -mjit_compile.$(OBJEXT): {$(VPATH)}config.h -mjit_compile.$(OBJEXT): {$(VPATH)}constant.h -mjit_compile.$(OBJEXT): {$(VPATH)}debug_counter.h -mjit_compile.$(OBJEXT): {$(VPATH)}defines.h -mjit_compile.$(OBJEXT): {$(VPATH)}id.h -mjit_compile.$(OBJEXT): {$(VPATH)}id_table.h -mjit_compile.$(OBJEXT): {$(VPATH)}insns.def -mjit_compile.$(OBJEXT): {$(VPATH)}insns.inc -mjit_compile.$(OBJEXT): {$(VPATH)}insns_info.inc -mjit_compile.$(OBJEXT): {$(VPATH)}intern.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/abi.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/anyargs.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/char.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/double.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/fixnum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/gid_t.h 
-mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/int.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/intptr_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/long.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/long_long.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/mode_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/off_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/pid_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/short.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/size_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/st_data_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/uid_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/assume.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/alloc_size.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/artificial.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/cold.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/const.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/constexpr.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/deprecated.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/diagnose_if.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/enum_extensibility.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/error.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/flag_enum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/forceinline.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/format.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/maybe_unused.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noalias.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/nodiscard.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noexcept.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noinline.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/nonnull.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noreturn.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/pure.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}internal/attr/restrict.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/returns_nonnull.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/warning.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/weakref.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/cast.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/apple.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/clang.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/gcc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/intel.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/msvc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/sunpro.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_since.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/config.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/constant_p.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rarray.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rbasic.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rbignum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rclass.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rdata.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rfile.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rhash.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/robject.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rstring.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rstruct.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rtypeddata.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/ctype.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/dllexport.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/dosish.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/error.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/eval.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/event.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}internal/fl_type.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/gc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/glob.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/globals.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/builtin.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/c_attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/cpp_attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/declspec_attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/extension.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/feature.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/warning.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/array.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/bignum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/class.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/compar.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/complex.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/cont.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/dir.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/enum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/enumerator.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/error.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/eval.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/file.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/gc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/hash.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/io.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/load.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/marshal.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/numeric.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/object.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/parse.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/proc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/process.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}internal/intern/random.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/range.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/rational.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/re.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/ruby.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/select.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/select/largesize.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/signal.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/sprintf.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/string.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/struct.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/thread.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/time.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/variable.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/vm.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/interpreter.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/iterator.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/memory.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/method.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/module.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/newobj.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/rgengc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/scan_args.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/special_consts.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/static_assert.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/stdalign.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/stdbool.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/symbol.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/value.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/value_type.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/variable.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/warning_push.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/xmalloc.h -mjit_compile.$(OBJEXT): {$(VPATH)}iseq.h -mjit_compile.$(OBJEXT): {$(VPATH)}method.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}missing.h -mjit_compile.$(OBJEXT): {$(VPATH)}mjit.h -mjit_compile.$(OBJEXT): {$(VPATH)}mjit_compile.c -mjit_compile.$(OBJEXT): {$(VPATH)}mjit_compile.inc -mjit_compile.$(OBJEXT): {$(VPATH)}mjit_unit.h -mjit_compile.$(OBJEXT): {$(VPATH)}node.h -mjit_compile.$(OBJEXT): {$(VPATH)}ruby_assert.h -mjit_compile.$(OBJEXT): {$(VPATH)}ruby_atomic.h -mjit_compile.$(OBJEXT): {$(VPATH)}st.h -mjit_compile.$(OBJEXT): {$(VPATH)}subst.h -mjit_compile.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h -mjit_compile.$(OBJEXT): {$(VPATH)}thread_native.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_callinfo.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_core.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_exec.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_insnhelper.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_opts.h -mjit_compile.$(OBJEXT): {$(VPATH)}yjit.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/list/list.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/str/str.h +mjit_compiler.$(OBJEXT): $(hdrdir)/ruby.h +mjit_compiler.$(OBJEXT): $(hdrdir)/ruby/ruby.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/array.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/class.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/compile.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/compilers.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/gc.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/hash.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/imemo.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/object.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/serial.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/static_assert.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/variable.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/vm.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/warnings.h +mjit_compiler.$(OBJEXT): {$(VPATH)}assert.h +mjit_compiler.$(OBJEXT): {$(VPATH)}atomic.h 
+mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/assume.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/attributes.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/bool.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/gcc_version_since.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/inttypes.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/limits.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/long_long.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h +mjit_compiler.$(OBJEXT): {$(VPATH)}builtin.h +mjit_compiler.$(OBJEXT): {$(VPATH)}config.h +mjit_compiler.$(OBJEXT): {$(VPATH)}constant.h +mjit_compiler.$(OBJEXT): {$(VPATH)}debug_counter.h +mjit_compiler.$(OBJEXT): {$(VPATH)}defines.h +mjit_compiler.$(OBJEXT): {$(VPATH)}id.h +mjit_compiler.$(OBJEXT): {$(VPATH)}id_table.h +mjit_compiler.$(OBJEXT): {$(VPATH)}insns.def +mjit_compiler.$(OBJEXT): {$(VPATH)}insns.inc +mjit_compiler.$(OBJEXT): {$(VPATH)}insns_info.inc +mjit_compiler.$(OBJEXT): {$(VPATH)}intern.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/abi.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/anyargs.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/char.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/double.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/fixnum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/gid_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/int.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/intptr_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/long.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/long_long.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/mode_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/off_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/pid_t.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/arithmetic/short.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/size_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/st_data_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/uid_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/assume.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/alloc_size.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/artificial.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/cold.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/const.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/constexpr.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/deprecated.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/diagnose_if.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/enum_extensibility.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/error.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/flag_enum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/forceinline.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/format.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/maybe_unused.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noalias.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/nodiscard.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noexcept.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noinline.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/nonnull.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noreturn.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/pure.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/restrict.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/returns_nonnull.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/warning.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/weakref.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/cast.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/apple.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/compiler_is/clang.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/gcc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/intel.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/msvc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/sunpro.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_since.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/config.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/constant_p.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rarray.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rbasic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rbignum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rclass.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rdata.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rfile.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/robject.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rstring.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rstruct.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rtypeddata.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/ctype.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/dllexport.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/dosish.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/error.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/eval.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/event.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/fl_type.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/gc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/glob.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/globals.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/builtin.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/c_attribute.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/has/cpp_attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/declspec_attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/extension.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/feature.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/warning.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/array.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/bignum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/class.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/compar.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/complex.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/cont.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/dir.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/enum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/enumerator.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/error.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/eval.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/file.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/gc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/hash.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/io.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/load.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/marshal.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/numeric.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/object.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/parse.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/proc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/process.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/random.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/range.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/rational.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/re.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/ruby.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/select.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/intern/select/largesize.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/signal.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/sprintf.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/string.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/struct.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/thread.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/time.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/variable.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/vm.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/interpreter.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/iterator.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/memory.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/method.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/module.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/newobj.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/rgengc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/scan_args.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/special_consts.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/static_assert.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/stdalign.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/stdbool.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/symbol.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/value.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/value_type.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/variable.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/warning_push.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/xmalloc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}iseq.h +mjit_compiler.$(OBJEXT): {$(VPATH)}method.h +mjit_compiler.$(OBJEXT): {$(VPATH)}missing.h +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit.h +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit_compile.inc +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit_compiler.c +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit_unit.h +mjit_compiler.$(OBJEXT): {$(VPATH)}node.h +mjit_compiler.$(OBJEXT): {$(VPATH)}ruby_assert.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}ruby_atomic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}st.h +mjit_compiler.$(OBJEXT): {$(VPATH)}subst.h +mjit_compiler.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h +mjit_compiler.$(OBJEXT): {$(VPATH)}thread_native.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_callinfo.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_core.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_exec.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_insnhelper.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_opts.h +mjit_compiler.$(OBJEXT): {$(VPATH)}yjit.h node.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h node.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h node.$(OBJEXT): $(CCAN_DIR)/list/list.h diff --git a/mjit_compile.c b/mjit_compiler.c similarity index 99% rename from mjit_compile.c rename to mjit_compiler.c index e85eaaa6cb9058..c02397e53ce1c5 100644 --- a/mjit_compile.c +++ b/mjit_compiler.c @@ -1,6 +1,6 @@ /********************************************************************** - mjit_compile.c - MRI method JIT compiler + mjit_compiler.c - MRI method JIT compiler Copyright (C) 2017 Takashi Kokubun . diff --git a/tool/ruby_vm/views/mjit_compile.inc.erb b/tool/ruby_vm/views/mjit_compile.inc.erb index 5820f81770265f..0e66f78007db8f 100644 --- a/tool/ruby_vm/views/mjit_compile.inc.erb +++ b/tool/ruby_vm/views/mjit_compile.inc.erb @@ -11,7 +11,7 @@ % # This is an ERB template that generates Ruby code that generates C code that % # generates JIT-ed C code. <%= render 'notice', locals: { - this_file: 'is the main part of compile_insn() in mjit_compile.c', + this_file: 'is the main part of compile_insn() in mjit_compiler.c', edit: __FILE__, } -%> % From 9f3140a42e3542dce565a27135dceeb135a4e691 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sun, 21 Aug 2022 11:34:20 -0700 Subject: [PATCH 195/546] Remove mjit_exec benchmarks Now that mjit_exec doesn't exist, those files feel old. I'll probably change how I benchmark it when I add benchmarks for it again. 
--- .../lib/benchmark_driver/runner/mjit_exec.rb | 237 ------------------ benchmark/mjit_exec_jt2jt.yml | 6 - benchmark/mjit_exec_vm2jt.yml | 6 - benchmark/mjit_exec_vm2vm.yml | 6 - 4 files changed, 255 deletions(-) delete mode 100644 benchmark/lib/benchmark_driver/runner/mjit_exec.rb delete mode 100644 benchmark/mjit_exec_jt2jt.yml delete mode 100644 benchmark/mjit_exec_vm2jt.yml delete mode 100644 benchmark/mjit_exec_vm2vm.yml diff --git a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb b/benchmark/lib/benchmark_driver/runner/mjit_exec.rb deleted file mode 100644 index 63f5d472b2dd50..00000000000000 --- a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb +++ /dev/null @@ -1,237 +0,0 @@ -require 'benchmark_driver/struct' -require 'benchmark_driver/metric' -require 'erb' - -# A special runner dedicated for measuring jit_exec overhead. -class BenchmarkDriver::Runner::MjitExec - METRIC = BenchmarkDriver::Metric.new(name: 'Iteration per second', unit: 'i/s') - - # JobParser returns this, `BenchmarkDriver::Runner.runner_for` searches "*::Job" - Job = ::BenchmarkDriver::Struct.new( - :name, # @param [String] name - This is mandatory for all runner - :metrics, # @param [Array] - :num_methods, # @param [Integer] num_methods - The number of methods to be defined - :loop_count, # @param [Integer] loop_count - :from_jit, # @param [TrueClass,FalseClass] from_jit - Whether the jit_exec() is from JIT or not - :to_jit, # @param [TrueClass,FalseClass] to_jit - Whether the jit_exec() is to JIT or not - ) - # Dynamically fetched and used by `BenchmarkDriver::JobParser.parse` - class << JobParser = Module.new - # @param [Array,String] num_methods - # @param [Integer] loop_count - # @param [TrueClass,FalseClass] from_jit - # @param [TrueClass,FalseClass] to_jit - def parse(num_methods:, loop_count:, from_jit:, to_jit:) - if num_methods.is_a?(String) - num_methods = eval(num_methods) - end - - num_methods.map do |num| - if num_methods.size > 1 - suffix = "[#{'%4d' % num}]" - else 
- suffix = "_#{num}" - end - Job.new( - name: "mjit_exec_#{from_jit ? 'JT' : 'VM'}2#{to_jit ? 'JT' : 'VM'}#{suffix}", - metrics: [METRIC], - num_methods: num, - loop_count: loop_count, - from_jit: from_jit, - to_jit: to_jit, - ) - end - end - end - - # @param [BenchmarkDriver::Config::RunnerConfig] config - # @param [BenchmarkDriver::Output] output - # @param [BenchmarkDriver::Context] contexts - def initialize(config:, output:, contexts:) - @config = config - @output = output - @contexts = contexts - end - - # This method is dynamically called by `BenchmarkDriver::JobRunner.run` - # @param [Array] jobs - def run(jobs) - @output.with_benchmark do - jobs.each do |job| - @output.with_job(name: job.name) do - @contexts.each do |context| - result = BenchmarkDriver::Repeater.with_repeat(config: @config, larger_better: true, rest_on_average: :average) do - run_benchmark(job, context: context) - end - value, duration = result.value - @output.with_context(name: context.name, executable: context.executable, gems: context.gems, prelude: context.prelude) do - @output.report(values: { METRIC => value }, duration: duration, loop_count: job.loop_count) - end - end - end - end - end - end - - private - - # @param [BenchmarkDriver::Runner::Ips::Job] job - loop_count is not nil - # @param [BenchmarkDriver::Context] context - # @return [BenchmarkDriver::Metrics] - def run_benchmark(job, context:) - if job.from_jit - if job.to_jit - benchmark = BenchmarkJT2JT.new(num_methods: job.num_methods, loop_count: job.loop_count) - else - raise NotImplementedError, "JT2VM is not implemented yet" - end - else - if job.to_jit - benchmark = BenchmarkVM2JT.new(num_methods: job.num_methods, loop_count: job.loop_count) - else - benchmark = BenchmarkVM2VM.new(num_methods: job.num_methods, loop_count: job.loop_count) - end - end - - duration = Tempfile.open(['benchmark_driver-result', '.txt']) do |f| - with_script(benchmark.render(result: f.path)) do |path| - opt = [] - if 
context.executable.command.any? { |c| c.start_with?('--jit') } - opt << '--jit-min-calls=2' - end - IO.popen([*context.executable.command, '--disable-gems', *opt, path], &:read) - if $?.success? - Float(f.read) - else - BenchmarkDriver::Result::ERROR - end - end - end - - [job.loop_count.to_f / duration, duration] - end - - def with_script(script) - if @config.verbose >= 2 - sep = '-' * 30 - $stdout.puts "\n\n#{sep}[Script begin]#{sep}\n#{script}#{sep}[Script end]#{sep}\n\n" - end - - Tempfile.open(['benchmark_driver-', '.rb']) do |f| - f.puts script - f.close - return yield(f.path) - end - end - - # @param [Integer] num_methods - # @param [Integer] loop_count - BenchmarkVM2VM = ::BenchmarkDriver::Struct.new(:num_methods, :loop_count) do - # @param [String] result - A file to write result - def render(result:) - ERB.new(<<~EOS, trim_mode: '%').result(binding) - % num_methods.times do |i| - def a<%= i %> - nil - end - % end - RubyVM::MJIT.pause if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled? - - def vm - t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - i = 0 - while i < <%= loop_count / 1000 %> - % 1000.times do |i| - a<%= i % num_methods %> - % end - i += 1 - end - % (loop_count % 1000).times do |i| - a<%= i % num_methods %> - % end - Process.clock_gettime(Process::CLOCK_MONOTONIC) - t - end - - vm # warmup call cache - File.write(<%= result.dump %>, vm) - EOS - end - end - private_constant :BenchmarkVM2VM - - # @param [Integer] num_methods - # @param [Integer] loop_count - BenchmarkVM2JT = ::BenchmarkDriver::Struct.new(:num_methods, :loop_count) do - # @param [String] result - A file to write result - def render(result:) - ERB.new(<<~EOS, trim_mode: '%').result(binding) - % num_methods.times do |i| - def a<%= i %> - nil - end - a<%= i %> - a<%= i %> # --jit-min-calls=2 - % end - RubyVM::MJIT.pause if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled? 
- - def vm - t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - i = 0 - while i < <%= loop_count / 1000 %> - % 1000.times do |i| - a<%= i % num_methods %> - % end - i += 1 - end - % (loop_count % 1000).times do |i| - a<%= i % num_methods %> - % end - Process.clock_gettime(Process::CLOCK_MONOTONIC) - t - end - - vm # warmup call cache - File.write(<%= result.dump %>, vm) - EOS - end - end - private_constant :BenchmarkVM2JT - - # @param [Integer] num_methods - # @param [Integer] loop_count - BenchmarkJT2JT = ::BenchmarkDriver::Struct.new(:num_methods, :loop_count) do - # @param [String] result - A file to write result - def render(result:) - ERB.new(<<~EOS, trim_mode: '%').result(binding) - % num_methods.times do |i| - def a<%= i %> - nil - end - % end - - # You may need to: - # * Increase `JIT_ISEQ_SIZE_THRESHOLD` to 10000000 in mjit.h - # * Always return false in `inlinable_iseq_p()` of mjit_compiler.c - def jit - t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - i = 0 - while i < <%= loop_count / 1000 %> - % 1000.times do |i| - a<%= i % num_methods %> - % end - i += 1 - end - % (loop_count % 1000).times do |i| - a<%= i % num_methods %> - % end - Process.clock_gettime(Process::CLOCK_MONOTONIC) - t - end - - jit - jit - RubyVM::MJIT.pause if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled? 
- File.write(<%= result.dump %>, jit) - EOS - end - end - private_constant :BenchmarkJT2JT -end diff --git a/benchmark/mjit_exec_jt2jt.yml b/benchmark/mjit_exec_jt2jt.yml deleted file mode 100644 index 6c303c7a44a03b..00000000000000 --- a/benchmark/mjit_exec_jt2jt.yml +++ /dev/null @@ -1,6 +0,0 @@ -type: lib/benchmark_driver/runner/mjit_exec -num_methods: [1] -#num_methods: (1..100).to_a + [200, 300, 400, 500, 600, 700, 800, 900, 1000] -loop_count: 50000000 -from_jit: true -to_jit: true diff --git a/benchmark/mjit_exec_vm2jt.yml b/benchmark/mjit_exec_vm2jt.yml deleted file mode 100644 index 764883f070f5ea..00000000000000 --- a/benchmark/mjit_exec_vm2jt.yml +++ /dev/null @@ -1,6 +0,0 @@ -type: lib/benchmark_driver/runner/mjit_exec -num_methods: [1] -#num_methods: (1..100).to_a + [200, 300, 400, 500, 600, 700, 800, 900, 1000] -loop_count: 50000000 -from_jit: false -to_jit: true diff --git a/benchmark/mjit_exec_vm2vm.yml b/benchmark/mjit_exec_vm2vm.yml deleted file mode 100644 index 030aa76c1cd963..00000000000000 --- a/benchmark/mjit_exec_vm2vm.yml +++ /dev/null @@ -1,6 +0,0 @@ -type: lib/benchmark_driver/runner/mjit_exec -num_methods: [1] -#num_methods: (1..100).to_a + [200, 300, 400, 500, 600, 700, 800, 900, 1000] -loop_count: 50000000 -from_jit: false -to_jit: false From 70f69f85395f5735429cd45136d7de2742f08b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 1 Aug 2022 20:35:37 +0200 Subject: [PATCH 196/546] [ruby/fileutils] Fix mkdir_p hanging on Windows when trying to create a file on a offline drive https://github.com/ruby/fileutils/commit/9cc6a082d7 --- lib/fileutils.rb | 2 +- test/fileutils/test_fileutils.rb | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 7eb66dda0c83a3..8ae52668646b45 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -374,7 +374,7 @@ def mkdir_p(list, mode: nil, noop: nil, verbose: nil) path = remove_trailing_slash(item) stack = [] - until 
File.directory?(path) + until File.directory?(path) || File.dirname(path) == path stack.push path path = File.dirname(path) end diff --git a/test/fileutils/test_fileutils.rb b/test/fileutils/test_fileutils.rb index e1e2a829c34e44..4afc8085ef2dc7 100644 --- a/test/fileutils/test_fileutils.rb +++ b/test/fileutils/test_fileutils.rb @@ -1098,6 +1098,14 @@ def test_mkdir_p_root ensure Dir.rmdir(drive) if drive and File.directory?(drive) end + + def test_mkdir_p_offline_drive + offline_drive = ("A".."Z").to_a.reverse.find {|d| !File.exist?("#{d}:/") } + + assert_raise(Errno::ENOENT) { + mkdir_p "#{offline_drive}:/new_dir" + } + end end def test_mkdir_p_file_perm From 3d6baad7fac8c73cef8c82fe946757cf289c704c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 22 Aug 2022 11:40:38 +0900 Subject: [PATCH 197/546] Added syntax_suggest entry to NEWS --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index d7ce50f399e1a5..8536fc5c8aa611 100644 --- a/NEWS.md +++ b/NEWS.md @@ -156,6 +156,11 @@ Note: We're only listing outstanding class updates. ## Stdlib updates +* SyntaxSuggest + + * The feature of `syntax_suggest` formerly `dead_end` is integrated in Ruby. + [[Feature #18159]] + * The following default gems are updated. * RubyGems 3.4.0.dev * bigdecimal 3.1.2 @@ -281,3 +286,4 @@ The following deprecated APIs are removed. 
[Bug #18782]: https://bugs.ruby-lang.org/issues/18782 [Feature #18788]: https://bugs.ruby-lang.org/issues/18788 [Feature #18809]: https://bugs.ruby-lang.org/issues/18809 +[Feature #18159]: https://bugs.ruby-lang.org/issues/18159 From 954f709f4e2c21c5a01eb181fe878cf874a5ca37 Mon Sep 17 00:00:00 2001 From: Kazuhiro NISHIYAMA Date: Mon, 22 Aug 2022 12:18:58 +0900 Subject: [PATCH 198/546] NEWS.md: sort [ci skip] --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 8536fc5c8aa611..cc7c9d2a933712 100644 --- a/NEWS.md +++ b/NEWS.md @@ -275,6 +275,7 @@ The following deprecated APIs are removed. [Bug #17545]: https://bugs.ruby-lang.org/issues/17545 [Feature #17881]: https://bugs.ruby-lang.org/issues/17881 [Feature #18037]: https://bugs.ruby-lang.org/issues/18037 +[Feature #18159]: https://bugs.ruby-lang.org/issues/18159 [Feature #18351]: https://bugs.ruby-lang.org/issues/18351 [Bug #18487]: https://bugs.ruby-lang.org/issues/18487 [Feature #18571]: https://bugs.ruby-lang.org/issues/18571 @@ -286,4 +287,3 @@ The following deprecated APIs are removed. 
[Bug #18782]: https://bugs.ruby-lang.org/issues/18782 [Feature #18788]: https://bugs.ruby-lang.org/issues/18788 [Feature #18809]: https://bugs.ruby-lang.org/issues/18809 -[Feature #18159]: https://bugs.ruby-lang.org/issues/18159 From 6afb4f0a28cbb798b709a8215fe0ac99221333f2 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 22 Aug 2022 12:05:46 +0900 Subject: [PATCH 199/546] Added help entry for test-bundler-parallel --- common.mk | 61 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/common.mk b/common.mk index c8434b9b7b699c..1f33a9a3acc035 100644 --- a/common.mk +++ b/common.mk @@ -1666,36 +1666,37 @@ help: PHONY " Makefile of Ruby" \ "" \ "targets:" \ - " all (default): builds all of below" \ - " miniruby: builds only miniruby" \ - " encs: builds encodings" \ - " exts: builds extensions" \ - " main: builds encodings, extensions and ruby" \ - " docs: builds documents" \ - " install-capi: builds C API documents" \ - " run: runs test.rb by miniruby" \ - " runruby: runs test.rb by ruby you just built" \ - " gdb: runs test.rb by miniruby under gdb" \ - " gdb-ruby: runs test.rb by ruby under gdb" \ - " check: equals make test test-tool test-all test-spec" \ - " test: ruby core tests [BTESTS=]" \ - " test-all: all ruby tests [TESTOPTS=-j4 TESTS=]" \ - " test-spec: run the Ruby spec suite [SPECOPTS=]" \ - " test-bundler: run the Bundler spec" \ - " test-bundled-gems: run the test suite of bundled gems" \ - " test-tool: tests under the tool/test" \ - " update-gems: download files of the bundled gems" \ - " update-bundled_gems: update the latest version of bundled gems" \ - " sync-default-gems: sync default gems from upstream [GEM=]" \ - " up: update local copy and autogenerated files" \ - " benchmark: benchmark this ruby and COMPARE_RUBY." 
\ - " gcbench: gc benchmark [GCBENCH_ITEM=]" \ - " install: install all ruby distributions" \ - " install-nodoc: install without rdoc" \ - " install-cross: install cross compiling stuff" \ - " clean: clean for tarball" \ - " distclean: clean for repository" \ - " golf: build goruby for golfers" \ + " all (default): builds all of below" \ + " miniruby: builds only miniruby" \ + " encs: builds encodings" \ + " exts: builds extensions" \ + " main: builds encodings, extensions and ruby" \ + " docs: builds documents" \ + " install-capi: builds C API documents" \ + " run: runs test.rb by miniruby" \ + " runruby: runs test.rb by ruby you just built" \ + " gdb: runs test.rb by miniruby under gdb" \ + " gdb-ruby: runs test.rb by ruby under gdb" \ + " check: equals make test test-tool test-all test-spec" \ + " test: ruby core tests [BTESTS=]" \ + " test-all: all ruby tests [TESTOPTS=-j4 TESTS=]" \ + " test-spec: run the Ruby spec suite [SPECOPTS=]" \ + " test-bundler: run the Bundler spec" \ + " test-bundler-parallel: run the Bundler spec with parallel" \ + " test-bundled-gems: run the test suite of bundled gems" \ + " test-tool: tests under the tool/test" \ + " update-gems: download files of the bundled gems" \ + " update-bundled_gems: update the latest version of bundled gems" \ + " sync-default-gems: sync default gems from upstream [GEM=]" \ + " up: update local copy and autogenerated files" \ + " benchmark: benchmark this ruby and COMPARE_RUBY." 
\ + " gcbench: gc benchmark [GCBENCH_ITEM=]" \ + " install: install all ruby distributions" \ + " install-nodoc: install without rdoc" \ + " install-cross: install cross compiling stuff" \ + " clean: clean for tarball" \ + " distclean: clean for repository" \ + " golf: build goruby for golfers" \ $(HELP_EXTRA_TASKS) \ "see DeveloperHowto for more detail: " \ " https://bugs.ruby-lang.org/projects/ruby/wiki/DeveloperHowto" \ From d0b17a4d203b0838e2290bcc52a647e045596b59 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 22 Aug 2022 12:06:23 +0900 Subject: [PATCH 200/546] Adjust space-width for help message --- defs/gmake.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index 93b73736681453..af4d27a5a2c42e 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -264,9 +264,9 @@ pr-% pull-github-%: fetch-github-% $(call pull-github,$*) HELP_EXTRA_TASKS = \ - " checkout-github: checkout GitHub Pull Request [PR=1234]" \ - " pull-github: rebase GitHub Pull Request to new worktree [PR=1234]" \ - " update-github: merge master branch and push it to Pull Request [PR=1234]" \ + " checkout-github: checkout GitHub Pull Request [PR=1234]" \ + " pull-github: rebase GitHub Pull Request to new worktree [PR=1234]" \ + " update-github: merge master branch and push it to Pull Request [PR=1234]" \ "" extract-gems: $(HAVE_BASERUBY:yes=update-gems) From d5f50463c2b5c5263aa45c58f3f4ec73de8868d5 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 22 Aug 2022 12:21:47 +0900 Subject: [PATCH 201/546] [Bug #18937] Coerce non-Numeric into Complex at comparisons --- complex.c | 5 ++++- test/ruby/test_complex.rb | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/complex.c b/complex.c index d625ced7fa865b..ae40fa7355b5bc 100644 --- a/complex.c +++ b/complex.c @@ -1121,7 +1121,10 @@ nucomp_real_p(VALUE self) static VALUE nucomp_cmp(VALUE self, VALUE other) { - if (nucomp_real_p(self) && k_numeric_p(other)) { + if 
(!k_numeric_p(other)) { + return rb_num_coerce_cmp(self, other, idCmp); + } + if (nucomp_real_p(self)) { if (RB_TYPE_P(other, T_COMPLEX) && nucomp_real_p(other)) { get_dat2(self, other); return rb_funcall(adat->real, idCmp, 1, bdat->real); diff --git a/test/ruby/test_complex.rb b/test/ruby/test_complex.rb index a3a75465755240..f85bf101e0f06b 100644 --- a/test/ruby/test_complex.rb +++ b/test/ruby/test_complex.rb @@ -579,7 +579,7 @@ def coerce(x) [x, Complex(1)] end def test_coerce2 x = ObjectX.new - %w(+ - * / quo **).each do |op| + %w(+ - * / quo ** <=>).each do |op| assert_kind_of(Numeric, Complex(1).__send__(op, x)) end end From f0a7694f6473685f5c4e6ba1635aa91636b5a467 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 22 Aug 2022 16:59:00 +0900 Subject: [PATCH 202/546] Show the exact version of cl.exe [ci skip] [Misc #18362] is fixed at cl.exe 19.33.31629. --- .github/workflows/windows.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 2c5b823d202758..1cefcaef06aa07 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -105,6 +105,8 @@ jobs: set | C:\msys64\usr\bin\sort > new.env C:\msys64\usr\bin\comm -13 old.env new.env >> %GITHUB_ENV% del *.env + - name: compiler version + run: cl - name: link libraries run: | for %%I in (C:\vcpkg\installed\x64-windows\bin\*.dll) do ( From d91865f33f7bfad7a6d1434aa48674acbe51e7f7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 22 Aug 2022 18:08:05 +0900 Subject: [PATCH 203/546] Ignore `.document` only commits [ci skip] --- .appveyor.yml | 1 + .cirrus.yml | 2 +- .github/workflows/check_dependencies.yml | 2 ++ .github/workflows/codeql-analysis.yml | 2 ++ .github/workflows/compilers.yml | 2 ++ .github/workflows/macos.yml | 2 ++ .github/workflows/mingw.yml | 2 ++ .github/workflows/mjit.yml | 2 ++ .github/workflows/spec_guards.yml | 2 ++ .github/workflows/ubuntu.yml | 2 ++ .github/workflows/wasm.yml | 2 ++ 
.github/workflows/windows.yml | 2 ++ .github/workflows/yjit-ubuntu.yml | 2 ++ 13 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index ea9b81aa47c6f3..1ea5e592104a31 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -16,6 +16,7 @@ skip_commits: - doc/* - '**/*.md' - '**/*.rdoc' + - '**/.document' environment: ruby_version: "24-%Platform%" zlib_version: "1.2.12" diff --git a/.cirrus.yml b/.cirrus.yml index 0cab0023c227b0..ec8036297c64b0 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -10,7 +10,7 @@ env: task: name: Arm64 Graviton2 / $CC - skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}')" + skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}', '.document')" arm_container: # We use the arm64 images at https://github.com/ruby/ruby-ci-image/pkgs/container/ruby-ci-image . image: ghcr.io/ruby/ruby-ci-image:$CC diff --git a/.github/workflows/check_dependencies.yml b/.github/workflows/check_dependencies.yml index 6834d2c9c89816..fab198933518ad 100644 --- a/.github/workflows/check_dependencies.yml +++ b/.github/workflows/check_dependencies.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 299c6b220aa6cf..f9fa0a74490162 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -6,11 +6,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' schedule: - cron: '0 12 * * 4' diff --git a/.github/workflows/compilers.yml b/.github/workflows/compilers.yml index 8ef04f1ef233d2..1ccc5dbbc822a8 100644 --- a/.github/workflows/compilers.yml +++ b/.github/workflows/compilers.yml @@ -6,11 +6,13 @@ on: - 
'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 2f2c876b15384f..72f28a7b615baf 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/mingw.yml b/.github/workflows/mingw.yml index 80b7a92f15223c..6f93aa5392e3c2 100644 --- a/.github/workflows/mingw.yml +++ b/.github/workflows/mingw.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/mjit.yml b/.github/workflows/mjit.yml index c2479f9467c029..b5065288c70c88 100644 --- a/.github/workflows/mjit.yml +++ b/.github/workflows/mjit.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/spec_guards.yml b/.github/workflows/spec_guards.yml index 480731ad930616..d09f1a24d3b1de 100644 --- a/.github/workflows/spec_guards.yml +++ b/.github/workflows/spec_guards.yml @@ -6,11 +6,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' 
concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 5662ca71adde48..4e4d1a02e1a689 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 83688fbacae371..713444b7410882 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 1cefcaef06aa07..4418c78d813cf2 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/yjit-ubuntu.yml b/.github/workflows/yjit-ubuntu.yml index 105489aab46236..bf90b80efb1ee0 100644 --- a/.github/workflows/yjit-ubuntu.yml +++ b/.github/workflows/yjit-ubuntu.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} From 
13d31331c87f6fc0a8d43135be67090cd4c810c3 Mon Sep 17 00:00:00 2001 From: "S.H" Date: Mon, 22 Aug 2022 18:52:36 +0900 Subject: [PATCH 204/546] Reuse `nonlocal_var` patterns --- parse.y | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/parse.y b/parse.y index 426e4df772c74c..405a83a8cf01bc 100644 --- a/parse.y +++ b/parse.y @@ -5035,9 +5035,7 @@ ssym : tSYMBEG sym ; sym : fname - | tIVAR - | tGVAR - | tCVAR + | nonlocal_var ; dsym : tSYMBEG string_contents tSTRING_END @@ -5073,10 +5071,8 @@ nonlocal_var : tIVAR ; user_variable : tIDENTIFIER - | tIVAR - | tGVAR | tCONSTANT - | tCVAR + | nonlocal_var ; keyword_variable: keyword_nil {$$ = KWD2EID(nil, $1);} From f99904341e0c376450113f0c37cb1b65d5b62bf3 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 22 Aug 2022 23:14:29 +0900 Subject: [PATCH 205/546] [DOC] Remove extra page-dir prefix --- doc/time/in.rdoc | 2 +- doc/time/zone_and_in.rdoc | 2 +- time.c | 2 +- timev.rb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/time/in.rdoc b/doc/time/in.rdoc index 33178c514af0c1..506bd916280a24 100644 --- a/doc/time/in.rdoc +++ b/doc/time/in.rdoc @@ -1,4 +1,4 @@ - in: zone: a timezone +zone+. For forms of argument +zone+, see -{Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. +{Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. diff --git a/doc/time/zone_and_in.rdoc b/doc/time/zone_and_in.rdoc index 2cf6564c01f2b7..f36940ee13ccad 100644 --- a/doc/time/zone_and_in.rdoc +++ b/doc/time/zone_and_in.rdoc @@ -2,4 +2,4 @@ - in: zone: a timezone +zone+. For forms of +zone+, see -{Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. +{Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. 
diff --git a/time.c b/time.c index e0cb7537ec12c5..01c402e7f96ef2 100644 --- a/time.c +++ b/time.c @@ -3796,7 +3796,7 @@ time_zonelocal(VALUE time, VALUE off) * t.localtime("-09:00") # => 2000-01-01 11:15:01 -0900 * * For forms of argument +zone+, see - * {Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. + * {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. * */ diff --git a/timev.rb b/timev.rb index 1fd8295d0fb8a6..f4777358753024 100644 --- a/timev.rb +++ b/timev.rb @@ -218,7 +218,7 @@ class Time # Time.now(in: '+04:00') # => 2009-06-24 07:39:54 +0400 # # For forms of argument +zone+, see - # {Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. + # {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. def self.now(in: nil) Primitive.time_s_now(Primitive.arg!(:in)) end From 148d0ccd18a14195294b0a13f0eb79facf357953 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 22 Aug 2022 23:14:58 +0900 Subject: [PATCH 206/546] [DOC] Fix a type [ci skip] --- doc/time/sec_i.rdoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/time/sec_i.rdoc b/doc/time/sec_i.rdoc index 99c8eddc652005..fd5519082cbb09 100644 --- a/doc/time/sec_i.rdoc +++ b/doc/time/sec_i.rdoc @@ -1 +1 @@ -- +isec_i+ is the integer number of seconds in the range 0..60. +- +sec_i+ is the integer number of seconds in the range 0..60. 
From b8d142e733b5fc8810f53a45a7107414e6ae567c Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 22 Aug 2022 11:21:36 -0500 Subject: [PATCH 207/546] [DOC] Enhanced RDoc for Time (#6267) Treats: #utc #getlocal #getutc #ctime #to_s #inspect --- doc/timezone_specifiers.rdoc | 1 + time.c | 124 ++++++++++++++++++----------------- 2 files changed, 64 insertions(+), 61 deletions(-) diff --git a/doc/timezone_specifiers.rdoc b/doc/timezone_specifiers.rdoc index f1c23372b18890..a6d57a1b21ea22 100644 --- a/doc/timezone_specifiers.rdoc +++ b/doc/timezone_specifiers.rdoc @@ -5,6 +5,7 @@ Certain methods in class Time accept arguments that specify timezones: - Time.at: keyword argument +in:+. - Time.new: positional argument +zone+ or keyword argument +in:+. - Time.now: keyword argument +in:+. +- Time#getlocal: positional argument +zone+. - Time#localtime: positional argument +zone+. The value given with any of these must be one of the following: diff --git a/time.c b/time.c index 01c402e7f96ef2..dba8f1f620950f 100644 --- a/time.c +++ b/time.c @@ -3814,20 +3814,18 @@ time_localtime_m(int argc, VALUE *argv, VALUE time) /* * call-seq: - * time.gmtime -> time - * time.utc -> time + * utc -> self * - * Converts _time_ to UTC (GMT), modifying the receiver. + * Returns +self+, converted to the UTC timezone: * - * t = Time.now #=> 2007-11-19 08:18:31 -0600 - * t.gmt? #=> false - * t.gmtime #=> 2007-11-19 14:18:31 UTC - * t.gmt? #=> true + * t = Time.new(2000) # => 2000-01-01 00:00:00 -0600 + * t.utc? # => false + * t.utc # => 2000-01-01 06:00:00 UTC + * t.utc? # => true * - * t = Time.now #=> 2007-11-19 08:18:51 -0600 - * t.utc? #=> false - * t.utc #=> 2007-11-19 14:18:51 UTC - * t.utc? #=> true + * Time#gmtime is an alias for Time#utc. + * + * Related: Time#getutc (returns a new converted \Time object). 
*/ static VALUE @@ -3889,31 +3887,19 @@ time_fixoff(VALUE time) /* * call-seq: - * time.getlocal -> new_time - * time.getlocal(utc_offset) -> new_time - * time.getlocal(timezone) -> new_time - * - * Returns a new Time object representing _time_ in - * local time (using the local time zone in effect for this process). + * getlocal(zone = nil) -> new_time * - * If +utc_offset+ is given, it is used instead of the local time. - * +utc_offset+ can be given as a human-readable string (eg. "+09:00") - * or as a number of seconds (eg. 32400). + * Returns a new \Time object representing the value of +self+ + * converted to a given timezone; + * if +zone+ is +nil+, the local timezone is used: * - * t = Time.utc(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC - * t.utc? #=> true - * - * l = t.getlocal #=> 2000-01-01 14:15:01 -0600 - * l.utc? #=> false - * t == l #=> true + * t = Time.utc(2000) # => 2000-01-01 00:00:00 UTC + * t.getlocal # => 1999-12-31 18:00:00 -0600 + * t.getlocal('+12:00') # => 2000-01-01 12:00:00 +1200 * - * j = t.getlocal("+09:00") #=> 2000-01-02 05:15:01 +0900 - * j.utc? #=> false - * t == j #=> true + * For forms of argument +zone+, see + * {Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. * - * k = t.getlocal(9*60*60) #=> 2000-01-02 05:15:01 +0900 - * k.utc? #=> false - * t == k #=> true */ static VALUE @@ -3950,16 +3936,18 @@ time_getlocaltime(int argc, VALUE *argv, VALUE time) /* * call-seq: - * time.getgm -> new_time - * time.getutc -> new_time + * getutc -> new_time + * + * Returns a new \Time object representing the value of +self+ + * converted to the UTC timezone: * - * Returns a new Time object representing _time_ in UTC. + * local = Time.local(2000) # => 2000-01-01 00:00:00 -0600 + * local.utc? # => false + * utc = local.getutc # => 2000-01-01 06:00:00 UTC + * utc.utc? # => true + * utc == local # => true * - * t = Time.local(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 -0600 - * t.gmt? 
#=> false - * y = t.getgm #=> 2000-01-02 02:15:01 UTC - * y.gmt? #=> true - * t == y #=> true + * Time#getgm is an alias for Time#getutc. */ static VALUE @@ -3981,13 +3969,25 @@ static VALUE strftime_cstr(const char *fmt, size_t len, VALUE time, rb_encoding /* * call-seq: - * time.asctime -> string - * time.ctime -> string + * ctime -> string + * + * Returns a string representation of +self+, + * formatted by strftime('%a %b %e %T %Y') + * or its shorthand version strftime('%c'); + * see {Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html]: * - * Returns a canonical string representation of _time_. + * t = Time.new(2000, 12, 31, 23, 59, 59, 0.5) + * t.ctime # => "Sun Dec 31 23:59:59 2000" + * t.strftime('%a %b %e %T %Y') # => "Sun Dec 31 23:59:59 2000" + * t.strftime('%c') # => "Sun Dec 31 23:59:59 2000" + * + * Time#asctime is an alias for Time#ctime. + * + * Related: Time#to_s, Time#inspect: + * + * t.inspect # => "2000-12-31 23:59:59.5 +000001" + * t.to_s # => "2000-12-31 23:59:59 +0000" * - * Time.now.asctime #=> "Wed Apr 9 08:56:03 2003" - * Time.now.ctime #=> "Wed Apr 9 08:56:03 2003" */ static VALUE @@ -3998,17 +3998,18 @@ time_asctime(VALUE time) /* * call-seq: - * time.to_s -> string + * to_s -> string + * + * Returns a string representation of +self+, without subseconds: * - * Returns a string representing _time_. Equivalent to calling - * #strftime with the appropriate format string. 
+ * t = Time.new(2000, 12, 31, 23, 59, 59, 0.5) + * t.to_s # => "2000-12-31 23:59:59 +0000" * - * t = Time.now - * t.to_s #=> "2012-11-10 18:16:12 +0100" - * t.strftime "%Y-%m-%d %H:%M:%S %z" #=> "2012-11-10 18:16:12 +0100" + * Related: Time#ctime, Time#inspect: + * + * t.ctime # => "Sun Dec 31 23:59:59 2000" + * t.inspect # => "2000-12-31 23:59:59.5 +000001" * - * t.utc.to_s #=> "2012-11-10 17:16:12 UTC" - * t.strftime "%Y-%m-%d %H:%M:%S UTC" #=> "2012-11-10 17:16:12 UTC" */ static VALUE @@ -4025,17 +4026,18 @@ time_to_s(VALUE time) /* * call-seq: - * time.inspect -> string + * inspect -> string + * + * Returns a string representation of +self+ with subseconds: + * + * t = Time.new(2000, 12, 31, 23, 59, 59, 0.5) + * t.inspect # => "2000-12-31 23:59:59.5 +000001" * - * Returns a detailed string representing _time_. Unlike to_s, - * preserves subsecond in the representation for easier debugging. + * Related: Time#ctime, Time#to_s: * - * t = Time.now - * t.inspect #=> "2012-11-10 18:16:12.261257655 +0100" - * t.strftime "%Y-%m-%d %H:%M:%S.%N %z" #=> "2012-11-10 18:16:12.261257655 +0100" + * t.ctime # => "Sun Dec 31 23:59:59 2000" + * t.to_s # => "2000-12-31 23:59:59 +0000" * - * t.utc.inspect #=> "2012-11-10 17:16:12.261257655 UTC" - * t.strftime "%Y-%m-%d %H:%M:%S.%N UTC" #=> "2012-11-10 17:16:12.261257655 UTC" */ static VALUE From c96ffec984f49d58e6cf12be5f84a2790835dd55 Mon Sep 17 00:00:00 2001 From: git Date: Tue, 23 Aug 2022 01:22:01 +0900 Subject: [PATCH 208/546] * 2022-08-23 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index f8802b235cd6df..8155bb9d100575 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 22 +#define RUBY_RELEASE_DAY 23 #include "ruby/version.h" #include "ruby/internal/abi.h" From a9ee13365adb070af20a1298fed856c595d210c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Aug 2022 15:25:31 +0000 Subject: [PATCH 209/546] [rubygems/rubygems] Bump rb-sys in /test/rubygems/test_gem_ext_cargo_builder/custom_name Bumps [rb-sys](https://github.com/oxidize-rb/rb-sys) from 0.9.29 to 0.9.30. - [Release notes](https://github.com/oxidize-rb/rb-sys/releases) - [Commits](https://github.com/oxidize-rb/rb-sys/compare/v0.9.29...v0.9.30) --- updated-dependencies: - dependency-name: rb-sys dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] https://github.com/rubygems/rubygems/commit/d54c936c4c --- .../test_gem_ext_cargo_builder/custom_name/Cargo.lock | 8 ++++---- .../test_gem_ext_cargo_builder/custom_name/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock index da09e717f06391..0f1fa7c430e5a8 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.29" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0317cb843cdeef14c5622917c55c0a170cee31348eb600c4a1683fb8c9e87e7a" +checksum = "24b22a374fc2e92eb6f49d7efe4eb7663655c6e9455d9259ed3342cc1599da85" dependencies = [ "bindgen", "linkify", @@ -171,9 +171,9 @@ dependencies = [ [[package]] name = "rb-sys-build" -version = "0.9.29" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b8274327aecb7edcff86e290d9cbe7b572b7889c1cfc7476358f4831f78ce5" +checksum = "3cd23b6dd929b7d50ccb35a6d3aa77dec364328ab9cb304dd32c629332491671" dependencies = [ "regex", "shell-words", diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml 
b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml index 2a215a55dd6b35..c9ba5c27bd696e 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.29", features = ["gem"] } +rb-sys = { version = "0.9.30", features = ["gem"] } From aeac8b6c3a48279ffe138f3be6ae62208f43f5b3 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 22 Aug 2022 13:17:05 -0500 Subject: [PATCH 210/546] [DOC] Addition to section 'Related Methods' (#6271) Addition to section 'Related Methods': suggests adding differentiators to the names of related methods. --- doc/contributing/documentation_guide.md | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/doc/contributing/documentation_guide.md b/doc/contributing/documentation_guide.md index f01184180903ce..df677477103ece 100644 --- a/doc/contributing/documentation_guide.md +++ b/doc/contributing/documentation_guide.md @@ -376,12 +376,22 @@ Mention aliases in the form In some cases, it is useful to document which methods are related to the current method. For example, documentation for `Hash#[]` might mention `Hash#fetch` as a related method, and `Hash#merge` might mention -`Hash#merge!` as a related method. Consider which methods may be related -to the current method, and if you think the reader would benefit it, -at the end of the method documentation, add a line starting with -"Related: " (e.g. "Related: #fetch"). Don't list more than three -related methods. If you think more than three methods are related, -pick the three you think are most important and list those three. +`Hash#merge!` as a related method. + +- Consider which methods may be related + to the current method, and if you think the reader would benefit it, + at the end of the method documentation, add a line starting with + "Related: " (e.g. 
"Related: #fetch."). +- Don't list more than three related methods. + If you think more than three methods are related, + list the three you think are most important. +- Consider adding: + + - A phrase suggesting how the related method is similar to, + or different from,the current method. + See an example at Time#getutc. + - Example code that illustrates the similarities and differences. + See examples at Time#ctime, Time#inspect, Time#to_s. ### Methods Accepting Multiple Argument Types From 085790bdc0936aec793f6798f9b78c10916c8292 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Aug 2022 17:27:14 +0000 Subject: [PATCH 211/546] [rubygems/rubygems] Bump rb-sys Bumps [rb-sys](https://github.com/oxidize-rb/rb-sys) from 0.9.29 to 0.9.30. - [Release notes](https://github.com/oxidize-rb/rb-sys/releases) - [Commits](https://github.com/oxidize-rb/rb-sys/compare/v0.9.29...v0.9.30) --- updated-dependencies: - dependency-name: rb-sys dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] https://github.com/rubygems/rubygems/commit/912da7828e --- .../rust_ruby_example/Cargo.lock | 8 ++++---- .../rust_ruby_example/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock index e3518198483f6e..5e602fcf92db1f 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.29" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0317cb843cdeef14c5622917c55c0a170cee31348eb600c4a1683fb8c9e87e7a" +checksum = "24b22a374fc2e92eb6f49d7efe4eb7663655c6e9455d9259ed3342cc1599da85" dependencies = [ "bindgen", "linkify", @@ -164,9 +164,9 @@ dependencies = [ [[package]] name = "rb-sys-build" -version = "0.9.29" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b8274327aecb7edcff86e290d9cbe7b572b7889c1cfc7476358f4831f78ce5" +checksum = "3cd23b6dd929b7d50ccb35a6d3aa77dec364328ab9cb304dd32c629332491671" dependencies = [ "regex", "shell-words", diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml index 1867db8e66e16c..8e3f623728b3ce 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.29", features = ["gem"] } +rb-sys = { version = "0.9.30", features = ["gem"] } From 615f79be3cef210c26b967f06979ed464bf9e178 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Tue, 24 Nov 2020 13:52:13 +0100 Subject: [PATCH 
212/546] [rubygems/rubygems] Test platform's version-ness consistently The symmetry with the "for command line" case is made more apparent. https://github.com/rubygems/rubygems/commit/ab85d3558f --- lib/rubygems/platform.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index ed3571dbff9316..0c0520e1db4014 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -70,7 +70,7 @@ def initialize(arch) when String then arch = arch.split "-" - if arch.length > 2 && arch.last !~ (/\d/) # reassemble x86-linux-gnu + if arch.length > 2 && arch.last !~ /\d+(\.\d+)?$/ # reassemble x86-linux-{libc} extra = arch.pop arch.last << "-#{extra}" end From f254b673f89c12c42538d84b431116cf5ed0cde7 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Tue, 24 Nov 2020 13:55:10 +0100 Subject: [PATCH 213/546] [rubygems/rubygems] Test platform parsing stability On past versions there were observed cases of inconsistencies when some platforms were re-parsed. Ensure that a platform's string representation parses again in a platform object equal to the original. 
https://github.com/rubygems/rubygems/commit/6da35ee93c --- test/rubygems/test_gem_platform.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index 0fb5bf59a521f8..831079acb2d48b 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -144,6 +144,7 @@ def test_initialize test_cases.each do |arch, expected| platform = Gem::Platform.new arch assert_equal expected, platform.to_a, arch.inspect + assert_equal expected, Gem::Platform.new(platform.to_s).to_a, arch.inspect end end From 9819283044b6955f4e3b464e6a99196e84ed107a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 4 Aug 2022 13:02:18 +0200 Subject: [PATCH 214/546] [rubygems/rubygems] Handle non-gnu libc on linux platforms in RubyGems Attempting to install a gem published as both *-linux and *-linux-musl results in the incorrect gem being picked up, causing build failures due to binary incompatibility. This is caused by the `nil` wildcard swallowing the libc information upon version comparison. Handle the linux case by performing only non-wildcard equality on the version and asserting 'gnu' and nil equivalence, while preserving the current behaviour for other OSes. https://github.com/rubygems/rubygems/commit/9eead86abc Co-authored-by: Loic Nageleisen --- lib/rubygems/platform.rb | 15 +++++++++++++-- test/rubygems/test_gem_platform.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 0c0520e1db4014..1f699c23e00893 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -151,10 +151,17 @@ def hash # :nodoc: ## # Does +other+ match this platform? Two platforms match if they have the # same CPU, or either has a CPU of 'universal', they have the same OS, and - # they have the same version, or either has no version. 
+ # they have the same version, or either one has no version # # Additionally, the platform will match if the local CPU is 'arm' and the # other CPU starts with "arm" (for generic ARM family support). + # + # Of note, this method is not commutative. Indeed the OS 'linux' has a + # special case: the version is the libc name, yet while "no version" stands + # as a wildcard for a binary gem platform (as for other OSes), for the + # runtime platform "no version" stands for 'gnu'. To be able to disinguish + # these, the method receiver is the gem platform, while the argument is + # the runtime platform. def ===(other) return nil unless Gem::Platform === other @@ -171,7 +178,11 @@ def ===(other) @os == other.os && # version - (@version.nil? || other.version.nil? || @version == other.version) + ( + (@os != "linux" && (@version.nil? || other.version.nil?)) || + (@os == "linux" && (@version.nil? && !other.version.nil?)) || + @version == other.version + ) end ## diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index 831079acb2d48b..197f19e53cd13a 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -135,7 +135,9 @@ def test_initialize "i386-solaris2.8" => ["x86", "solaris", "2.8"], "mswin32" => ["x86", "mswin32", nil], "x86_64-linux" => ["x86_64", "linux", nil], + "x86_64-linux-gnu" => ["x86_64", "linux", nil], "x86_64-linux-musl" => ["x86_64", "linux", "musl"], + "x86_64-linux-uclibc" => ["x86_64", "linux", "uclibc"], "x86_64-openbsd3.9" => ["x86_64", "openbsd", "3.9"], "x86_64-openbsd4.0" => ["x86_64", "openbsd", "4.0"], "x86_64-openbsd" => ["x86_64", "openbsd", nil], @@ -263,6 +265,34 @@ def test_nil_cpu_arch_is_treated_as_universal assert((with_x86_arch === with_nil_arch), "x86 =~ nil") end + def test_nil_version_is_treated_as_any_version + x86_darwin_8 = Gem::Platform.new "i686-darwin8.0" + x86_darwin_nil = Gem::Platform.new "i686-darwin" + + assert((x86_darwin_8 === x86_darwin_nil), "8.0 =~ 
nil") + assert((x86_darwin_nil === x86_darwin_8), "nil =~ 8.0") + end + + def test_nil_version_is_stricter_for_linux_os + x86_linux = Gem::Platform.new "i686-linux" + x86_linux_gnu = Gem::Platform.new "i686-linux-gnu" + x86_linux_musl = Gem::Platform.new "i686-linux-musl" + x86_linux_uclibc = Gem::Platform.new "i686-linux-uclibc" + + # a naked linux runtime is implicit gnu, as it represents the common glibc-linked runtime + assert(x86_linux === x86_linux_gnu, "linux =~ linux-gnu") + assert(x86_linux_gnu === x86_linux, "linux-gnu =~ linux") + + # explicit libc differ + refute(x86_linux_uclibc === x86_linux_musl, "linux-uclibc =~ linux-musl") + refute(x86_linux_musl === x86_linux_uclibc, "linux-musl =~ linux-uclibc") + + # musl host runtime accepts libc-generic or statically linked gems... + assert(x86_linux === x86_linux_musl, "linux =~ linux-musl") + # ...but implicit gnu runtime generally does not accept musl-specific gems + refute(x86_linux_musl === x86_linux, "linux-musl =~ linux") + end + def test_equals3_cpu_arm arm = Gem::Platform.new "arm-linux" armv5 = Gem::Platform.new "armv5-linux" From 06ae78f8fb7156e0f0c732fb797343b0c6921a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Fri, 5 Aug 2022 13:24:14 +0200 Subject: [PATCH 215/546] [rubygems/rubygems] Remove unnecessary variables https://github.com/rubygems/rubygems/commit/b5e0c683d9 --- test/rubygems/test_gem_resolver.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/rubygems/test_gem_resolver.rb b/test/rubygems/test_gem_resolver.rb index 22712447292fc7..065143ef331eaa 100644 --- a/test/rubygems/test_gem_resolver.rb +++ b/test/rubygems/test_gem_resolver.rb @@ -322,16 +322,15 @@ def test_picks_highest_version def test_picks_best_platform is = Gem::Resolver::IndexSpecification unknown = Gem::Platform.new "unknown" - a2_p1 = a3_p2 = nil spec_fetcher do |fetcher| fetcher.spec "a", 2 - a2_p1 = fetcher.spec "a", 2 do |s| + fetcher.spec "a", 2 do |s| s.platform 
= Gem::Platform.local end - a3_p2 = fetcher.spec "a", 3 do |s| + fetcher.spec "a", 3 do |s| s.platform = unknown end end From 492e70c7b4303ffea8f8c07797e1696b90ce1d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Fri, 5 Aug 2022 13:24:24 +0200 Subject: [PATCH 216/546] [rubygems/rubygems] Fix `gem install` still choosing musl incorrectly https://github.com/rubygems/rubygems/commit/1b9f7f50a5 --- lib/rubygems/platform.rb | 2 +- test/rubygems/test_gem_resolver.rb | 35 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 1f699c23e00893..78fff703a263f0 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -25,7 +25,7 @@ def self.match_platforms?(platform, platforms) platforms.any? do |local_platform| platform.nil? || local_platform == platform || - (local_platform != Gem::Platform::RUBY && local_platform =~ platform) + (local_platform != Gem::Platform::RUBY && platform =~ local_platform) end end private_class_method :match_platforms? 
diff --git a/test/rubygems/test_gem_resolver.rb b/test/rubygems/test_gem_resolver.rb index 065143ef331eaa..c816d5484ba7ba 100644 --- a/test/rubygems/test_gem_resolver.rb +++ b/test/rubygems/test_gem_resolver.rb @@ -356,6 +356,41 @@ def test_picks_best_platform assert_resolves_to [a2_p1.spec], res end + def test_does_not_pick_musl_variants_on_non_musl_linux + util_set_arch "aarch64-linux" do + is = Gem::Resolver::IndexSpecification + + linux_musl = Gem::Platform.new("aarch64-linux-musl") + + spec_fetcher do |fetcher| + fetcher.spec "libv8-node", "15.14.0.1" do |s| + s.platform = Gem::Platform.local + end + + fetcher.spec "libv8-node", "15.14.0.1" do |s| + s.platform = linux_musl + end + end + + v15 = v("15.14.0.1") + source = Gem::Source.new @gem_repo + + s = set + + v15_linux = is.new s, "libv8-node", v15, source, Gem::Platform.local.to_s + v15_linux_musl = is.new s, "libv8-node", v15, source, linux_musl.to_s + + s.add v15_linux + s.add v15_linux_musl + + ad = make_dep "libv8-node", "= 15.14.0.1" + + res = Gem::Resolver.new([ad], s) + + assert_resolves_to [v15_linux.spec], res + end + end + def test_only_returns_spec_once a1 = util_spec "a", "1", "c" => "= 1" b1 = util_spec "b", "1", "c" => "= 1" From 59f27445eaf1f28b4c9769bbfd13e3a4bffa17da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Fri, 5 Aug 2022 16:16:34 +0200 Subject: [PATCH 217/546] [rubygems/rubygems] Implement extra rules for libc versioning https://github.com/rubygems/rubygems/commit/7e976d790a --- lib/rubygems/platform.rb | 4 ++-- test/rubygems/test_gem_platform.rb | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 78fff703a263f0..8c5e7993caf2d2 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -102,7 +102,7 @@ def initialize(arch) when /^dalvik(\d+)?$/ then [ "dalvik", $1 ] when /^dotnet$/ then [ "dotnet", nil ] when /^dotnet([\d.]*)/ then [ "dotnet", $1 ] - when 
/linux-?((?!gnu)\w+)?/ then [ "linux", $1 ] + when /linux-?(\w+)?/ then [ "linux", $1 ] when /mingw32/ then [ "mingw32", nil ] when /mingw-?(\w+)?/ then [ "mingw", $1 ] when /(mswin\d+)(\_(\d+))?/ then @@ -180,7 +180,7 @@ def ===(other) # version ( (@os != "linux" && (@version.nil? || other.version.nil?)) || - (@os == "linux" && (@version.nil? && !other.version.nil?)) || + (@os == "linux" && ((@version.nil? && ["gnu", "musl"].include?(other.version)) || (@version == "gnu" && other.version.nil?))) || @version == other.version ) end diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index 197f19e53cd13a..e0e635e533adf9 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -119,8 +119,8 @@ def test_initialize "i586-linux" => ["x86", "linux", nil], "i486-linux" => ["x86", "linux", nil], "i386-linux" => ["x86", "linux", nil], - "i586-linux-gnu" => ["x86", "linux", nil], - "i386-linux-gnu" => ["x86", "linux", nil], + "i586-linux-gnu" => ["x86", "linux", "gnu"], + "i386-linux-gnu" => ["x86", "linux", "gnu"], "i386-mingw32" => ["x86", "mingw32", nil], "x64-mingw-ucrt" => ["x64", "mingw", "ucrt"], "i386-mswin32" => ["x86", "mswin32", nil], @@ -135,7 +135,7 @@ def test_initialize "i386-solaris2.8" => ["x86", "solaris", "2.8"], "mswin32" => ["x86", "mswin32", nil], "x86_64-linux" => ["x86_64", "linux", nil], - "x86_64-linux-gnu" => ["x86_64", "linux", nil], + "x86_64-linux-gnu" => ["x86_64", "linux", "gnu"], "x86_64-linux-musl" => ["x86_64", "linux", "musl"], "x86_64-linux-uclibc" => ["x86_64", "linux", "uclibc"], "x86_64-openbsd3.9" => ["x86_64", "openbsd", "3.9"], @@ -283,6 +283,10 @@ def test_nil_version_is_stricter_for_linux_os assert(x86_linux === x86_linux_gnu, "linux =~ linux-gnu") assert(x86_linux_gnu === x86_linux, "linux-gnu =~ linux") + # musl and explicit gnu should differ + refute(x86_linux_gnu === x86_linux_musl, "linux-gnu =~ linux-musl") + refute(x86_linux_musl === x86_linux_gnu, 
"linux-musl =~ linux-gnu") + # explicit libc differ refute(x86_linux_uclibc === x86_linux_musl, "linux-uclibc =~ linux-musl") refute(x86_linux_musl === x86_linux_uclibc, "linux-musl =~ linux-uclibc") @@ -291,6 +295,10 @@ def test_nil_version_is_stricter_for_linux_os assert(x86_linux === x86_linux_musl, "linux =~ linux-musl") # ...but implicit gnu runtime generally does not accept musl-specific gems refute(x86_linux_musl === x86_linux, "linux-musl =~ linux") + + # other libc are not glibc compatible + refute(x86_linux === x86_linux_uclibc, "linux =~ linux-uclibc") + refute(x86_linux_uclibc === x86_linux, "linux-uclibc =~ linux") end def test_equals3_cpu_arm From 6465a5331a2e5a0361e566b9c55f5314271a4df9 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 23 Aug 2022 09:04:57 +0900 Subject: [PATCH 218/546] [DOC] Remove extra page-dir prefix --- time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/time.c b/time.c index dba8f1f620950f..2b1523915279e1 100644 --- a/time.c +++ b/time.c @@ -3898,7 +3898,7 @@ time_fixoff(VALUE time) * t.getlocal('+12:00') # => 2000-01-01 12:00:00 +1200 * * For forms of argument +zone+, see - * {Timezone Specifiers}[rdoc-ref:doc/timezone_specifiers.rdoc]. + * {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. * */ From b30fc03e924235207edb009be2108226e4c8efc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 8 Aug 2022 21:39:49 +0200 Subject: [PATCH 219/546] [rubygems/rubygems] Centralize loading `Bundler::MatchPlatform` mixin It's explicitly loaded when monkeypatching RubyGems, which we do very early. So neither autoloading it, nor explicitly loading it anywhere else is necessary. 
https://github.com/rubygems/rubygems/commit/fbc7a57161 --- lib/bundler.rb | 1 - lib/bundler/lazy_specification.rb | 2 -- 2 files changed, 3 deletions(-) diff --git a/lib/bundler.rb b/lib/bundler.rb index 7df22ab3a54813..b24d47c6d06e17 100644 --- a/lib/bundler.rb +++ b/lib/bundler.rb @@ -59,7 +59,6 @@ module Bundler autoload :Installer, File.expand_path("bundler/installer", __dir__) autoload :LazySpecification, File.expand_path("bundler/lazy_specification", __dir__) autoload :LockfileParser, File.expand_path("bundler/lockfile_parser", __dir__) - autoload :MatchPlatform, File.expand_path("bundler/match_platform", __dir__) autoload :ProcessLock, File.expand_path("bundler/process_lock", __dir__) autoload :RemoteSpecification, File.expand_path("bundler/remote_specification", __dir__) autoload :Resolver, File.expand_path("bundler/resolver", __dir__) diff --git a/lib/bundler/lazy_specification.rb b/lib/bundler/lazy_specification.rb index 5b40bec5a814a2..78d2c22f81168a 100644 --- a/lib/bundler/lazy_specification.rb +++ b/lib/bundler/lazy_specification.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "match_platform" - module Bundler class LazySpecification include MatchPlatform From 29c443fedc0d3692cb52241318abb0de619ae178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 8 Aug 2022 22:03:55 +0200 Subject: [PATCH 220/546] [rubygems/rubygems] Remove unnecessary mixin inclusion It's already included by the parent. 
https://github.com/rubygems/rubygems/commit/3ffe389c44 --- lib/bundler/endpoint_specification.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/bundler/endpoint_specification.rb b/lib/bundler/endpoint_specification.rb index e9aa366b41d99e..368534ba6d46e7 100644 --- a/lib/bundler/endpoint_specification.rb +++ b/lib/bundler/endpoint_specification.rb @@ -3,8 +3,6 @@ module Bundler # used for Creating Specifications from the Gemcutter Endpoint class EndpointSpecification < Gem::Specification - include MatchPlatform - attr_reader :name, :version, :platform, :checksum attr_accessor :source, :remote, :dependencies From a9509068db84d6a86a718b5ae931b8d65fc15272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Mon, 8 Aug 2022 23:48:02 +0200 Subject: [PATCH 221/546] [rubygems/rubygems] Make `compact_index_api_missing` server more strict The compact index should not request any marshaled gemspecs whatsoever. https://github.com/rubygems/rubygems/commit/6dbd44d0c0 --- spec/bundler/support/artifice/compact_index_api_missing.rb | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/spec/bundler/support/artifice/compact_index_api_missing.rb b/spec/bundler/support/artifice/compact_index_api_missing.rb index 6514fde01e4263..2fd8b6d2e982cb 100644 --- a/spec/bundler/support/artifice/compact_index_api_missing.rb +++ b/spec/bundler/support/artifice/compact_index_api_missing.rb @@ -6,12 +6,7 @@ class CompactIndexApiMissing < CompactIndexAPI get "/fetch/actual/gem/:id" do - warn params[:id] - if params[:id] == "rack-1.0.gemspec.rz" - halt 404 - else - File.binread("#{gem_repo2}/quick/Marshal.4.8/#{params[:id]}") - end + halt 404 end end From c21c9a29eead43364e6347c0ce2f468d26391b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Tue, 9 Aug 2022 11:39:03 +0200 Subject: [PATCH 222/546] [rubygems/rubygems] Refactor building metadata dependencies https://github.com/rubygems/rubygems/commit/fa60f1fe43 --- 
lib/bundler/resolver/spec_group.rb | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/bundler/resolver/spec_group.rb b/lib/bundler/resolver/spec_group.rb index 4de5b91aa6a38d..1c016fc55d40ef 100644 --- a/lib/bundler/resolver/spec_group.rb +++ b/lib/bundler/resolver/spec_group.rb @@ -97,14 +97,17 @@ def __dependencies(platform) def metadata_dependencies(platform) spec = @specs[platform].first return [] if spec.is_a?(LazySpecification) - dependencies = [] - unless spec.required_ruby_version.none? - dependencies << DepProxy.get_proxy(Dependency.new("Ruby\0", spec.required_ruby_version), platform) - end - unless spec.required_rubygems_version.none? - dependencies << DepProxy.get_proxy(Dependency.new("RubyGems\0", spec.required_rubygems_version), platform) - end - dependencies + + [ + metadata_dependency("Ruby", spec.required_ruby_version, platform), + metadata_dependency("RubyGems", spec.required_rubygems_version, platform), + ].compact + end + + def metadata_dependency(name, requirement, platform) + return if requirement.none? + + DepProxy.get_proxy(Dependency.new("#{name}\0", requirement), platform) end end end From 4790d0accdb745f9d8e605fd42eab712e4ebf834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 18 Aug 2022 23:28:26 +0200 Subject: [PATCH 223/546] [rubygems/rubygems] Fix conservative update downgrading top level gems When `--conservative` is passed, explicit unlocks are set for top level gems via `@unlock[:gems]`, so that only those particular gems are allowed to be updated. When we compute the "base resolve" from the lockfile (the set of gems whose versions should be kept pinned by the resolver), we always exclude gems explicitly unlocked through `@unlock[:gems]` from it. This is done by the `converge_specs` method. However, the `converge_specs` method is also used for figuring out additional lower bound requirements from the lockfile. 
But in this case, even if gems are explicitly unlocked in `@unlock[:gems]`, we still want to add the additional requirement, so that gems are not downgraded by the resolver. So the solution is to move the line filtering out gems in `@unlock[:gems]` from the `converge_specs` method out of that method, so that it only applies for computing the "base resolve", but not the additional lower bound requirements. https://github.com/rubygems/rubygems/commit/405119bd7b --- lib/bundler/definition.rb | 6 ++- spec/bundler/commands/update_spec.rb | 60 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 0ab0451695b36e..7d28375bb51216 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -715,7 +715,9 @@ def converge_dependencies # commonly happen if the Gemfile has changed since the lockfile was last # generated def converge_locked_specs - resolve = converge_specs(@locked_specs) + converged = converge_specs(@locked_specs) + + resolve = SpecSet.new(converged.reject {|s| @unlock[:gems].include?(s.name) }) diff = nil @@ -788,7 +790,7 @@ def converge_specs(specs) end end - SpecSet.new(filter_specs(converged, deps).reject {|s| @unlock[:gems].include?(s.name) }) + filter_specs(converged, deps) end def metadata_dependencies diff --git a/spec/bundler/commands/update_spec.rb b/spec/bundler/commands/update_spec.rb index 8ca537ac10da74..11ff49bf89a3da 100644 --- a/spec/bundler/commands/update_spec.rb +++ b/spec/bundler/commands/update_spec.rb @@ -301,6 +301,66 @@ expect(lockfile).to eq(previous_lockfile) end + it "does not downgrade direct dependencies when run with --conservative" do + build_repo4 do + build_gem "oauth2", "2.0.6" do |s| + s.add_dependency "faraday", ">= 0.17.3", "< 3.0" + end + + build_gem "oauth2", "1.4.10" do |s| + s.add_dependency "faraday", ">= 0.17.3", "< 3.0" + s.add_dependency "multi_json", "~> 1.3" + end + + build_gem "faraday", "2.5.2" + +
build_gem "multi_json", "1.15.0" + + build_gem "quickbooks-ruby", "1.0.19" do |s| + s.add_dependency "oauth2", "~> 1.4" + end + + build_gem "quickbooks-ruby", "0.1.9" do |s| + s.add_dependency "oauth2" + end + end + + gemfile <<-G + source "#{file_uri_for(gem_repo4)}" + + gem "oauth2" + gem "quickbooks-ruby" + G + + lockfile <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + faraday (2.5.2) + multi_json (1.15.0) + oauth2 (1.4.10) + faraday (>= 0.17.3, < 3.0) + multi_json (~> 1.3) + quickbooks-ruby (1.0.19) + oauth2 (~> 1.4) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + oauth2 + quickbooks-ruby + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "update --conservative --verbose" + + expect(out).not_to include("Installing quickbooks-ruby 0.1.9") + expect(out).to include("Installing quickbooks-ruby 1.0.19").and include("Installing oauth2 1.4.10") + end + it "does not downgrade indirect dependencies unnecessarily" do build_repo4 do build_gem "a" do |s| From f69244cee8c01d82e94d38032c82be684f37808a Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 22 Aug 2022 11:52:51 +0900 Subject: [PATCH 224/546] Merge rubygems/bundler HEAD Pick from https://github.com/rubygems/rubygems/commit/6b3a5a9ab0453463381a8164efb6298ea9eb776f --- lib/bundler.rb | 2 +- lib/bundler/definition.rb | 41 ++++++----- lib/bundler/endpoint_specification.rb | 13 +--- lib/bundler/gem_version_promoter.rb | 4 ++ lib/bundler/incomplete_specification.rb | 12 ---- lib/bundler/installer.rb | 17 ++--- lib/bundler/lazy_specification.rb | 4 +- lib/bundler/match_metadata.rb | 13 ++++ lib/bundler/match_remote_metadata.rb | 26 +++++++ lib/bundler/remote_specification.rb | 8 +-- lib/bundler/resolver.rb | 56 ++++++++++----- lib/bundler/resolver/spec_group.rb | 2 +- lib/bundler/rubygems_ext.rb | 2 + lib/bundler/spec_set.rb | 28 +++++--- .../install/gems/dependency_api_spec.rb | 16 +++++ spec/bundler/install/gems/resolving_spec.rb | 71 +++++++++++++++++++ 16 files changed, 225 
insertions(+), 90 deletions(-) delete mode 100644 lib/bundler/incomplete_specification.rb create mode 100644 lib/bundler/match_metadata.rb create mode 100644 lib/bundler/match_remote_metadata.rb diff --git a/lib/bundler.rb b/lib/bundler.rb index b24d47c6d06e17..79f65ccbe1bfb6 100644 --- a/lib/bundler.rb +++ b/lib/bundler.rb @@ -53,12 +53,12 @@ module Bundler autoload :GemHelpers, File.expand_path("bundler/gem_helpers", __dir__) autoload :GemVersionPromoter, File.expand_path("bundler/gem_version_promoter", __dir__) autoload :Graph, File.expand_path("bundler/graph", __dir__) - autoload :IncompleteSpecification, File.expand_path("bundler/incomplete_specification", __dir__) autoload :Index, File.expand_path("bundler/index", __dir__) autoload :Injector, File.expand_path("bundler/injector", __dir__) autoload :Installer, File.expand_path("bundler/installer", __dir__) autoload :LazySpecification, File.expand_path("bundler/lazy_specification", __dir__) autoload :LockfileParser, File.expand_path("bundler/lockfile_parser", __dir__) + autoload :MatchRemoteMetadata, File.expand_path("bundler/match_remote_metadata", __dir__) autoload :ProcessLock, File.expand_path("bundler/process_lock", __dir__) autoload :RemoteSpecification, File.expand_path("bundler/remote_specification", __dir__) autoload :Resolver, File.expand_path("bundler/resolver", __dir__) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 7d28375bb51216..66efd82b53552d 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -145,8 +145,6 @@ def initialize(lockfile, dependencies, sources, unlock, ruby_version = nil, opti @dependency_changes = converge_dependencies @local_changes = converge_locals - @reresolve = nil - @requires = compute_requires end @@ -218,6 +216,7 @@ def missing_specs? 
true rescue BundlerError => e @resolve = nil + @resolver = nil @specs = nil @gem_version_promoter = nil @@ -288,7 +287,7 @@ def resolve end else Bundler.ui.debug("Found changes from the lockfile, re-resolving dependencies because #{change_reason}") - @reresolve = reresolve + resolver.start(expanded_dependencies) end end @@ -482,11 +481,18 @@ def unlocking? private - def reresolve - last_resolve = converge_locked_specs - remove_ruby_from_platforms_if_necessary!(dependencies) - expanded_dependencies = expand_dependencies(dependencies + metadata_dependencies, true) - Resolver.resolve(expanded_dependencies, source_requirements, last_resolve, gem_version_promoter, additional_base_requirements_for_resolve, platforms) + def resolver + @resolver ||= begin + last_resolve = converge_locked_specs + Resolver.new(source_requirements, last_resolve, gem_version_promoter, additional_base_requirements_for_resolve, platforms) + end + end + + def expanded_dependencies + @expanded_dependencies ||= begin + remove_ruby_from_platforms_if_necessary!(dependencies) + expand_dependencies(dependencies + metadata_dependencies, true) + end end def filter_specs(specs, deps) @@ -514,15 +520,13 @@ def materialize(dependencies) raise GemNotFound, "Could not find #{missing_specs_list.join(" nor ")}" end - if @reresolve.nil? + loop do incomplete_specs = specs.incomplete_specs + break if incomplete_specs.empty? - if incomplete_specs.any? 
- Bundler.ui.debug("The lockfile does not have all gems needed for the current platform though, Bundler will still re-resolve dependencies") - @unlock[:gems].concat(incomplete_specs.map(&:name)) - @resolve = reresolve - specs = resolve.materialize(dependencies) - end + Bundler.ui.debug("The lockfile does not have all gems needed for the current platform though, Bundler will still re-resolve dependencies") + @resolve = resolver.start(expanded_dependencies, :exclude_specs => incomplete_specs) + specs = resolve.materialize(dependencies) end bundler = sources.metadata_source.specs.search(Gem::Dependency.new("bundler", VERSION)).last @@ -879,10 +883,8 @@ def compute_requires def additional_base_requirements_for_resolve return [] unless @locked_gems && unlocking? && !sources.expired_sources?(@locked_gems.sources) converge_specs(@originally_locked_specs).map do |locked_spec| - name = locked_spec.name - dep = Dependency.new(name, ">= #{locked_spec.version}") - DepProxy.get_proxy(dep, locked_spec.platform) - end + Dependency.new(locked_spec.name, ">= #{locked_spec.version}") + end.uniq end def remove_ruby_from_platforms_if_necessary!(dependencies) @@ -894,6 +896,7 @@ def remove_ruby_from_platforms_if_necessary!(dependencies) remove_platform(Gem::Platform::RUBY) add_current_platform + resolver.platforms = @platforms end def source_map diff --git a/lib/bundler/endpoint_specification.rb b/lib/bundler/endpoint_specification.rb index 368534ba6d46e7..ea197328ba05c8 100644 --- a/lib/bundler/endpoint_specification.rb +++ b/lib/bundler/endpoint_specification.rb @@ -3,6 +3,8 @@ module Bundler # used for Creating Specifications from the Gemcutter Endpoint class EndpointSpecification < Gem::Specification + include MatchRemoteMetadata + attr_reader :name, :version, :platform, :checksum attr_accessor :source, :remote, :dependencies @@ -20,17 +22,6 @@ def initialize(name, version, platform, spec_fetcher, dependencies, metadata = n parse_metadata(metadata) end - def required_ruby_version - 
@required_ruby_version ||= _remote_specification.required_ruby_version - end - - # A fallback is included because the original version of the specification - # API didn't include that field, so some marshalled specs in the index have it - # set to +nil+. - def required_rubygems_version - @required_rubygems_version ||= _remote_specification.required_rubygems_version || Gem::Requirement.default - end - def fetch_platform @platform end diff --git a/lib/bundler/gem_version_promoter.rb b/lib/bundler/gem_version_promoter.rb index 3cce3f2139da89..ddf7446dd21f3e 100644 --- a/lib/bundler/gem_version_promoter.rb +++ b/lib/bundler/gem_version_promoter.rb @@ -88,6 +88,10 @@ def sort_versions(dep, spec_groups) end end + def reset + @sort_versions = {} + end + # @return [bool] Convenience method for testing value of level variable. def major? level == :major diff --git a/lib/bundler/incomplete_specification.rb b/lib/bundler/incomplete_specification.rb deleted file mode 100644 index 6d0b9b901c061c..00000000000000 --- a/lib/bundler/incomplete_specification.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Bundler - class IncompleteSpecification - attr_reader :name, :platform - - def initialize(name, platform) - @name = name - @platform = platform - end - end -end diff --git a/lib/bundler/installer.rb b/lib/bundler/installer.rb index b7b0e36dfd85fc..1b17de5d4e2d83 100644 --- a/lib/bundler/installer.rb +++ b/lib/bundler/installer.rb @@ -238,19 +238,14 @@ def load_plugins end def ensure_specs_are_compatible! - system_ruby = Bundler::RubyVersion.system - rubygems_version = Bundler.rubygems.version @definition.specs.each do |spec| - if required_ruby_version = spec.required_ruby_version - unless required_ruby_version.satisfied_by?(system_ruby.gem_version) - raise InstallError, "#{spec.full_name} requires ruby version #{required_ruby_version}, " \ - "which is incompatible with the current version, #{system_ruby}" - end + unless spec.matches_current_ruby? 
+ raise InstallError, "#{spec.full_name} requires ruby version #{spec.required_ruby_version}, " \ + "which is incompatible with the current version, #{Gem.ruby_version}" end - next unless required_rubygems_version = spec.required_rubygems_version - unless required_rubygems_version.satisfied_by?(rubygems_version) - raise InstallError, "#{spec.full_name} requires rubygems version #{required_rubygems_version}, " \ - "which is incompatible with the current version, #{rubygems_version}" + unless spec.matches_current_rubygems? + raise InstallError, "#{spec.full_name} requires rubygems version #{spec.required_rubygems_version}, " \ + "which is incompatible with the current version, #{Gem.rubygems_version}" end end end diff --git a/lib/bundler/lazy_specification.rb b/lib/bundler/lazy_specification.rb index 78d2c22f81168a..ec141cfa27da29 100644 --- a/lib/bundler/lazy_specification.rb +++ b/lib/bundler/lazy_specification.rb @@ -95,8 +95,8 @@ def __materialize__(candidates) @specification = begin search = candidates.reverse.find do |spec| spec.is_a?(StubSpecification) || - (spec.required_ruby_version.satisfied_by?(Gem.ruby_version) && - spec.required_rubygems_version.satisfied_by?(Gem.rubygems_version)) + (spec.matches_current_ruby? && + spec.matches_current_rubygems?) end if search.nil? && Bundler.frozen_bundle? search = candidates.last diff --git a/lib/bundler/match_metadata.rb b/lib/bundler/match_metadata.rb new file mode 100644 index 00000000000000..499036ca93efcf --- /dev/null +++ b/lib/bundler/match_metadata.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Bundler + module MatchMetadata + def matches_current_ruby? + @required_ruby_version.satisfied_by?(Gem.ruby_version) + end + + def matches_current_rubygems? 
+ @required_rubygems_version.satisfied_by?(Gem.rubygems_version) + end + end +end diff --git a/lib/bundler/match_remote_metadata.rb b/lib/bundler/match_remote_metadata.rb new file mode 100644 index 00000000000000..e1b2f4d0e277f9 --- /dev/null +++ b/lib/bundler/match_remote_metadata.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Bundler + module FetchMetadata + def matches_current_ruby? + @required_ruby_version ||= _remote_specification.required_ruby_version + + super + end + + def matches_current_rubygems? + # A fallback is included because the original version of the specification + # API didn't include that field, so some marshalled specs in the index have it + # set to +nil+. + @required_rubygems_version ||= _remote_specification.required_rubygems_version || Gem::Requirement.default + + super + end + end + + module MatchRemoteMetadata + include MatchMetadata + + prepend FetchMetadata + end +end diff --git a/lib/bundler/remote_specification.rb b/lib/bundler/remote_specification.rb index b5d7e3a6c9a61b..601957746f3af6 100644 --- a/lib/bundler/remote_specification.rb +++ b/lib/bundler/remote_specification.rb @@ -6,6 +6,7 @@ module Bundler # be seeded with what we're given from the source's abbreviated index - the # full specification will only be fetched when necessary. class RemoteSpecification + include MatchRemoteMetadata include MatchPlatform include Comparable @@ -28,13 +29,6 @@ def fetch_platform @platform = _remote_specification.platform end - # A fallback is included because the original version of the specification - # API didn't include that field, so some marshalled specs in the index have it - # set to +nil+. 
- def required_rubygems_version - @required_rubygems_version ||= _remote_specification.required_rubygems_version || Gem::Requirement.default - end - def full_name if @original_platform == Gem::Platform::RUBY "#{@name}-#{@version}" diff --git a/lib/bundler/resolver.rb b/lib/bundler/resolver.rb index ca1bdbda7bcdf7..e382319112fc08 100644 --- a/lib/bundler/resolver.rb +++ b/lib/bundler/resolver.rb @@ -7,6 +7,8 @@ class Resolver include GemHelpers + attr_writer :platforms + # Figures out the best possible configuration of gems that satisfies # the list of passed dependencies and any child dependencies without # causing any gem activation errors. @@ -19,41 +21,48 @@ class Resolver # collection of gemspecs is returned. Otherwise, nil is returned. def self.resolve(requirements, source_requirements = {}, base = [], gem_version_promoter = GemVersionPromoter.new, additional_base_requirements = [], platforms = nil) base = SpecSet.new(base) unless base.is_a?(SpecSet) - metadata_requirements, regular_requirements = requirements.partition {|dep| dep.name.end_with?("\0") } - resolver = new(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms, metadata_requirements) - result = resolver.start(requirements) - SpecSet.new(SpecSet.new(result).for(regular_requirements, false, platforms)) + resolver = new(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms) + resolver.start(requirements) end - def initialize(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms, metadata_requirements) + def initialize(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms) @source_requirements = source_requirements - @metadata_requirements = metadata_requirements @base = base @resolver = Molinillo::Resolver.new(self, self) + @results_for = {} @search_for = {} - @base_dg = Molinillo::DependencyGraph.new - base.each do |ls| - dep = Dependency.new(ls.name, 
ls.version) - @base_dg.add_vertex(ls.name, DepProxy.get_proxy(dep, ls.platform), true) - end - additional_base_requirements.each {|d| @base_dg.add_vertex(d.name, d) } - @platforms = platforms.reject {|p| p != Gem::Platform::RUBY && (platforms - [p]).any? {|pl| generic(pl) == p } } + @additional_base_requirements = additional_base_requirements + @platforms = platforms @resolving_only_for_ruby = platforms == [Gem::Platform::RUBY] @gem_version_promoter = gem_version_promoter @use_gvp = Bundler.feature_flag.use_gem_version_promoter_for_major_updates? || !@gem_version_promoter.major? end - def start(requirements) + def start(requirements, exclude_specs: []) + @metadata_requirements, regular_requirements = requirements.partition {|dep| dep.name.end_with?("\0") } + + exclude_specs.each do |spec| + remove_from_candidates(spec) + end + + @base_dg = Molinillo::DependencyGraph.new + @base.each do |ls| + dep = Dependency.new(ls.name, ls.version) + @base_dg.add_vertex(ls.name, DepProxy.get_proxy(dep, ls.platform), true) + end + @additional_base_requirements.each {|d| @base_dg.add_vertex(d.name, d) } + @gem_version_promoter.prerelease_specified = @prerelease_specified = {} requirements.each {|dep| @prerelease_specified[dep.name] ||= dep.prerelease? } verify_gemfile_dependencies_are_found!(requirements) - dg = @resolver.resolve(requirements, @base_dg) - dg. + result = @resolver.resolve(requirements, @base_dg). map(&:payload). reject {|sg| sg.name.end_with?("\0") }. map(&:to_specs). 
flatten + + SpecSet.new(SpecSet.new(result).for(regular_requirements, false, @platforms)) rescue Molinillo::VersionConflict => e message = version_conflict_message(e) raise VersionConflict.new(e.conflicts.keys.uniq, message) @@ -177,7 +186,7 @@ def source_for(name) end def results_for(dependency) - index_for(dependency).search(dependency) + @results_for[dependency] ||= index_for(dependency).search(dependency) end def name_for(dependency) @@ -228,6 +237,19 @@ def self.platform_sort_key(platform) private + def remove_from_candidates(spec) + @base.delete(spec) + @gem_version_promoter.reset + + @results_for.keys.each do |dep| + next unless dep.name == spec.name + + @results_for[dep].reject {|s| s.name == spec.name && s.version == spec.version } + end + + @search_for = {} + end + # returns an integer \in (-\infty, 0] # a number closer to 0 means the dependency is less constraining # diff --git a/lib/bundler/resolver/spec_group.rb b/lib/bundler/resolver/spec_group.rb index 1c016fc55d40ef..4e5b0082d3d316 100644 --- a/lib/bundler/resolver/spec_group.rb +++ b/lib/bundler/resolver/spec_group.rb @@ -105,7 +105,7 @@ def metadata_dependencies(platform) end def metadata_dependency(name, requirement, platform) - return if requirement.none? + return if requirement.nil? || requirement.none? DepProxy.get_proxy(Dependency.new("#{name}\0", requirement), platform) end diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index 938c58e64d855d..dee15f8ac28f39 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -15,6 +15,7 @@ # `Gem::Source` from the redefined `Gem::Specification#source`. 
require "rubygems/source" +require_relative "match_metadata" require_relative "match_platform" # Cherry-pick fixes to `Gem.ruby_version` to be useful for modern Bundler @@ -28,6 +29,7 @@ module Gem class Specification + include ::Bundler::MatchMetadata include ::Bundler::MatchPlatform attr_accessor :remote, :location, :relative_loaded_from diff --git a/lib/bundler/spec_set.rb b/lib/bundler/spec_set.rb index 14733269d611d2..4965ca9e60c51b 100644 --- a/lib/bundler/spec_set.rb +++ b/lib/bundler/spec_set.rb @@ -7,8 +7,11 @@ class SpecSet include Enumerable include TSort - def initialize(specs) + attr_reader :incomplete_specs + + def initialize(specs, incomplete_specs = []) @specs = specs + @incomplete_specs = incomplete_specs end def for(dependencies, check = false, platforms = [nil]) @@ -19,7 +22,10 @@ def for(dependencies, check = false, platforms = [nil]) loop do break unless dep = deps.shift - key = [dep[0].name, dep[1]] + name = dep[0].name + platform = dep[1] + + key = [name, platform] next if handled.key?(key) handled[key] = true @@ -33,7 +39,7 @@ def for(dependencies, check = false, platforms = [nil]) deps << [d, dep[1]] end elsif check - specs << IncompleteSpecification.new(*key) + @incomplete_specs += lookup[name] end end @@ -51,6 +57,12 @@ def []=(key, value) @sorted = nil end + def delete(spec) + @specs.delete(spec) + @lookup = nil + @sorted = nil + end + def sort! self end @@ -66,7 +78,7 @@ def to_hash def materialize(deps) materialized = self.for(deps, true) - SpecSet.new(materialized) + SpecSet.new(materialized, incomplete_specs) end # Materialize for all the specs in the spec set, regardless of what platform they're for @@ -83,17 +95,15 @@ def materialized_for_all_platforms end def incomplete_ruby_specs?(deps) - self.class.new(self.for(deps, true, [Gem::Platform::RUBY])).incomplete_specs.any? + self.for(deps, true, [Gem::Platform::RUBY]) + + @incomplete_specs.any? 
end def missing_specs @specs.select {|s| s.is_a?(LazySpecification) } end - def incomplete_specs - @specs.select {|s| s.is_a?(IncompleteSpecification) } - end - def merge(set) arr = sorted.dup set.each do |set_spec| diff --git a/spec/bundler/install/gems/dependency_api_spec.rb b/spec/bundler/install/gems/dependency_api_spec.rb index 79317a7fad3aad..a3c5bc32aa6414 100644 --- a/spec/bundler/install/gems/dependency_api_spec.rb +++ b/spec/bundler/install/gems/dependency_api_spec.rb @@ -443,6 +443,22 @@ def require(*args) expect(the_bundle).to include_gems "back_deps 1.0" end + it "does not fetch all marshaled specs" do + build_repo2 do + build_gem "foo", "1.0" + build_gem "foo", "2.0" + end + + install_gemfile <<-G, :artifice => "endpoint", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo2.to_s }, :verbose => true + source "#{source_uri}" + + gem "foo" + G + + expect(out).to include("foo-2.0.gemspec.rz") + expect(out).not_to include("foo-1.0.gemspec.rz") + end + it "does not refetch if the only unmet dependency is bundler" do build_repo2 do build_gem "bundler_dep" do |s| diff --git a/spec/bundler/install/gems/resolving_spec.rb b/spec/bundler/install/gems/resolving_spec.rb index 9c0d6bfe56cba7..9405f146b9afe1 100644 --- a/spec/bundler/install/gems/resolving_spec.rb +++ b/spec/bundler/install/gems/resolving_spec.rb @@ -305,6 +305,77 @@ end end + context "in a transitive dependencies in a lockfile" do + before do + build_repo2 do + build_gem "rubocop", "1.28.2" do |s| + s.required_ruby_version = ">= #{current_ruby_minor}" + + s.add_dependency "rubocop-ast", ">= 1.17.0", "< 2.0" + end + + build_gem "rubocop", "1.35.0" do |s| + s.required_ruby_version = ">= #{next_ruby_minor}" + + s.add_dependency "rubocop-ast", ">= 1.20.1", "< 2.0" + end + + build_gem "rubocop-ast", "1.17.0" do |s| + s.required_ruby_version = ">= #{current_ruby_minor}" + end + + build_gem "rubocop-ast", "1.21.0" do |s| + s.required_ruby_version = ">= #{next_ruby_minor}" + end + end + + gemfile <<-G + source 
"http://localgemserver.test/" + gem 'rubocop' + G + + lockfile <<~L + GEM + remote: http://localgemserver.test/ + specs: + rubocop (1.35.0) + rubocop-ast (>= 1.20.1, < 2.0) + rubocop-ast (1.21.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + parallel_tests + + BUNDLED WITH + #{Bundler::VERSION} + L + end + + it "automatically updates lockfile to use the older compatible versions" do + bundle "install --verbose", :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo2.to_s } + + expect(lockfile).to eq <<~L + GEM + remote: http://localgemserver.test/ + specs: + rubocop (1.28.2) + rubocop-ast (>= 1.17.0, < 2.0) + rubocop-ast (1.17.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + rubocop + + BUNDLED WITH + #{Bundler::VERSION} + L + end + end + it "gives a meaningful error on ruby version mismatches between dependencies" do build_repo4 do build_gem "requires-old-ruby" do |s| From c1ecc498e4b879c85ee9ea7bfb3cf496777b6fcc Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 23 Aug 2022 09:34:32 +0900 Subject: [PATCH 225/546] Use rbs HEAD for https://github.com/ruby/rbs/pull/1090 --- gems/bundled_gems | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gems/bundled_gems b/gems/bundled_gems index 63536571d9cef1..be637125fb5b47 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -11,6 +11,6 @@ net-pop 0.1.1 https://github.com/ruby/net-pop net-smtp 0.3.1 https://github.com/ruby/net-smtp matrix 0.4.2 https://github.com/ruby/matrix prime 0.1.2 https://github.com/ruby/prime -rbs 2.6.0 https://github.com/ruby/rbs 5202d4eeed3257448f19004b4baac4bcf4127717 +rbs 2.6.0 https://github.com/ruby/rbs 5ec9d53efe4bf0a97f33c3016aed430be135583a typeprof 0.21.3 https://github.com/ruby/typeprof debug 1.6.2 https://github.com/ruby/debug e7c37486ff9579251e5d25645b8d38ec96708f12 From 545c2b0047309e3a94745cea95d36d44f2223276 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 22 Aug 2022 19:28:57 -0700 Subject: [PATCH 
226/546] Fix an inconsistent include guard macro I was thinking about making it internal/mjit.h, but didn't. --- mjit_unit.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mjit_unit.h b/mjit_unit.h index 2e23a8d5fc60c4..8f80a070e07dc3 100644 --- a/mjit_unit.h +++ b/mjit_unit.h @@ -1,5 +1,5 @@ -#ifndef INTERNAL_MJIT_H -#define INTERNAL_MJIT_H +#ifndef MJIT_UNIT_H +#define MJIT_UNIT_H #include "ccan/list/list.h" @@ -26,4 +26,4 @@ struct rb_mjit_unit { unsigned int cc_entries_size; // ISEQ_BODY(iseq)->ci_size + ones of inlined iseqs }; -#endif /* INTERNAL_MJIT_H */ +#endif /* MJIT_UNIT_H */ From 892fe9bbbaa8b1c286d8d5404eb707ad4288a274 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 23 Aug 2022 12:49:46 +0900 Subject: [PATCH 227/546] omit all assertions at TestIO_Console#test_intr when running with FreeBSD --- test/io/console/test_io_console.rb | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/test/io/console/test_io_console.rb b/test/io/console/test_io_console.rb index b5382555f553e8..3c44181f2ac0b6 100644 --- a/test/io/console/test_io_console.rb +++ b/test/io/console/test_io_console.rb @@ -357,6 +357,15 @@ def assert_ctrl(expect, cc, r, w) end def test_intr + # This test fails randomly on FreeBSD 13 + # http://rubyci.s3.amazonaws.com/freebsd13/ruby-master/log/20220304T163001Z.fail.html.gz + # + # 1) Failure: + # TestIO_Console#test_intr [/usr/home/chkbuild/chkbuild/tmp/build/20220304T163001Z/ruby/test/io/console/test_io_console.rb:387]: + # <"25"> expected but was + # <"-e:12:in `p': \e[1mexecution expired (\e[1;4mTimeout::Error\e[m\e[1m)\e[m">. 
+ omit if /freebsd/ =~ RUBY_PLATFORM + run_pty("#{<<~"begin;"}\n#{<<~'end;'}") do |r, w, _| begin; require 'timeout' @@ -383,19 +392,12 @@ def test_intr assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("Interrupt", cc, r, w) unless /linux|solaris/ =~ RUBY_PLATFORM end - # This test fails randomly on FreeBSD 13 - # http://rubyci.s3.amazonaws.com/freebsd13/ruby-master/log/20220304T163001Z.fail.html.gz - # - # 1) Failure: - # TestIO_Console#test_intr [/usr/home/chkbuild/chkbuild/tmp/build/20220304T163001Z/ruby/test/io/console/test_io_console.rb:387]: - # <"25"> expected but was - # <"-e:12:in `p': \e[1mexecution expired (\e[1;4mTimeout::Error\e[m\e[1m)\e[m">. - if (cc = ctrl["dsusp"]) && /freebsd/ !~ RUBY_PLATFORM + if cc = ctrl["dsusp"] assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) end - if (cc = ctrl["lnext"]) && /freebsd/ !~ RUBY_PLATFORM + if cc = ctrl["lnext"] assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) From 9f6fcfcd7f74d5537f7631837e425fd4b61c0400 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 23 Aug 2022 14:04:09 +0900 Subject: [PATCH 228/546] Ubuntu 18.04 in GHA is deprecated from Aug 8,2022 https://github.com/actions/runner-images/issues/6002 --- .github/workflows/ubuntu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 4e4d1a02e1a689..372eb7cc2f9882 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -24,7 +24,6 @@ jobs: test_task: ["check", "test-bundler-parallel", "test-bundled-gems"] os: - ubuntu-20.04 -# - ubuntu-18.04 configure: ["", "cppflags=-DRUBY_DEBUG"] include: - test_task: "check" From 22a416a3bbdcd868ad20f51ac690bef9f85303e7 Mon Sep 17 00:00:00 2001 From: Takuya Noguchi Date: Tue, 23 Aug 2022 03:26:36 +0000 Subject: [PATCH 229/546] [rubygems/rubygems] Bundler: update the link suggested on error with the new one Also typo is 
fixed. Signed-off-by: Takuya Noguchi https://github.com/rubygems/rubygems/commit/9c1ea52ddf --- lib/bundler.rb | 4 ++-- spec/bundler/bundler/bundler_spec.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/bundler.rb b/lib/bundler.rb index 79f65ccbe1bfb6..24785ef5ebd07b 100644 --- a/lib/bundler.rb +++ b/lib/bundler.rb @@ -331,9 +331,9 @@ def rm_rf(path) FileUtils.remove_entry_secure(path) if path && File.exist?(path) rescue ArgumentError message = < Date: Tue, 23 Aug 2022 04:11:47 +0000 Subject: [PATCH 230/546] [rubygems/rubygems] Bundler: avoid use of "can not" in spec literals Signed-off-by: Takuya Noguchi https://github.com/rubygems/rubygems/commit/73b5cf9bd1 --- spec/bundler/commands/init_spec.rb | 2 +- spec/bundler/commands/remove_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/bundler/commands/init_spec.rb b/spec/bundler/commands/init_spec.rb index 683a453c7d7577..eaf8fa170ade5d 100644 --- a/spec/bundler/commands/init_spec.rb +++ b/spec/bundler/commands/init_spec.rb @@ -42,7 +42,7 @@ context "when the dir is not writable by the current user" do let(:subdir) { "child_dir" } - it "notifies the user that it can not write to it" do + it "notifies the user that it cannot write to it" do FileUtils.mkdir bundled_app(subdir) # chmod a-w it mode = File.stat(bundled_app(subdir)).mode ^ 0o222 diff --git a/spec/bundler/commands/remove_spec.rb b/spec/bundler/commands/remove_spec.rb index 093130f7d5f805..d757e0be4bbbab 100644 --- a/spec/bundler/commands/remove_spec.rb +++ b/spec/bundler/commands/remove_spec.rb @@ -522,7 +522,7 @@ end end - context "when gems can not be removed from other gemfile" do + context "when gems cannot be removed from other gemfile" do it "shows error" do create_file "Gemfile-other", <<-G gem "rails"; gem "rack" @@ -574,7 +574,7 @@ end context "when gem present in gemfiles but could not be removed from one from one of them" do - it "removes gem which can be removed and shows warning for 
file from which it can not be removed" do + it "removes gem which can be removed and shows warning for file from which it cannot be removed" do create_file "Gemfile-other", <<-G gem "rack" G From 073f3b7e0ad94657c04573983affb9d66e6bff2c Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 26 Jul 2022 21:17:30 +0900 Subject: [PATCH 231/546] [ruby/fileutils] Fix the test permission of "test_rm_rf" The test was added for [Bug #6756]. The ticket insisted `FileUtils.rm_rf` should delete an empty directory even if its permission is 000. However, the test tried to delete a directory with permission 700. https://github.com/ruby/fileutils/commit/d6c2ab2c01 --- test/fileutils/test_fileutils.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fileutils/test_fileutils.rb b/test/fileutils/test_fileutils.rb index 4afc8085ef2dc7..8c49eb39bb5e70 100644 --- a/test/fileutils/test_fileutils.rb +++ b/test/fileutils/test_fileutils.rb @@ -1798,7 +1798,7 @@ def test_rm_rf return if /mswin|mingw/ =~ RUBY_PLATFORM mkdir 'tmpdatadir' - chmod 0o700, 'tmpdatadir' + chmod 0o000, 'tmpdatadir' rm_rf 'tmpdatadir' assert_file_not_exist 'tmpdatadir' From 96562a517d3373466ec306b5f821a41f4758d2a6 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 26 Jul 2022 21:23:47 +0900 Subject: [PATCH 232/546] [ruby/fileutils] Narrow the scope of ensure The ensure in postorder_traverse was added for [Bug #6756]. The intention was to try to delete the parent directory if it failed to get the children. (It may be possible to delete the directory if it is empty.) However, the ensure region rescue'ed not only "failure to get children" but also "failure to delete each child". Thus, the following raised Errno::ENOTEMPTY, but we expect it to raise Errno::EACCES. ``` $ mkdir foo $ touch foo/bar $ chmod 555 foo $ ruby -rfileutils -e 'FileUtils.rm_rf("foo")' ``` This changeset narrows the ensure region so that it rescues only "failure to get children". 
https://github.com/ruby/fileutils/commit/ec5d3b84ea --- lib/fileutils.rb | 12 ++++++++++-- test/fileutils/test_fileutils.rb | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 8ae52668646b45..4ba7d18a5d5a15 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -2328,13 +2328,21 @@ def preorder_traverse def postorder_traverse if directory? - entries().each do |ent| + begin + children = entries() + rescue Errno::EACCES + # Failed to get the list of children. + # Assuming there is no children, try to process the parent directory. + yield self + return + end + + children.each do |ent| ent.postorder_traverse do |e| yield e end end end - ensure yield self end diff --git a/test/fileutils/test_fileutils.rb b/test/fileutils/test_fileutils.rb index 8c49eb39bb5e70..05ba8d184ae307 100644 --- a/test/fileutils/test_fileutils.rb +++ b/test/fileutils/test_fileutils.rb @@ -750,6 +750,24 @@ def test_rm_r_pathname assert_file_not_exist 'tmp/tmpdir3' end + def test_rm_r_no_permissions + check_singleton :rm_rf + + return if /mswin|mingw/ =~ RUBY_PLATFORM + + mkdir 'tmpdatadir' + touch 'tmpdatadir/tmpdata' + chmod "-x", 'tmpdatadir' + + begin + assert_raise Errno::EACCES do + rm_r 'tmpdatadir' + end + ensure + chmod "+x", 'tmpdatadir' + end + end + def test_remove_entry_cjk_path dir = "tmpdir\u3042" my_rm_rf dir From 983115cf3c8f75b1afbe3274f02c1529e1ce3a81 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 26 Jul 2022 21:31:27 +0900 Subject: [PATCH 233/546] [ruby/fileutils] FileUtils.rm* methods swallow only Errno::ENOENT when force is true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... instead of any StandardError. To behave like the standard `rm` command, it should only ignore exceptions about non-existent files, not every exception.
This should make debugging some errors easier, because the expectation is that `rm -rf` will succeed if and only if, all given files (previously existent or not) are removed. However, due to this exception swallowing, this is not always the case. From the `rm` man page > COMPATIBILITY > > The rm utility differs from historical implementations in that the -f > option only masks attempts to remove non-existent files instead of > masking a large variety of errors. https://github.com/ruby/fileutils/commit/fa65d676ec Co-Authored-By: David Rodríguez --- lib/fileutils.rb | 17 +++++++++++------ test/fileutils/test_fileutils.rb | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 4ba7d18a5d5a15..74bb904e28143a 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -1165,7 +1165,7 @@ def mv(src, dest, force: nil, noop: nil, verbose: nil, secure: nil) # # Keyword arguments: # - # - force: true - ignores raised exceptions of StandardError + # - force: true - ignores raised exceptions of Errno::ENOENT # and its descendants. # - noop: true - does not remove files; returns +nil+. # - verbose: true - prints an equivalent command: @@ -1248,7 +1248,7 @@ def rm_f(list, noop: nil, verbose: nil) # # Keyword arguments: # - # - force: true - ignores raised exceptions of StandardError + # - force: true - ignores raised exceptions of Errno::ENOENT # and its descendants. # - noop: true - does not remove entries; returns +nil+. # - secure: true - removes +src+ securely; @@ -1315,7 +1315,7 @@ def rm_rf(list, noop: nil, verbose: nil, secure: nil) # see {Avoiding the TOCTTOU Vulnerability}[rdoc-ref:FileUtils@Avoiding+the+TOCTTOU+Vulnerability]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. 
# @@ -1384,10 +1384,12 @@ def remove_entry_secure(path, force = false) ent.remove rescue raise unless force + raise unless Errno::ENOENT === $! end end rescue raise unless force + raise unless Errno::ENOENT === $! end module_function :remove_entry_secure @@ -1413,7 +1415,7 @@ def fu_stat_identical_entry?(a, b) #:nodoc: # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: FileUtils.remove_entry_secure. # @@ -1423,10 +1425,12 @@ def remove_entry(path, force = false) ent.remove rescue raise unless force + raise unless Errno::ENOENT === $! end end rescue raise unless force + raise unless Errno::ENOENT === $! end module_function :remove_entry @@ -1437,7 +1441,7 @@ def remove_entry(path, force = false) # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. # @@ -1445,6 +1449,7 @@ def remove_file(path, force = false) Entry_.new(path).remove_file rescue raise unless force + raise unless Errno::ENOENT === $! end module_function :remove_file @@ -1456,7 +1461,7 @@ def remove_file(path, force = false) # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. 
# diff --git a/test/fileutils/test_fileutils.rb b/test/fileutils/test_fileutils.rb index 05ba8d184ae307..bce7271a3bda46 100644 --- a/test/fileutils/test_fileutils.rb +++ b/test/fileutils/test_fileutils.rb @@ -1822,6 +1822,26 @@ def test_rm_rf assert_file_not_exist 'tmpdatadir' end + def test_rm_rf_no_permissions + check_singleton :rm_rf + + return if /mswin|mingw/ =~ RUBY_PLATFORM + + mkdir 'tmpdatadir' + touch 'tmpdatadir/tmpdata' + chmod "-x", 'tmpdatadir' + + begin + assert_raise Errno::EACCES do + rm_rf 'tmpdatadir' + end + + assert_file_exist 'tmpdatadir' + ensure + chmod "+x", 'tmpdatadir' + end + end + def test_rmdir check_singleton :rmdir From 46c3a93982d0b81668668a65c10bc3670a488d8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADt=20Ondruch?= Date: Tue, 23 Aug 2022 10:41:28 +0200 Subject: [PATCH 234/546] [ruby/irb] Drop hard dependency on RDoc. This has been introduced in https://github.com/ruby/irb/commit/026700499dfd, but it seems that this was just a mistake, otherwise the later handling of `LoadError` would not be needed.
https://github.com/ruby/irb/commit/54c8df06ff --- lib/irb/input-method.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb index fd68239ee38d61..a8227caa9c32dd 100644 --- a/lib/irb/input-method.rb +++ b/lib/irb/input-method.rb @@ -14,7 +14,6 @@ require_relative 'completion' require 'io/console' require 'reline' -require 'rdoc' module IRB STDIN_FILE_NAME = "(line)" # :nodoc: From 60c900f452327b9b5c82b1e2a8d1d9f88ce8a9b7 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 22 Aug 2022 13:42:38 -0400 Subject: [PATCH 235/546] Add @k0kubun to CODEOWNERS for YJIT --- .github/CODEOWNERS | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c8d7ec5e0dc19f..b7cd624b0dda95 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,9 +3,9 @@ # Code owners will be automatically tagged as reviewers when a pull request is opened # YJIT sources and tests -yjit* @maximecb @xrxr @tenderlove -yjit/* @maximecb @xrxr @tenderlove -doc/yjit/* @maximecb @xrxr @tenderlove -bootstraptest/test_yjit* @maximecb @xrxr @tenderlove -test/ruby/test_yjit* @maximecb @xrxr @tenderlove -.github/workflows/yjit* @maximecb @xrxr @tenderlove +yjit* @maximecb @xrxr @tenderlove @k0kubun +yjit/* @maximecb @xrxr @tenderlove @k0kubun +doc/yjit/* @maximecb @xrxr @tenderlove @k0kubun +bootstraptest/test_yjit* @maximecb @xrxr @tenderlove @k0kubun +test/ruby/test_yjit* @maximecb @xrxr @tenderlove @k0kubun +.github/workflows/yjit* @maximecb @xrxr @tenderlove @k0kubun From feff6833060d025b843063b744b6b4043bd72157 Mon Sep 17 00:00:00 2001 From: git Date: Wed, 24 Aug 2022 06:28:39 +0900 Subject: [PATCH 236/546] * 2022-08-24 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 8155bb9d100575..195be17f30943c 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define 
RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 23 +#define RUBY_RELEASE_DAY 24 #include "ruby/version.h" #include "ruby/internal/abi.h" From 314b76a567e84f3ef245e84e844bdd4aaaad4f2a Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Wed, 24 Aug 2022 10:36:17 +0900 Subject: [PATCH 237/546] test/-ext-/eval/test_eval.rb: Prevent "assigned but unused variable" --- test/-ext-/eval/test_eval.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/-ext-/eval/test_eval.rb b/test/-ext-/eval/test_eval.rb index 27952996e29c90..e37d301b2eaa1e 100644 --- a/test/-ext-/eval/test_eval.rb +++ b/test/-ext-/eval/test_eval.rb @@ -4,9 +4,9 @@ class EvalTest < Test::Unit::TestCase def test_rb_eval_string - a = 1 + _a = 1 assert_equal [self, 1, __method__], rb_eval_string(%q{ - [self, a, __method__] + [self, _a, __method__] }) end end From 0ad9cc16966c2e56f0fe7e5992edf76033d3a83f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 4 Aug 2022 13:03:29 +0200 Subject: [PATCH 238/546] [rubygems/rubygems] Backport non-gnu libc on linux platform matching to Bundler https://github.com/rubygems/rubygems/commit/703373b41f Co-authored-by: Loic Nageleisen --- lib/bundler/rubygems_ext.rb | 26 +++++++++ spec/bundler/resolver/platform_spec.rb | 73 ++++++++++++++++++++++++++ spec/bundler/support/indexes.rb | 4 ++ 3 files changed, 103 insertions(+) diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index dee15f8ac28f39..d976170f12fba8 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -237,6 +237,32 @@ class Platform MINGW = Gem::Platform.new("x86-mingw32") X64_MINGW = [Gem::Platform.new("x64-mingw32"), Gem::Platform.new("x64-mingw-ucrt")].freeze + + if Gem::Platform.new("x86_64-linux-musl") === Gem::Platform.new("x86_64-linux") + remove_method :=== + + def ===(other) + return nil unless Gem::Platform === other + + # universal-mingw32 matches x64-mingw-ucrt + return true if (@cpu == "universal" || other.cpu == 
"universal") && + @os.start_with?("mingw") && other.os.start_with?("mingw") + + # cpu + ([nil,"universal"].include?(@cpu) || [nil, "universal"].include?(other.cpu) || @cpu == other.cpu || + (@cpu == "arm" && other.cpu.start_with?("arm"))) && + + # os + @os == other.os && + + # version + ( + (@os != "linux" && (@version.nil? || other.version.nil?)) || + (@os == "linux" && ((@version.nil? && ["gnu", "musl"].include?(other.version)) || (@version == "gnu" && other.version.nil?))) || + @version == other.version + ) + end + end end Platform.singleton_class.module_eval do diff --git a/spec/bundler/resolver/platform_spec.rb b/spec/bundler/resolver/platform_spec.rb index 8eaed4220abb5f..418293365cd985 100644 --- a/spec/bundler/resolver/platform_spec.rb +++ b/spec/bundler/resolver/platform_spec.rb @@ -82,6 +82,79 @@ should_resolve_as %w[foo-1.0.0-x64-mingw32] end + describe "on a linux platform", :rubygems => ">= 3.1.0.pre.1" do + # Ruby's platform is *-linux => platform's libc is glibc, so not musl + # Ruby's platform is *-linux-musl => platform's libc is musl, so not glibc + # Gem's platform is *-linux => gem is glibc + maybe musl compatible + # Gem's platform is *-linux-musl => gem is musl compatible but not glibc + + it "favors the platform version-specific gem on a version-specifying linux platform" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux" + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux-musl" + + should_resolve_as %w[foo-1.0.0-x86_64-linux-musl] + end + + it "favors the version-less gem over the version-specific gem on a gnu linux platform" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux" + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux" + + should_resolve_as %w[foo-1.0.0-x86_64-linux] + end + + it "ignores the platform version-specific gem on a gnu linux platform" do + @index = build_index do + gem "foo", 
"1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux" + + should_not_resolve + end + + it "falls back to the platform version-less gem on a linux platform with a version" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux" + end + dep "foo" + platforms "x86_64-linux-musl" + + should_resolve_as %w[foo-1.0.0-x86_64-linux] + end + + it "falls back to the ruby platform gem on a gnu linux platform when only a version-specifying gem is available" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux" + + should_resolve_as %w[foo-1.0.0] + end + + it "falls back to the platform version-less gem on a version-specifying linux platform and no ruby platform gem is available" do + @index = build_index do + gem "foo", "1.0.0", "x86_64-linux" + end + dep "foo" + platforms "x86_64-linux-musl" + + should_resolve_as %w[foo-1.0.0-x86_64-linux] + end + end + it "takes the latest ruby gem if the platform specific gem doesn't match the required_ruby_version" do @index = build_index do gem "foo", "1.0.0" diff --git a/spec/bundler/support/indexes.rb b/spec/bundler/support/indexes.rb index 55d798a90a11ff..c496679ee63562 100644 --- a/spec/bundler/support/indexes.rb +++ b/spec/bundler/support/indexes.rb @@ -33,6 +33,10 @@ def resolve(args = []) Bundler::Resolver.resolve(deps, source_requirements, *args) end + def should_not_resolve + expect { resolve }.to raise_error(Bundler::GemNotFound) + end + def should_resolve_as(specs) got = resolve got = got.map(&:full_name).sort From b4be3c00c5737649166db676278fd28f768a5e3c Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Thu, 28 Jul 2022 16:45:08 +0100 Subject: [PATCH 239/546] add --yjit-dump-iseqs param (https://github.com/Shopify/ruby/pull/332) --- yjit.c | 12 +++++ yjit/bindgen/src/main.rs | 7 +++ yjit/src/core.rs | 27 +++++++++++ yjit/src/cruby_bindings.inc.rs | 26 +++++++++++ yjit/src/disasm.rs | 84 
++++++++++++++++++---------------- yjit/src/options.rs | 20 +++++++- yjit/src/utils.rs | 35 ++++++++++++++ 7 files changed, 170 insertions(+), 41 deletions(-) diff --git a/yjit.c b/yjit.c index 1a2f71a9599f32..0dddcfdc5aee28 100644 --- a/yjit.c +++ b/yjit.c @@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str) return LONG2NUM(RSTRING_LEN(str)); } +unsigned long +rb_RSTRING_LEN(VALUE str) +{ + return RSTRING_LEN(str); +} + +char * +rb_RSTRING_PTR(VALUE str) +{ + return RSTRING_PTR(str); +} + // This is defined only as a named struct inside rb_iseq_constant_body. // By giving it a separate typedef, we make it nameable by rust-bindgen. // Bindgen's temp/anon name isn't guaranteed stable. diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index f54addc7957ba3..f8d87aeec8c48c 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -70,6 +70,9 @@ fn main() { .allowlist_function("rb_str_buf_append") .allowlist_function("rb_str_dup") + // From encindex.h + .allowlist_type("ruby_preserved_encindex") + // This struct is public to Ruby C extensions // From include/ruby/internal/core/rbasic.h .allowlist_type("RBasic") @@ -240,6 +243,7 @@ fn main() { .allowlist_var("VM_ENV_DATA_INDEX_SPECVAL") .allowlist_var("VM_ENV_DATA_INDEX_FLAGS") .allowlist_var("VM_ENV_DATA_SIZE") + .allowlist_function("rb_iseq_path") // From yjit.c .allowlist_function("rb_iseq_(get|set)_yjit_payload") @@ -265,6 +269,8 @@ fn main() { .allowlist_function("rb_yjit_for_each_iseq") .allowlist_function("rb_yjit_obj_written") .allowlist_function("rb_yjit_str_simple_append") + .allowlist_function("rb_RSTRING_PTR") + .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") .allowlist_function("rb_yjit_exit_locations_dict") @@ -282,6 +288,7 @@ fn main() { .allowlist_function("rb_vm_insn_addr2opcode") .allowlist_function("rb_iseqw_to_iseq") .allowlist_function("rb_iseq_each") + .allowlist_function("rb_iseq_method_name") // From builtin.h 
.allowlist_type("rb_builtin_function.*") diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 64585653d94e47..cb026f6a3b96b2 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -6,6 +6,8 @@ use crate::cruby::*; use crate::options::*; use crate::stats::*; use crate::utils::*; +#[cfg(feature="disasm")] +use crate::disasm::*; use core::ffi::c_void; use std::cell::*; use std::hash::{Hash, Hasher}; @@ -1426,6 +1428,20 @@ fn gen_block_series_body( last_blockref = new_blockref; } + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is active, see if this iseq's location matches the given substring. + // If so, we print the new blocks to the console. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(blockid.iseq); + if iseq_location.contains(substr) { + let last_block = last_blockref.borrow(); + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx); + println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx)); + } + } + } + Some(first_block) } @@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) { verify_blockid(block.blockid); + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. 
+ if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(block.blockid.iseq); + if iseq_location.contains(substr) { + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx); + } + } + } + // Remove this block from the version array remove_block_version(blockref); diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 31f09ef98d4bd5..a329dadc9b3721 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22; pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608; pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42; pub type ruby_encoding_consts = u32; +pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0; +pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1; +pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2; +pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3; +pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4; +pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5; +pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6; +pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7; +pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8; +pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9; +pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10; +pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11; +pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12; +pub type ruby_preserved_encindex = u32; extern "C" { pub fn rb_obj_info_dump(obj: VALUE); } @@ -649,6 +663,9 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4; pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; +extern "C" { + pub fn 
rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE; +} extern "C" { pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE; } @@ -969,6 +986,9 @@ extern "C" { extern "C" { pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t; } +extern "C" { + pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE; +} extern "C" { pub fn rb_vm_barrier(); } @@ -1020,6 +1040,12 @@ extern "C" { extern "C" { pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; } +extern "C" { + pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; +} +extern "C" { + pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; +} pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; extern "C" { pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 2082648c4a32ca..83c80d6c668d6f 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -26,15 +26,17 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU // Get the iseq pointer from the wrapper let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; - let out_string = disasm_iseq(iseq); + // This will truncate disassembly of methods with 10k+ bytecodes. + // That's a good thing - this prints to console. 
+ let out_string = disasm_iseq_insn_range(iseq, 0, 9999); return rust_str_to_ruby(&out_string); } } #[cfg(feature = "disasm")] -fn disasm_iseq(iseq: IseqPtr) -> String { - let mut out = String::from(""); +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String { + let mut out = String::from(""); // Get a list of block versions generated for this iseq let mut block_list = get_iseq_block_list(iseq); @@ -84,47 +86,49 @@ fn disasm_iseq(iseq: IseqPtr) -> String { for block_idx in 0..block_list.len() { let block = block_list[block_idx].borrow(); let blockid = block.get_blockid(); - let end_idx = block.get_end_idx(); - let start_addr = block.get_start_addr().unwrap().raw_ptr(); - let end_addr = block.get_end_addr().unwrap().raw_ptr(); - let code_size = block.code_size(); - - // Write some info about the current block - let block_ident = format!( - "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", - block_idx + 1, - block_list.len(), - blockid.idx, - end_idx, - code_size - ); - out.push_str(&format!("== {:=<60}\n", block_ident)); - - // Disassemble the instructions - let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; - let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); - - // For each instruction in this block - for insn in insns.as_ref() { - // Comments for this block - if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { - for comment in comment_list { - out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); + if blockid.idx >= start_idx && blockid.idx < end_idx { + let end_idx = block.get_end_idx(); + let start_addr = block.get_start_addr().unwrap().raw_ptr(); + let end_addr = block.get_end_addr().unwrap().raw_ptr(); + let code_size = block.code_size(); + + // Write some info about the current block + let block_ident = format!( + "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", + block_idx + 1, + block_list.len(), + blockid.idx, + end_idx, + code_size + ); + 
out.push_str(&format!("== {:=<60}\n", block_ident)); + + // Disassemble the instructions + let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; + let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + + // For each instruction in this block + for insn in insns.as_ref() { + // Comments for this block + if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { + for comment in comment_list { + out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); + } } + out.push_str(&format!(" {}\n", insn)); } - out.push_str(&format!(" {}\n", insn)); - } - // If this is not the last block - if block_idx < block_list.len() - 1 { - // Compute the size of the gap between this block and the next - let next_block = block_list[block_idx + 1].borrow(); - let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); - let gap_size = (next_start_addr as usize) - (end_addr as usize); + // If this is not the last block + if block_idx < block_list.len() - 1 { + // Compute the size of the gap between this block and the next + let next_block = block_list[block_idx + 1].borrow(); + let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); + let gap_size = (next_start_addr as usize) - (end_addr as usize); - // Log the size of the gap between the blocks if nonzero - if gap_size > 0 { - out.push_str(&format!("... {} byte gap ...\n", gap_size)); + // Log the size of the gap between the blocks if nonzero + if gap_size > 0 { + out.push_str(&format!("... 
{} byte gap ...\n", gap_size)); + } } } } diff --git a/yjit/src/options.rs b/yjit/src/options.rs index 704c709baeaad8..7436b3583bb24a 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -1,7 +1,7 @@ use std::ffi::CStr; // Command-line options -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Debug)] #[repr(C)] pub struct Options { // Size of the executable memory block to allocate in MiB @@ -30,6 +30,9 @@ pub struct Options { /// Dump compiled and executed instructions for debugging pub dump_insns: bool, + /// Print when specific ISEQ items are compiled or invalidated + pub dump_iseq_disasm: Option, + /// Verify context objects (debug mode only) pub verify_ctx: bool, @@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options { dump_insns: false, verify_ctx: false, global_constant_state: false, + dump_iseq_disasm: None, }; /// Macro to get an option value by name @@ -64,6 +68,16 @@ macro_rules! get_option { } pub(crate) use get_option; +/// Macro to reference an option value by name; we assume it's a cloneable type like String or an Option of same. +macro_rules! get_option_ref { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($option_name:ident) => { + unsafe { &(OPTIONS.$option_name) } + }; +} +pub(crate) use get_option_ref; + /// Expected to receive what comes after the third dash in "--yjit-*". /// Empty string means user passed only "--yjit". C code rejects when /// they pass exact "--yjit-". 
@@ -105,6 +119,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } }, + ("dump-iseq-disasm", _) => unsafe { + OPTIONS.dump_iseq_disasm = Some(opt_val.to_string()); + }, + ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true }, ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true }, ("stats", "") => unsafe { OPTIONS.gen_stats = true }, diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index 02fbce47d895e6..ade573b8da9881 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -71,6 +71,41 @@ macro_rules! offset_of { #[allow(unused)] pub(crate) use offset_of; +// Convert a CRuby UTF-8-encoded RSTRING into a Rust string. +// This should work fine on ASCII strings and anything else +// that is considered legal UTF-8, including embedded nulls. +fn ruby_str_to_rust(v: VALUE) -> String { + // Make sure the CRuby encoding is UTF-8 compatible + let encoding = unsafe { rb_ENCODING_GET(v) } as u32; + assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII); + + let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; + let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); + let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; + String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation +} + +// Location is the file defining the method, colon, method name. +// Filenames are sometimes internal strings supplied to eval, +// so be careful with them. 
+pub fn iseq_get_location(iseq: IseqPtr) -> String { + let iseq_path = unsafe { rb_iseq_path(iseq) }; + let iseq_method = unsafe { rb_iseq_method_name(iseq) }; + + let mut s = if iseq_path == Qnil { + "None".to_string() + } else { + ruby_str_to_rust(iseq_path) + }; + s.push_str(":"); + if iseq_method == Qnil { + s.push_str("None"); + } else { + s.push_str(& ruby_str_to_rust(iseq_method)); + } + s +} + #[cfg(test)] mod tests { #[test] From fa9f4d387c2a46553051f01f4a28ae17d874e4c7 Mon Sep 17 00:00:00 2001 From: git Date: Thu, 25 Aug 2022 02:43:03 +0900 Subject: [PATCH 240/546] * 2022-08-25 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 195be17f30943c..806a93f92a761a 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 24 +#define RUBY_RELEASE_DAY 25 #include "ruby/version.h" #include "ruby/internal/abi.h" From 28a3434634a0116a6f2b9e2df0bcbbfb0cfbd28b Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Tue, 23 Aug 2022 13:23:40 -0700 Subject: [PATCH 241/546] Disable Ractor check on 32bit architectures Ractor verification requires storing the ractor id in the top 32 bits of the object header. Unfortunately 32 bit machines only have 32 bits in the object header. The verification code has a 32 bit left shift which doesn't work on i686 and will clobber existing flags. This commit disables the verification code on i686 since i686 will crash if it's enabled. 
Co-Authored-By: John Hawthorn Co-Authored-By: Jemma Issroff --- ractor.c | 4 ++++ ractor_core.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ractor.c b/ractor.c index 0306736c18a5e8..0eddc165fa4b23 100644 --- a/ractor.c +++ b/ractor.c @@ -74,7 +74,9 @@ static void ractor_lock_self(rb_ractor_t *cr, const char *file, int line) { VM_ASSERT(cr == GET_RACTOR()); +#if RACTOR_CHECK_MODE > 0 VM_ASSERT(cr->sync.locked_by != cr->pub.self); +#endif ractor_lock(cr, file, line); } @@ -94,7 +96,9 @@ static void ractor_unlock_self(rb_ractor_t *cr, const char *file, int line) { VM_ASSERT(cr == GET_RACTOR()); +#if RACTOR_CHECK_MODE > 0 VM_ASSERT(cr->sync.locked_by == cr->pub.self); +#endif ractor_unlock(cr, file, line); } diff --git a/ractor_core.h b/ractor_core.h index 412971decfc418..a065f5f809d0cf 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -5,7 +5,7 @@ #include "vm_debug.h" #ifndef RACTOR_CHECK_MODE -#define RACTOR_CHECK_MODE (0 || VM_CHECK_MODE || RUBY_DEBUG) +#define RACTOR_CHECK_MODE (VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE) #endif enum rb_ractor_basket_type { From f5f81bb777bb1dbf8da3f976136733e65b026fef Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 24 Aug 2022 14:15:41 -0700 Subject: [PATCH 242/546] Update Module#instance_methods documentation for visibility changes/aliases Requested by matz in comment on #18435. 
--- class.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/class.c b/class.c index 54d9e6e17795dc..5e57068f03c5ec 100644 --- a/class.c +++ b/class.c @@ -1755,6 +1755,15 @@ class_instance_method_list(int argc, const VALUE *argv, VALUE mod, int obj, int * B.instance_methods(true).include?(:method1) #=> true * C.instance_methods(false) #=> [:method3] * C.instance_methods.include?(:method2) #=> true + * + * Note that method visibility changes in the current class, as well as aliases, + * are considered as methods of the current class by this method: + * + * class C < B + * alias method4 method2 + * protected :method2 + * end + * C.instance_methods(false).sort #=> [:method2, :method3, :method4] */ VALUE From ad8774f8e537a3ad73ce56bd12e75c85271f93a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Wed, 24 Aug 2022 22:54:33 +0200 Subject: [PATCH 243/546] [rubygems/rubygems] Fix another regression for sorbet Recently a change was introduced to update the resolver platforms after it has been created, in order to remove the "ruby" platform from it if it's to be removed from the lockfile. However, it did not update the `@resolving_only_for_ruby` instance variable in that case, so the resolver was not properly doing the right thing anymore. To fix this, I tweaked the code to restore not changing resolver platforms after the resolver has been instantiated. https://github.com/rubygems/rubygems/commit/8fbc30a1d0 --- lib/bundler/definition.rb | 7 +- lib/bundler/resolver.rb | 2 - .../install/gemfile/specific_platform_spec.rb | 71 +++++++++++++++++++ 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 66efd82b53552d..8bd9e11f32bfb2 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -484,15 +484,13 @@ def unlocking? 
def resolver @resolver ||= begin last_resolve = converge_locked_specs + remove_ruby_from_platforms_if_necessary!(dependencies) Resolver.new(source_requirements, last_resolve, gem_version_promoter, additional_base_requirements_for_resolve, platforms) end end def expanded_dependencies - @expanded_dependencies ||= begin - remove_ruby_from_platforms_if_necessary!(dependencies) - expand_dependencies(dependencies + metadata_dependencies, true) - end + @expanded_dependencies ||= expand_dependencies(dependencies + metadata_dependencies, true) end def filter_specs(specs, deps) @@ -896,7 +894,6 @@ def remove_ruby_from_platforms_if_necessary!(dependencies) remove_platform(Gem::Platform::RUBY) add_current_platform - resolver.platforms = @platforms end def source_map diff --git a/lib/bundler/resolver.rb b/lib/bundler/resolver.rb index e382319112fc08..a74af45027f935 100644 --- a/lib/bundler/resolver.rb +++ b/lib/bundler/resolver.rb @@ -7,8 +7,6 @@ class Resolver include GemHelpers - attr_writer :platforms - # Figures out the best possible configuration of gems that satisfies # the list of passed dependencies and any child dependencies without # causing any gem activation errors. 
diff --git a/spec/bundler/install/gemfile/specific_platform_spec.rb b/spec/bundler/install/gemfile/specific_platform_spec.rb index bb5526203fc376..094186e63d7ea0 100644 --- a/spec/bundler/install/gemfile/specific_platform_spec.rb +++ b/spec/bundler/install/gemfile/specific_platform_spec.rb @@ -445,6 +445,77 @@ L end + it "automatically fixes the lockfile if only RUBY platform is locked and some gem has no RUBY variant available" do + build_repo4 do + build_gem("sorbet-static-and-runtime", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet", "= 0.5.10160" + s.add_runtime_dependency "sorbet-runtime", "= 0.5.10160" + end + + build_gem("sorbet", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet-static", "= 0.5.10160" + end + + build_gem("sorbet-runtime", "0.5.10160") + + build_gem("sorbet-static", "0.5.10160") do |s| + s.platform = Gem::Platform.local + end + end + + gemfile <<~G + source "#{file_uri_for(gem_repo4)}" + + gem "sorbet-static-and-runtime" + G + + lockfile <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + ruby + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "update" + + expect(lockfile).to eq <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + end + it "does not remove ruby if gems for other platforms, and not present in the lockfile, exist in the Gemfile" do build_repo4 do build_gem "nokogiri", 
"1.13.8" From c069f50401583ca0d021869b104e2eb9df2cfa6a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 25 Aug 2022 23:50:24 +0900 Subject: [PATCH 244/546] [DOC] Tell RDoc aliases of singleton class Since RDoc C parser cannot capture aliases which are using an expression other than a single variable as the class, use an intermediate variable for the singleton class. --- time.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/time.c b/time.c index 2b1523915279e1..d2d0de91e52950 100644 --- a/time.c +++ b/time.c @@ -5632,13 +5632,14 @@ Init_Time(void) rb_gc_register_mark_object(str_empty); rb_cTime = rb_define_class("Time", rb_cObject); + VALUE scTime = rb_singleton_class(rb_cTime); rb_include_module(rb_cTime, rb_mComparable); rb_define_alloc_func(rb_cTime, time_s_alloc); rb_define_singleton_method(rb_cTime, "utc", time_s_mkutc, -1); rb_define_singleton_method(rb_cTime, "local", time_s_mktime, -1); - rb_define_alias(rb_singleton_class(rb_cTime), "gm", "utc"); - rb_define_alias(rb_singleton_class(rb_cTime), "mktime", "local"); + rb_define_alias(scTime, "gm", "utc"); + rb_define_alias(scTime, "mktime", "local"); rb_define_method(rb_cTime, "to_i", time_to_i, 0); rb_define_method(rb_cTime, "to_f", time_to_f, 0); @@ -5707,7 +5708,7 @@ Init_Time(void) /* methods for marshaling */ rb_define_private_method(rb_cTime, "_dump", time_dump, -1); - rb_define_private_method(rb_singleton_class(rb_cTime), "_load", time_load, 1); + rb_define_private_method(scTime, "_load", time_load, 1); #if 0 /* Time will support marshal_dump and marshal_load in the future (1.9 maybe) */ rb_define_private_method(rb_cTime, "marshal_dump", time_mdump, 0); From c2daa056934185641469e27713daf1fbe27552c4 Mon Sep 17 00:00:00 2001 From: git Date: Fri, 26 Aug 2022 00:08:09 +0900 Subject: [PATCH 245/546] * 2022-08-26 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 806a93f92a761a..1b29b880e0d02b 100644 --- 
a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 25 +#define RUBY_RELEASE_DAY 26 #include "ruby/version.h" #include "ruby/internal/abi.h" From b2d0f788694c680d2abf695358e42d819b11b2ec Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 25 Aug 2022 09:28:07 +0200 Subject: [PATCH 246/546] Fix private methods reported as protected when called via Symbol#to_proc Ref: bfa6a8ddc84fffe0aef5a0f91b417167e124dbbf Ref: [Bug #18826] --- spec/ruby/core/symbol/to_proc_spec.rb | 8 ++++---- vm_insnhelper.c | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/spec/ruby/core/symbol/to_proc_spec.rb b/spec/ruby/core/symbol/to_proc_spec.rb index 81939e0046c540..6d9c4bc622aa45 100644 --- a/spec/ruby/core/symbol/to_proc_spec.rb +++ b/spec/ruby/core/symbol/to_proc_spec.rb @@ -58,8 +58,8 @@ @a = [] singleton_class.class_eval(&body) tap(&:pub) - proc{tap(&:pro)}.should raise_error(NoMethodError) - proc{tap(&:pri)}.should raise_error(NoMethodError) + proc{tap(&:pro)}.should raise_error(NoMethodError, /protected method `pro' called/) + proc{tap(&:pri)}.should raise_error(NoMethodError, /private method `pri' called/) @a.should == [:pub] @a = [] @@ -67,8 +67,8 @@ o = c.new o.instance_variable_set(:@a, []) o.tap(&:pub) - proc{tap(&:pro)}.should raise_error(NoMethodError) - proc{o.tap(&:pri)}.should raise_error(NoMethodError) + proc{tap(&:pro)}.should raise_error(NoMethodError, /protected method `pro' called/) + proc{o.tap(&:pri)}.should raise_error(NoMethodError, /private method `pri' called/) o.a.should == [:pub] end end diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 68362ddf60cea9..3c41adcdc9f6ea 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3256,6 +3256,7 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, return vm_call_method_each_type(ec, reg_cfp, calling); case METHOD_VISI_PRIVATE: vm_cc_method_missing_reason_set(cc, MISSING_PRIVATE); + break; case 
METHOD_VISI_PROTECTED: vm_cc_method_missing_reason_set(cc, MISSING_PROTECTED); break; From 8706b74b902db70c5c00c8008a0f0b045381eb7e Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 25 Aug 2022 13:02:18 -0500 Subject: [PATCH 247/546] [DOC] Enhanced RDoc for Time (#6277) Deletes the :include: files in doc/time, which became no longer workable when @nobu pointed out that some (but not all) creator methods accept string values as well as integer-like values. Changes to methods: Time.utc Time.local Time.at Time.new --- doc/time/in.rdoc | 4 - doc/time/mon-min.rdoc | 8 -- doc/time/msec.rdoc | 2 - doc/time/nsec.rdoc | 2 - doc/time/sec.rdoc | 2 - doc/time/sec_i.rdoc | 1 - doc/time/usec.rdoc | 2 - doc/time/year.rdoc | 1 - doc/time/zone_and_in.rdoc | 5 -- time.c | 145 +++++++++++++++++++++++------------ timev.rb | 155 +++++++++++++++++++++++++++----------- 11 files changed, 209 insertions(+), 118 deletions(-) delete mode 100644 doc/time/in.rdoc delete mode 100644 doc/time/mon-min.rdoc delete mode 100644 doc/time/msec.rdoc delete mode 100644 doc/time/nsec.rdoc delete mode 100644 doc/time/sec.rdoc delete mode 100644 doc/time/sec_i.rdoc delete mode 100644 doc/time/usec.rdoc delete mode 100644 doc/time/year.rdoc delete mode 100644 doc/time/zone_and_in.rdoc diff --git a/doc/time/in.rdoc b/doc/time/in.rdoc deleted file mode 100644 index 506bd916280a24..00000000000000 --- a/doc/time/in.rdoc +++ /dev/null @@ -1,4 +0,0 @@ -- in: zone: a timezone +zone+. - -For forms of argument +zone+, see -{Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. diff --git a/doc/time/mon-min.rdoc b/doc/time/mon-min.rdoc deleted file mode 100644 index 5bd430c74a75c2..00000000000000 --- a/doc/time/mon-min.rdoc +++ /dev/null @@ -1,8 +0,0 @@ -- +month+: a month value, which may be: - - An integer month in the range 1..12. - - A 3-character string that matches regular expression - /jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec/i. 
-- +day+: an integer day in the range 1..31 - (less than 31 for some months). -- +hour+: an integer hour in the range 0..23. -- +min+: an integer minute in the range 0..59. diff --git a/doc/time/msec.rdoc b/doc/time/msec.rdoc deleted file mode 100644 index ce5d1e614516d3..00000000000000 --- a/doc/time/msec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +msec+ is the number of milliseconds (Integer, Float, or Rational) - in the range 0..1000. diff --git a/doc/time/nsec.rdoc b/doc/time/nsec.rdoc deleted file mode 100644 index a2dfe2d6086304..00000000000000 --- a/doc/time/nsec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +nsec+ is the number of nanoseconds (Integer, Float, or Rational) - in the range 0..1000000000. diff --git a/doc/time/sec.rdoc b/doc/time/sec.rdoc deleted file mode 100644 index 049c712110d89a..00000000000000 --- a/doc/time/sec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +sec+ is the number of seconds (Integer, Float, or Rational) - in the range 0..60. diff --git a/doc/time/sec_i.rdoc b/doc/time/sec_i.rdoc deleted file mode 100644 index fd5519082cbb09..00000000000000 --- a/doc/time/sec_i.rdoc +++ /dev/null @@ -1 +0,0 @@ -- +sec_i+ is the integer number of seconds in the range 0..60. diff --git a/doc/time/usec.rdoc b/doc/time/usec.rdoc deleted file mode 100644 index bb5a46419a18af..00000000000000 --- a/doc/time/usec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +usec+ is the number of microseconds (Integer, Float, or Rational) - in the range 0..1000000. diff --git a/doc/time/year.rdoc b/doc/time/year.rdoc deleted file mode 100644 index 2222b830d76339..00000000000000 --- a/doc/time/year.rdoc +++ /dev/null @@ -1 +0,0 @@ -- +year+: an integer year. diff --git a/doc/time/zone_and_in.rdoc b/doc/time/zone_and_in.rdoc deleted file mode 100644 index f36940ee13ccad..00000000000000 --- a/doc/time/zone_and_in.rdoc +++ /dev/null @@ -1,5 +0,0 @@ -- +zone+: a timezone +zone+. -- in: zone: a timezone +zone+. - -For forms of +zone+, see -{Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. 
diff --git a/time.c b/time.c index d2d0de91e52950..505ab4835c111a 100644 --- a/time.c +++ b/time.c @@ -3348,32 +3348,100 @@ tmcmp(struct tm *a, struct tm *b) /* * call-seq: - * Time.utc(year, month = 1, day = 1, hour = 0, min = 0, sec_i = 0, usec = 0) -> new_time - * Time.utc(sec_i, min, hour, day, month, year, dummy, dummy, dummy, dummy) -> new_time + * Time.utc(year, month = 1, mday = 1, hour = 0, min = 0, sec = 0, usec = 0) -> new_time + * Time.utc(sec, min, hour, mday, month, year, dummy, dummy, dummy, dummy) -> new_time * - * Returns a new \Time object based the on given arguments; - * its timezone is UTC. + * Returns a new \Time object based the on given arguments, + * in the UTC timezone. * - * In the first form (up to seven arguments), argument +year+ is required. + * With one to seven arguments given, + * the arguments are interpreted as in the first calling sequence above: * - * Time.utc(2000) # => 2000-01-01 00:00:00 UTC - * Time.utc(0, 1, 2, 3, 4, 5, 6.5) # => 0000-01-02 03:04:05.0000065 UTC + * Time.utc(year, month = 1, mday = 1, hour = 0, min = 0, sec = 0, usec = 0) * - * In the second form, all ten arguments are required, - * though the last four are ignored. - * This form is useful for creating a time from a 10-element array - * such as is returned by #to_a. + * Examples: * - * array = Time.now.to_a - * # => [55, 14, 10, 7, 7, 2022, 4, 188, true, "Central Daylight Time"] - * array[5] = 2000 - * Time.utc(*array) # => 2000-07-07 10:14:55 UTC + * Time.utc(2000) # => 2000-01-01 00:00:00 UTC + * Time.utc(-2000) # => -2000-01-01 00:00:00 UTC * - * Parameters: - * :include: doc/time/year.rdoc - * :include: doc/time/mon-min.rdoc - * :include: doc/time/sec_i.rdoc - * :include: doc/time/usec.rdoc + * There are no minimum and maximum values for the required argument +year+. 
+ * + * For the optional arguments: + * + * - +month+: Month in range (1..12), or case-insensitive + * 3-letter month name: + * + * Time.utc(2000, 1) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 12) # => 2000-12-01 00:00:00 UTC + * Time.utc(2000, 'jan') # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 'JAN') # => 2000-01-01 00:00:00 UTC + * + * - +mday+: Month day in range(1..31): + * + * Time.utc(2000, 1, 1) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 31) # => 2000-01-31 00:00:00 UTC + * + * - +hour+: Hour in range (0..23), or 24 if +min+, +sec+, and +usec+ + * are zero: + * + * Time.utc(2000, 1, 1, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 23) # => 2000-01-01 23:00:00 UTC + * Time.utc(2000, 1, 1, 24) # => 2000-01-02 00:00:00 UTC + * + * - +min+: Minute in range (0..59): + * + * Time.utc(2000, 1, 1, 0, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 0, 59) # => 2000-01-01 00:59:00 UTC + * + * - +sec+: Second in range (0..59), or 60 if +usec+ is zero: + * + * Time.utc(2000, 1, 1, 0, 0, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 0, 0, 59) # => 2000-01-01 00:00:59 UTC + * Time.utc(2000, 1, 1, 0, 0, 60) # => 2000-01-01 00:01:00 UTC + * + * - +usec+: Microsecond in range (0..999999): + * + * Time.utc(2000, 1, 1, 0, 0, 0, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 0, 0, 0, 999999) # => 2000-01-01 00:00:00.999999 UTC + * + * The values may be: + * + * - Integers, as above. 
+ * - Numerics convertible to integers: + * + * Time.utc(Float(0.0), Rational(1, 1), 1.0, 0.0, 0.0, 0.0, 0.0) + * # => 0000-01-01 00:00:00 UTC + * + * - \String integers: + * + * a = %w[0 1 1 0 0 0 0 0] + * # => ["0", "1", "1", "0", "0", "0", "0", "0"] + * Time.utc(*a) # => 0000-01-01 00:00:00 UTC + * + * When exactly ten arguments are given, + * the arguments are interpreted as in the second calling sequence above: + * + * Time.utc(sec, min, hour, mday, month, year, dummy, dummy, dummy, dummy) + * + * where the +dummy+ arguments are ignored: + * + * a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * # => [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * Time.utc(*a) # => 0005-04-03 02:01:00 UTC + * + * This form is useful for creating a \Time object from a 10-element + * array returned by Time.to_a: + * + * t = Time.new(2000, 1, 2, 3, 4, 5, 6) # => 2000-01-02 03:04:05 +000006 + * a = t.to_a # => [5, 4, 3, 2, 1, 2000, 0, 2, false, nil] + * Time.utc(*a) # => 2000-01-02 03:04:05 UTC + * + * The two forms have their first six arguments in common, + * though in different orders; + * the ranges of these common arguments are the same for both forms; see above. + * + * Raises an exception if the number of arguments is eight, nine, + * or greater than ten. * * Time.gm is an alias for Time.utc. * @@ -3391,36 +3459,19 @@ time_s_mkutc(int argc, VALUE *argv, VALUE klass) /* * call-seq: - * Time.local(year, month = 1, day = 1, hour = 0, min = 0, sec_i = 0, usec = 0) -> new_time - * Time.local(sec, min, hour, day, month, year, dummy, dummy, dummy, dummy) -> new_time - * - * Returns a new \Time object based the on given arguments; - * its timezone is the local timezone. - * - * In the first form (up to seven arguments), argument +year+ is required. - * - * Time.local(2000) # => 2000-01-01 00:00:00 -0600 - * Time.local(0, 1, 2, 3, 4, 5, 6.5) # => 0000-01-02 03:04:05.0000065 -0600 - * - * In the second form, all ten arguments are required, - * though the last four are ignored. 
- * This form is useful for creating a time from a 10-element array - * such as those returned by #to_a. - * - * array = Time.now.to_a - * # => [57, 18, 10, 7, 7, 2022, 4, 188, true, "Central Daylight Time"] - * array[5] = 2000 - * Time.local(*array) # => 2000-07-07 10:18:57 -0500 + * Time.local(year, month = 1, mday = 1, hour = 0, min = 0, sec = 0, usec = 0) -> new_time + * Time.local(sec, min, hour, mday, month, year, dummy, dummy, dummy, dummy) -> new_time * - * Parameters: - * :include: doc/time/year.rdoc - * :include: doc/time/mon-min.rdoc - * :include: doc/time/sec_i.rdoc - * :include: doc/time/usec.rdoc + * Like Time.utc, except that the returned \Time object + * has the local timezone, not the UTC timezone: * - * Time.mktime is an alias for Time.local. + * # With seven arguments. + * Time.local(0, 1, 2, 3, 4, 5, 6) + * # => 0000-01-02 03:04:05.000006 -0600 + * # With exactly ten arguments. + * Time.local(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + * # => 0005-04-03 02:01:00 -0600 * - * Related: Time.utc. */ static VALUE diff --git a/timev.rb b/timev.rb index f4777358753024..ad97d63b5514ac 100644 --- a/timev.rb +++ b/timev.rb @@ -223,48 +223,58 @@ def self.now(in: nil) Primitive.time_s_now(Primitive.arg!(:in)) end - # _Time_ + # Returns a new \Time object based on the given arguments. + # + # Required argument +time+ may be either of: + # + # - A \Time object, whose value is the basis for the returned time; + # also influenced by optional keyword argument +in:+ (see below). + # - A numeric number of seconds (since the epoch) for the returned time. 
+ # + # Examples: + # + # t = Time.new(2000, 12, 31, 23, 59, 59) # => 2000-12-31 23:59:59 -0600 + # secs = t.to_i # => 978328799 + # Time.at(secs) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs + 0.5) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(1000000000) # => 2001-09-08 20:46:40 -0500 + # Time.at(0) # => 1969-12-31 18:00:00 -0600 + # Time.at(-1000000000) # => 1938-04-24 17:13:20 -0500 + # + # Optional numeric argument +subsec+ and optional symbol argument +units+ + # work together to specify subseconds for the returned time; + # argument +units+ specifies the units for +subsec+: + # + # - +:millisecond+: +subsec+ in milliseconds: # - # This form accepts a \Time object +time+ - # and optional keyword argument +in+: + # Time.at(secs, 0, :millisecond) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs, 500, :millisecond) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(secs, 1000, :millisecond) # => 2001-01-01 00:00:00 -0600 + # Time.at(secs, -1000, :millisecond) # => 2000-12-31 23:59:58 -0600 # - # Time.at(Time.new) # => 2021-04-26 08:52:31.6023486 -0500 - # Time.at(Time.new, in: '+09:00') # => 2021-04-26 22:52:31.6023486 +0900 + # - +:microsecond+ or +:usec+: +subsec+ in microseconds: # - # _Seconds_ + # Time.at(secs, 0, :microsecond) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs, 500000, :microsecond) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(secs, 1000000, :microsecond) # => 2001-01-01 00:00:00 -0600 + # Time.at(secs, -1000000, :microsecond) # => 2000-12-31 23:59:58 -0600 # - # This form accepts a numeric number of seconds +sec+ - # and optional keyword argument +in+: + # - +:nsec+ or +:nanosecond+: +subsec+ in nanoseconds: # - # Time.at(946702800) # => 1999-12-31 23:00:00 -0600 - # Time.at(946702800, in: '+09:00') # => 2000-01-01 14:00:00 +0900 + # Time.at(secs, 0, :nanosecond) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs, 500000000, :nanosecond) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(secs, 1000000000, :nanosecond) # => 2001-01-01 00:00:00 -0600 + # 
Time.at(secs, -1000000000, :nanosecond) # => 2000-12-31 23:59:58 -0600 # - # Seconds with Subseconds and Units # - # This form accepts an integer number of seconds +sec_i+, - # a numeric number of milliseconds +msec+, - # a symbol argument for the subsecond unit type (defaulting to :usec), - # and an optional keyword argument +in+: + # Optional keyword argument +in: zone specifies the timezone + # for the returned time: # - # Time.at(946702800, 500, :millisecond) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500, :millisecond, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000, :usec) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000, :microsecond) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000, :usec, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000, :microsecond, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000000, :nsec) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000000, :nanosecond) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000000, :nsec, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000000, :nanosecond, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 + # Time.at(secs, in: '+12:00') # => 2001-01-01 17:59:59 +1200 + # Time.at(secs, in: '-12:00') # => 2000-12-31 17:59:59 -1200 # - # Parameters: - # :include: doc/time/sec_i.rdoc - # :include: doc/time/msec.rdoc - # :include: doc/time/usec.rdoc - # :include: doc/time/nsec.rdoc - # :include: doc/time/in.rdoc + # For the forms of argument +zone+, see + # {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. # def self.at(time, subsec = false, unit = :microsecond, in: nil) if Primitive.mandatory_only? 
@@ -274,24 +284,81 @@ def self.at(time, subsec = false, unit = :microsecond, in: nil) end end - # Returns a new \Time object based on the given arguments. + # Returns a new \Time object based on the given arguments, + # by default in the local timezone. # # With no positional arguments, returns the value of Time.now: # - # Time.new # => 2021-04-24 17:27:46.0512465 -0500 + # Time.new # => 2021-04-24 17:27:46.0512465 -0500 + # + # With one to six arguments, returns a new \Time object + # based on the given arguments, in the local timezone. + # + # Time.new(2000, 1, 2, 3, 4, 5) # => 2000-01-02 03:04:05 -0600 + # + # For the positional arguments (other than +zone+): + # + # - +year+: Year, with no range limits: + # + # Time.new(999999999) # => 999999999-01-01 00:00:00 -0600 + # Time.new(-999999999) # => -999999999-01-01 00:00:00 -0600 + # + # - +month+: Month in range (1..12), or case-insensitive + # 3-letter month name: + # + # Time.new(2000, 1) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 12) # => 2000-12-01 00:00:00 -0600 + # Time.new(2000, 'jan') # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 'JAN') # => 2000-01-01 00:00:00 -0600 + # + # - +mday+: Month day in range(1..31): + # + # Time.new(2000, 1, 1) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 31) # => 2000-01-31 00:00:00 -0600 + # + # - +hour+: Hour in range (0..23), or 24 if +min+, +sec+, and +usec+ + # are zero: + # + # Time.new(2000, 1, 1, 0) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 1, 23) # => 2000-01-01 23:00:00 -0600 + # Time.new(2000, 1, 1, 24) # => 2000-01-02 00:00:00 -0600 + # + # - +min+: Minute in range (0..59): + # + # Time.new(2000, 1, 1, 0, 0) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 1, 0, 59) # => 2000-01-01 00:59:00 -0600 + # + # - +sec+: Second in range (0..59), or 60 if +usec+ is zero: + # + # Time.new(2000, 1, 1, 0, 0, 0) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 1, 0, 0, 59) # => 2000-01-01 00:00:59 -0600 + # Time.new(2000, 1, 1, 0, 0, 60) # 
=> 2000-01-01 00:01:00 -0600 + # + # These values may be: + # + # - Integers, as above. + # - Numerics convertible to integers: + # + # Time.new(Float(0.0), Rational(1, 1), 1.0, 0.0, 0.0, 0.0) + # # => 0000-01-01 00:00:00 -0600 # - # Otherwise, returns a new \Time object based on the given parameters: + # - \String integers: # - # Time.new(2000) # => 2000-01-01 00:00:00 -0600 - # Time.new(2000, 12, 31, 23, 59, 59.5) # => 2000-12-31 23:59:59.5 -0600 - # Time.new(2000, 12, 31, 23, 59, 59.5, '+09:00') # => 2000-12-31 23:59:59.5 +0900 + # a = %w[0 1 1 0 0 0] + # # => ["0", "1", "1", "0", "0", "0"] + # Time.new(*a) # => 0000-01-01 00:00:00 -0600 # - # Parameters: + # When positional argument +zone+ or keyword argument +in:+ is given, + # the new \Time object is in the specified timezone. + # For the forms of argument +zone+, see + # {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]: # - # :include: doc/time/year.rdoc - # :include: doc/time/mon-min.rdoc - # :include: doc/time/sec.rdoc - # :include: doc/time/zone_and_in.rdoc + # Time.new(2000, 1, 1, 0, 0, 0, '+12:00') + # # => 2000-01-01 00:00:00 +1200 + # Time.new(2000, 1, 1, 0, 0, 0, in: '-12:00') + # # => 2000-01-01 00:00:00 -1200 + # Time.new(in: '-12:00') + # # => 2022-08-23 08:49:26.1941467 -1200 # def initialize(year = (now = true), mon = nil, mday = nil, hour = nil, min = nil, sec = nil, zone = nil, in: nil) if zone From 52560a92631e7b327365383e7ce83c80fcfafc05 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 11:07:23 -0700 Subject: [PATCH 248/546] Skip flaky mkmf tests on AppVeyor It seems like TestMkmfTryConstant started to randomly fail, maybe related to 96562a517d3373466ec306b5f821a41f4758d2a6 or 073f3b7e0ad94657c04573983affb9d66e6bff2c. Some of them seem to have failed even after retries, so it feels like there's a bug in the implementation, which leaves an unrecoverable situation. 
https://ci.appveyor.com/project/ruby/ruby/builds/44559958/job/7uub5bmkvy4pwwl8 https://ci.appveyor.com/project/ruby/ruby/builds/44579924/job/n81xmb2mqs6no7dm https://ci.appveyor.com/project/ruby/ruby/builds/44558471/job/s4qwymmlxmfjjd35 Until we figure out what it is, I'd like to stabilize the CI by not running it. --- test/mkmf/test_constant.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/mkmf/test_constant.rb b/test/mkmf/test_constant.rb index f6834c7f284f98..e12250d0a22834 100644 --- a/test/mkmf/test_constant.rb +++ b/test/mkmf/test_constant.rb @@ -2,6 +2,12 @@ require_relative 'base' class TestMkmfTryConstant < TestMkmf + def setup + if ENV.key?('APPVEYOR') + omit 'This test fails too often on AppVeyor' + end + end + def test_simple assert_equal( 0, mkmf {try_constant("0")}, MKMFLOG) assert_equal( 1, mkmf {try_constant("1")}, MKMFLOG) From af9fa16c96284f61b595a8f994926533fd51fd3b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 11:31:16 -0700 Subject: [PATCH 249/546] Make sure super is called and fix teardown --- test/mkmf/base.rb | 1 + test/mkmf/test_constant.rb | 2 ++ 2 files changed, 3 insertions(+) diff --git a/test/mkmf/base.rb b/test/mkmf/base.rb index e097c396d6e018..ec42bca100b935 100644 --- a/test/mkmf/base.rb +++ b/test/mkmf/base.rb @@ -106,6 +106,7 @@ def setup end def teardown + return if @omitted rbconfig0 = @rbconfig mkconfig0 = @mkconfig RbConfig.module_eval { diff --git a/test/mkmf/test_constant.rb b/test/mkmf/test_constant.rb index e12250d0a22834..f22b82ff950b4f 100644 --- a/test/mkmf/test_constant.rb +++ b/test/mkmf/test_constant.rb @@ -4,8 +4,10 @@ class TestMkmfTryConstant < TestMkmf def setup if ENV.key?('APPVEYOR') + @omitted = true omit 'This test fails too often on AppVeyor' end + super end def test_simple From 881bc2a1765e7c19ab389c53841adc5ac329f1aa Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 11:36:04 -0700 Subject: [PATCH 250/546] Skip running a DRb test on MinGW It hangs 
even after a retry https://github.com/ruby/ruby/runs/7966439530?check_suite_focus=true We contacted GitHub Suppport about this before, and we concluded that the problem is on our end. Unfortunately we don't have a bandwidth to fix this MinGW problem, so until we get to work on it, this should be just skipped to avoid a sporadic CI timeout. --- test/drb/drbtest.rb | 1 + test/drb/test_drbssl.rb | 4 ++++ test/rinda/test_rinda.rb | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/test/drb/drbtest.rb b/test/drb/drbtest.rb index 3c33aedb6f1763..56d73eb5097f6b 100644 --- a/test/drb/drbtest.rb +++ b/test/drb/drbtest.rb @@ -90,6 +90,7 @@ def setup_service(service_name) end def teardown + return if @omitted @ext.stop_service if defined?(@ext) && @ext if defined?(@service_name) && @service_name @drb_service.manager.unregist(@service_name) diff --git a/test/drb/test_drbssl.rb b/test/drb/test_drbssl.rb index 0254c7ab50d8bc..4369c6614b9796 100644 --- a/test/drb/test_drbssl.rb +++ b/test/drb/test_drbssl.rb @@ -41,6 +41,10 @@ def start class TestDRbSSLCore < Test::Unit::TestCase include DRbCore def setup + if RUBY_PLATFORM.match?(/mingw/) + @omitted = true + omit 'This test seems to randomly hang on GitHub Actions MinGW' + end @drb_service = DRbSSLService.new super setup_service 'ut_drb_drbssl.rb' diff --git a/test/rinda/test_rinda.rb b/test/rinda/test_rinda.rb index dbe414b783fefd..74d8d363b4b705 100644 --- a/test/rinda/test_rinda.rb +++ b/test/rinda/test_rinda.rb @@ -498,7 +498,7 @@ class TupleSpaceProxyTest < Test::Unit::TestCase def setup if RUBY_PLATFORM.match?(/mingw/) @omitted = true - omit 'This test seems to randomly hang on GitHub Actions MinGW UCRT64' + omit 'This test seems to randomly hang on GitHub Actions MinGW' end super ThreadGroup.new.add(Thread.current) From 21cac42385e1a116d287e155e461453b830640d2 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Thu, 26 May 2022 10:37:01 -0700 Subject: [PATCH 251/546] Raise ArgumentError for IO.foreach with 
limit of 0 Makes behavior consistent with IO.readlines. Fixes [Bug #18767] --- io.c | 2 ++ test/ruby/test_io.rb | 2 ++ 2 files changed, 4 insertions(+) diff --git a/io.c b/io.c index f2c64989df75f8..1eb48dd19b22d0 100644 --- a/io.c +++ b/io.c @@ -11518,6 +11518,8 @@ io_s_foreach(VALUE v) struct getline_arg *arg = (void *)v; VALUE str; + if (arg->limit == 0) + rb_raise(rb_eArgError, "invalid limit: 0 for foreach"); while (!NIL_P(str = rb_io_getline_1(arg->rs, arg->limit, arg->chomp, arg->io))) { rb_lastline_set(str); rb_yield(str); diff --git a/test/ruby/test_io.rb b/test/ruby/test_io.rb index 6a3d7594cf04e9..d6fcf16ddd1ee2 100644 --- a/test/ruby/test_io.rb +++ b/test/ruby/test_io.rb @@ -2602,6 +2602,8 @@ def test_foreach bug = '[ruby-dev:31525]' assert_raise(ArgumentError, bug) {IO.foreach} + assert_raise(ArgumentError, "[Bug #18767] [ruby-core:108499]") {IO.foreach(__FILE__, 0){}} + a = nil assert_nothing_raised(ArgumentError, bug) {a = IO.foreach(t.path).to_a} assert_equal(["foo\n", "bar\n", "baz\n"], a, bug) From 3504be1bc13235407e01f55d3df6fe0b4cb5ba9e Mon Sep 17 00:00:00 2001 From: Jun Aruga Date: Thu, 25 Aug 2022 20:11:34 +0200 Subject: [PATCH 252/546] [ruby/irb] Require RDoc in `input-method.rb` again in a limited scope. RDoc is implemented as soft dependency in IRB. See how the rdoc is required in the files. I reverted the commit below. ``` $ grep -ril rdoc lib/ lib/irb/cmd/help.rb lib/irb/completion.rb lib/irb/easter-egg.rb lib/irb/input-method.rb ``` --- Revert "Remove `require` in signal handler to avoid ThreadError" This reverts commit https://github.com/ruby/irb/commit/5f749c613c89. 
https://github.com/ruby/irb/commit/b24852058f --- lib/irb/input-method.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb index a8227caa9c32dd..b77fd3207def87 100644 --- a/lib/irb/input-method.rb +++ b/lib/irb/input-method.rb @@ -320,6 +320,11 @@ def auto_indent(&block) [195, 164], # The "ä" that appears when Alt+d is pressed on xterm. [226, 136, 130] # The "∂" that appears when Alt+d in pressed on iTerm2. ] + begin + require 'rdoc' + rescue LoadError + return nil + end if just_cursor_moving and completion_journey_data.nil? return nil From 0d9f4ea0d45f6577a4a13f898e981958a1f039c6 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 19 Aug 2022 15:37:45 +0900 Subject: [PATCH 253/546] Import spec examples from ruby/syntax_suggest --- .../fixtures/derailed_require_tree.rb.txt | 74 + spec/syntax_suggest/fixtures/rexe.rb.txt | 569 + spec/syntax_suggest/fixtures/routes.rb.txt | 121 + .../fixtures/ruby_buildpack.rb.txt | 1344 +++ .../fixtures/syntax_tree.rb.txt | 9234 +++++++++++++++++ .../fixtures/this_project_extra_def.rb.txt | 64 + spec/syntax_suggest/fixtures/webmock.rb.txt | 35 + .../integration/exe_cli_spec.rb | 22 + .../integration/ruby_command_line_spec.rb | 151 + .../integration/syntax_suggest_spec.rb | 211 + spec/syntax_suggest/spec_helper.rb | 90 + spec/syntax_suggest/unit/api_spec.rb | 83 + .../unit/around_block_scan_spec.rb | 165 + spec/syntax_suggest/unit/block_expand_spec.rb | 200 + .../unit/capture_code_context_spec.rb | 202 + .../unit/clean_document_spec.rb | 259 + spec/syntax_suggest/unit/cli_spec.rb | 224 + spec/syntax_suggest/unit/code_block_spec.rb | 77 + .../syntax_suggest/unit/code_frontier_spec.rb | 135 + spec/syntax_suggest/unit/code_line_spec.rb | 164 + spec/syntax_suggest/unit/code_search_spec.rb | 505 + .../unit/display_invalid_blocks_spec.rb | 172 + .../unit/explain_syntax_spec.rb | 255 + spec/syntax_suggest/unit/lex_all_spec.rb | 29 + .../unit/pathname_from_message_spec.rb | 56 + 
.../unit/priority_queue_spec.rb | 95 + 26 files changed, 14536 insertions(+) create mode 100644 spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt create mode 100755 spec/syntax_suggest/fixtures/rexe.rb.txt create mode 100644 spec/syntax_suggest/fixtures/routes.rb.txt create mode 100644 spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt create mode 100644 spec/syntax_suggest/fixtures/syntax_tree.rb.txt create mode 100644 spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt create mode 100644 spec/syntax_suggest/fixtures/webmock.rb.txt create mode 100644 spec/syntax_suggest/integration/exe_cli_spec.rb create mode 100644 spec/syntax_suggest/integration/ruby_command_line_spec.rb create mode 100644 spec/syntax_suggest/integration/syntax_suggest_spec.rb create mode 100644 spec/syntax_suggest/spec_helper.rb create mode 100644 spec/syntax_suggest/unit/api_spec.rb create mode 100644 spec/syntax_suggest/unit/around_block_scan_spec.rb create mode 100644 spec/syntax_suggest/unit/block_expand_spec.rb create mode 100644 spec/syntax_suggest/unit/capture_code_context_spec.rb create mode 100644 spec/syntax_suggest/unit/clean_document_spec.rb create mode 100644 spec/syntax_suggest/unit/cli_spec.rb create mode 100644 spec/syntax_suggest/unit/code_block_spec.rb create mode 100644 spec/syntax_suggest/unit/code_frontier_spec.rb create mode 100644 spec/syntax_suggest/unit/code_line_spec.rb create mode 100644 spec/syntax_suggest/unit/code_search_spec.rb create mode 100644 spec/syntax_suggest/unit/display_invalid_blocks_spec.rb create mode 100644 spec/syntax_suggest/unit/explain_syntax_spec.rb create mode 100644 spec/syntax_suggest/unit/lex_all_spec.rb create mode 100644 spec/syntax_suggest/unit/pathname_from_message_spec.rb create mode 100644 spec/syntax_suggest/unit/priority_queue_spec.rb diff --git a/spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt b/spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt new file mode 100644 index 00000000000000..668ac4010ba2f8 
--- /dev/null +++ b/spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +# Tree structure used to store and sort require memory costs +# RequireTree.new('get_process_mem') +module DerailedBenchmarks + class RequireTree + REQUIRED_BY = {} + + attr_reader :name + attr_writer :cost + attr_accessor :parent + + def initialize(name) + @name = name + @children = {} + @cost = 0 + + def self.reset! + REQUIRED_BY.clear + if defined?(Kernel::REQUIRE_STACK) + Kernel::REQUIRE_STACK.clear + + Kernel::REQUIRE_STACK.push(TOP_REQUIRE) + end + end + + def <<(tree) + @children[tree.name.to_s] = tree + tree.parent = self + (REQUIRED_BY[tree.name.to_s] ||= []) << self.name + end + + def [](name) + @children[name.to_s] + end + + # Returns array of child nodes + def children + @children.values + end + + def cost + @cost || 0 + end + + # Returns sorted array of child nodes from Largest to Smallest + def sorted_children + children.sort { |c1, c2| c2.cost <=> c1.cost } + end + + def to_string + str = String.new("#{name}: #{cost.round(4)} MiB") + if parent && REQUIRED_BY[self.name.to_s] + names = REQUIRED_BY[self.name.to_s].uniq - [parent.name.to_s] + if names.any? 
+ str << " (Also required by: #{ names.first(2).join(", ") }" + str << ", and #{names.count - 2} others" if names.count > 3 + str << ")" + end + end + str + end + + # Recursively prints all child nodes + def print_sorted_children(level = 0, out = STDOUT) + return if cost < ENV['CUT_OFF'].to_f + out.puts " " * level + self.to_string + level += 1 + sorted_children.each do |child| + child.print_sorted_children(level, out) + end + end + end +end diff --git a/spec/syntax_suggest/fixtures/rexe.rb.txt b/spec/syntax_suggest/fixtures/rexe.rb.txt new file mode 100755 index 00000000000000..92e44d4d1ea4bf --- /dev/null +++ b/spec/syntax_suggest/fixtures/rexe.rb.txt @@ -0,0 +1,569 @@ +#!/usr/bin/env ruby +# +# rexe - Ruby Command Line Executor Filter +# +# Inspired by https://github.com/thisredone/rb + +# frozen_string_literal: true + + +require 'bundler' +require 'date' +require 'optparse' +require 'ostruct' +require 'shellwords' + +class Rexe + + VERSION = '1.5.1' + + PROJECT_URL = 'https://github.com/keithrbennett/rexe' + + + module Helpers + + # Try executing code. If error raised, print message (but not stack trace) & exit -1. 
+ def try + begin + yield + rescue Exception => e + unless e.class == SystemExit + $stderr.puts("rexe: #{e}") + $stderr.puts("Use the -h option to get help.") + exit(-1) + end + end + end + end + + + class Options < Struct.new( + :input_filespec, + :input_format, + :input_mode, + :loads, + :output_format, + :output_format_tty, + :output_format_block, + :requires, + :log_format, + :noop) + + + def initialize + super + clear + end + + + def clear + self.input_filespec = nil + self.input_format = :none + self.input_mode = :none + self.output_format = :none + self.output_format_tty = :none + self.output_format_block = :none + self.loads = [] + self.requires = [] + self.log_format = :none + self.noop = false + end + end + + + + + + class Lookups + def input_modes + @input_modes ||= { + 'l' => :line, + 'e' => :enumerator, + 'b' => :one_big_string, + 'n' => :none + } + end + + + def input_formats + @input_formats ||= { + 'j' => :json, + 'm' => :marshal, + 'n' => :none, + 'y' => :yaml, + } + end + + + def input_parsers + @input_parsers ||= { + json: ->(string) { JSON.parse(string) }, + marshal: ->(string) { Marshal.load(string) }, + none: ->(string) { string }, + yaml: ->(string) { YAML.load(string) }, + } + end + + + def output_formats + @output_formats ||= { + 'a' => :amazing_print, + 'i' => :inspect, + 'j' => :json, + 'J' => :pretty_json, + 'm' => :marshal, + 'n' => :none, + 'p' => :puts, # default + 'P' => :pretty_print, + 's' => :to_s, + 'y' => :yaml, + } + end + + + def formatters + @formatters ||= { + amazing_print: ->(obj) { obj.ai + "\n" }, + inspect: ->(obj) { obj.inspect + "\n" }, + json: ->(obj) { obj.to_json }, + marshal: ->(obj) { Marshal.dump(obj) }, + none: ->(_obj) { nil }, + pretty_json: ->(obj) { JSON.pretty_generate(obj) }, + pretty_print: ->(obj) { obj.pretty_inspect }, + puts: ->(obj) { require 'stringio'; sio = StringIO.new; sio.puts(obj); sio.string }, + to_s: ->(obj) { obj.to_s + "\n" }, + yaml: ->(obj) { obj.to_yaml }, + } + end + + + def 
format_requires + @format_requires ||= { + json: 'json', + pretty_json: 'json', + amazing_print: 'amazing_print', + pretty_print: 'pp', + yaml: 'yaml' + } + end + end + + + + class CommandLineParser + + include Helpers + + attr_reader :lookups, :options + + def initialize + @lookups = Lookups.new + @options = Options.new + end + + + # Inserts contents of REXE_OPTIONS environment variable at the beginning of ARGV. + private def prepend_environment_options + env_opt_string = ENV['REXE_OPTIONS'] + if env_opt_string + args_to_prepend = Shellwords.shellsplit(env_opt_string) + ARGV.unshift(args_to_prepend).flatten! + end + end + + + private def add_format_requires_to_requires_list + formats = [options.input_format, options.output_format, options.log_format] + requires = formats.map { |format| lookups.format_requires[format] }.uniq.compact + requires.each { |r| options.requires << r } + end + + + private def help_text + unless @help_text + @help_text ||= <<~HEREDOC + + rexe -- Ruby Command Line Executor/Filter -- v#{VERSION} -- #{PROJECT_URL} + + Executes Ruby code on the command line, + optionally automating management of standard input and standard output, + and optionally parsing input and formatting output with YAML, JSON, etc. + + rexe [options] [Ruby source code] + + Options: + + -c --clear_options Clear all previous command line options specified up to now + -f --input_file Use this file instead of stdin for preprocessed input; + if filespec has a YAML and JSON file extension, + sets input format accordingly and sets input mode to -mb + -g --log_format FORMAT Log format, logs to stderr, defaults to -gn (none) + (see -o for format options) + -h, --help Print help and exit + -i, --input_format FORMAT Input format, defaults to -in (None) + -ij JSON + -im Marshal + -in None (default) + -iy YAML + -l, --load RUBY_FILE(S) Ruby file(s) to load, comma separated; + ! 
to clear all, or precede a name with '-' to remove + -m, --input_mode MODE Input preprocessing mode (determines what `self` will be) + defaults to -mn (none) + -ml line; each line is ingested as a separate string + -me enumerator (each_line on STDIN or File) + -mb big string; all lines combined into one string + -mn none (default); no input preprocessing; + self is an Object.new + -n, --[no-]noop Do not execute the code (useful with -g); + For true: yes, true, y, +; for false: no, false, n + -o, --output_format FORMAT Output format, defaults to -on (no output): + -oa Amazing Print + -oi Inspect + -oj JSON + -oJ Pretty JSON + -om Marshal + -on No Output (default) + -op Puts + -oP Pretty Print + -os to_s + -oy YAML + If 2 letters are provided, 1st is for tty devices, 2nd for block + --project-url Outputs project URL on Github, then exits + -r, --require REQUIRE(S) Gems and built-in libraries to require, comma separated; + ! to clear all, or precede a name with '-' to remove + -v, --version Prints version and exits + + --------------------------------------------------------------------------------------- + + In many cases you will need to enclose your source code in single or double quotes. + + If source code is not specified, it will default to 'self', + which is most likely useful only in a filter mode (-ml, -me, -mb). + + If there is a .rexerc file in your home directory, it will be run as Ruby code + before processing the input. + + If there is a REXE_OPTIONS environment variable, its content will be prepended + to the command line so that you can specify options implicitly + (e.g. `export REXE_OPTIONS="-r amazing_print,yaml"`) + + HEREDOC + + @help_text.freeze + end + + @help_text + end + + + # File file input mode; detects the input mode (JSON, YAML, or None) from the extension. 
+ private def autodetect_file_format(filespec) + extension = File.extname(filespec).downcase + if extension == '.json' + :json + elsif extension == '.yml' || extension == '.yaml' + :yaml + else + :none + end + end + + + private def open_resource(resource_identifier) + command = case (`uname`.chomp) + when 'Darwin' + 'open' + when 'Linux' + 'xdg-open' + else + 'start' + end + + `#{command} #{resource_identifier}` + end + + + # Using 'optparse', parses the command line. + # Settings go into this instance's properties (see Struct declaration). + def parse + + prepend_environment_options + + OptionParser.new do |parser| + + parser.on('-c', '--clear_options', "Clear all previous command line options") do |v| + options.clear + end + + parser.on('-f', '--input_file FILESPEC', + 'Use this file instead of stdin; autodetects YAML and JSON file extensions') do |v| + unless File.exist?(v) + raise "File #{v} does not exist." + end + options.input_filespec = v + options.input_format = autodetect_file_format(v) + if [:json, :yaml].include?(options.input_format) + options.input_mode = :one_big_string + end + end + + parser.on('-g', '--log_format FORMAT', 'Log format, logs to stderr, defaults to none (see -o for format options)') do |v| + options.log_format = lookups.output_formats[v] + if options.log_format.nil? + raise("Output mode was '#{v}' but must be one of #{lookups.output_formats.keys}.") + end + end + + parser.on("-h", "--help", "Show help") do |_help_requested| + puts help_text + exit + end + + parser.on('-i', '--input_format FORMAT', + 'Mode with which to parse input values (n = none (default), j = JSON, m = Marshal, y = YAML') do |v| + + options.input_format = lookups.input_formats[v] + if options.input_format.nil? + raise("Input mode was '#{v}' but must be one of #{lookups.input_formats.keys}.") + end + end + + parser.on('-l', '--load RUBY_FILE(S)', 'Ruby file(s) to load, comma separated, or ! to clear') do |v| + if v == '!' 
+ options.loads.clear + else + loadfiles = v.split(',').map(&:strip).map { |s| File.expand_path(s) } + removes, adds = loadfiles.partition { |filespec| filespec[0] == '-' } + + existent, nonexistent = adds.partition { |filespec| File.exists?(filespec) } + if nonexistent.any? + raise("\nDid not find the following files to load: #{nonexistent}\n\n") + else + existent.each { |filespec| options.loads << filespec } + end + + removes.each { |filespec| options.loads -= [filespec[1..-1]] } + end + end + + parser.on('-m', '--input_mode MODE', + 'Mode with which to handle input (-ml, -me, -mb, -mn (default)') do |v| + + options.input_mode = lookups.input_modes[v] + if options.input_mode.nil? + raise("Input mode was '#{v}' but must be one of #{lookups.input_modes.keys}.") + end + end + + # See https://stackoverflow.com/questions/54576873/ruby-optionparser-short-code-for-boolean-option + # for an excellent explanation of this optparse incantation. + # According to the answer, valid options are: + # -n no, -n yes, -n false, -n true, -n n, -n y, -n +, but not -n -. + parser.on('-n', '--[no-]noop [FLAG]', TrueClass, "Do not execute the code (useful with -g)") do |v| + options.noop = (v.nil? ? true : v) + end + + parser.on('-o', '--output_format FORMAT', + 'Mode with which to format values for output (`-o` + [aijJmnpsy])') do |v| + options.output_format_tty = lookups.output_formats[v[0]] + options.output_format_block = lookups.output_formats[v[-1]] + options.output_format = ($stdout.tty? ? options.output_format_tty : options.output_format_block) + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '#{v}'; each must be one of #{lookups.output_formats.keys}.") + end + end + + parser.on('-r', '--require REQUIRE(S)', + 'Gems and built-in libraries (e.g. shellwords, yaml) to require, comma separated, or ! to clear') do |v| + if v == '!' 
+ options.requires.clear + else + v.split(',').map(&:strip).each do |r| + if r[0] == '-' + options.requires -= [r[1..-1]] + else + options.requires << r + end + end + end + end + + parser.on('-v', '--version', 'Print version') do + puts VERSION + exit(0) + end + + # Undocumented feature: open Github project with default web browser on a Mac + parser.on('', '--open-project') do + open_resource(PROJECT_URL) + exit(0) + end + + parser.on('', '--project-url') do + puts PROJECT_URL + exit(0) + end + + end.parse! + + # We want to do this after all options have been processed because we don't want any clearing of the + # options (by '-c', etc.) to result in exclusion of these needed requires. + add_format_requires_to_requires_list + + options.requires = options.requires.sort.uniq + options.loads.uniq! + + options + + end + end + + + class Main + + include Helpers + + attr_reader :callable, :input_parser, :lookups, + :options, :output_formatter, + :log_formatter, :start_time, :user_source_code + + + def initialize + @lookups = Lookups.new + @start_time = DateTime.now + end + + + private def load_global_config_if_exists + filespec = File.join(Dir.home, '.rexerc') + load(filespec) if File.exists?(filespec) + end + + + private def init_parser_and_formatters + @input_parser = lookups.input_parsers[options.input_format] + @output_formatter = lookups.formatters[options.output_format] + @log_formatter = lookups.formatters[options.log_format] + end + + + # Executes the user specified code in the manner appropriate to the input mode. + # Performs any optionally specified parsing on input and formatting on output. 
+ private def execute(eval_context_object, code) + if options.input_format != :none && options.input_mode != :none + eval_context_object = input_parser.(eval_context_object) + end + + value = eval_context_object.instance_eval(&code) + + unless options.output_format == :none + print output_formatter.(value) + end + rescue Errno::EPIPE + exit(-13) + end + + + # The global $RC (Rexe Context) OpenStruct is available in your user code. + # In order to make it possible to access this object in your loaded files, we are not creating + # it here; instead we add properties to it. This way, you can initialize an OpenStruct yourself + # in your loaded code and it will still work. If you do that, beware, any properties you add will be + # included in the log output. If the to_s of your added objects is large, that might be a pain. + private def init_rexe_context + $RC ||= OpenStruct.new + $RC.count = 0 + $RC.rexe_version = VERSION + $RC.start_time = start_time.iso8601 + $RC.source_code = user_source_code + $RC.options = options.to_h + + def $RC.i; count end # `i` aliases `count` so you can more concisely get the count in your user code + end + + + private def create_callable + eval("Proc.new { #{user_source_code} }") + end + + + private def lookup_action(mode) + input = options.input_filespec ? 
File.open(options.input_filespec) : STDIN + { + line: -> { input.each { |l| execute(l.chomp, callable); $RC.count += 1 } }, + enumerator: -> { execute(input.each_line, callable); $RC.count += 1 }, + one_big_string: -> { big_string = input.read; execute(big_string, callable); $RC.count += 1 }, + none: -> { execute(Object.new, callable) } + }.fetch(mode) + end + + + private def output_log_entry + if options.log_format != :none + $RC.duration_secs = Time.now - start_time.to_time + STDERR.puts(log_formatter.($RC.to_h)) + end + end + + + # Bypasses Bundler's restriction on loading gems + # (see https://stackoverflow.com/questions/55144094/bundler-doesnt-permit-using-gems-in-project-home-directory) + private def require!(the_require) + begin + require the_require + rescue LoadError => error + gem_path = `gem which #{the_require}` + if gem_path.chomp.strip.empty? + raise error # re-raise the error, can't fix it + else + load_dir = File.dirname(gem_path) + $LOAD_PATH += load_dir + require the_require + end + end + end + + + # This class' entry point. + def call + + try do + + @options = CommandLineParser.new.parse + + options.requires.each { |r| require!(r) } + load_global_config_if_exists + options.loads.each { |file| load(file) } + + @user_source_code = ARGV.join(' ') + @user_source_code = 'self' if @user_source_code == '' + + @callable = create_callable + + init_rexe_context + init_parser_and_formatters + + # This is where the user's source code will be executed; the action will in turn call `execute`. + lookup_action(options.input_mode).call unless options.noop + + output_log_entry + end + end + end +end + + +def bundler_run(&block) + # This used to be an unconditional call to with_clean_env but that method is now deprecated: + # [DEPRECATED] `Bundler.with_clean_env` has been deprecated in favor of `Bundler.with_unbundled_env`. 
+ # If you instead want the environment before bundler was originally loaded, + # use `Bundler.with_original_env` + + if Bundler.respond_to?(:with_unbundled_env) + Bundler.with_unbundled_env { block.call } + else + Bundler.with_clean_env { block.call } + end +end + + +bundler_run { Rexe::Main.new.call } diff --git a/spec/syntax_suggest/fixtures/routes.rb.txt b/spec/syntax_suggest/fixtures/routes.rb.txt new file mode 100644 index 00000000000000..86733821c07374 --- /dev/null +++ b/spec/syntax_suggest/fixtures/routes.rb.txt @@ -0,0 +1,121 @@ +Rails.application.routes.draw do + constraints -> { Rails.application.config.non_production } do + namespace :foo do + resource :bar + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace 
:bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + + namespace :admin do + resource :session + + match "/foobar(*path)", via: :all, to: redirect { |_params, req| + uri = URI(req.path.gsub("foobar", "foobaz")) + uri.query = req.query_string.presence + uri.to_s + } +end diff --git a/spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt b/spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt new file mode 100644 index 00000000000000..9acdbf3a61d967 --- /dev/null +++ b/spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt @@ -0,0 +1,1344 @@ +require "tmpdir" +require "digest/md5" +require "benchmark" +require "rubygems" +require "language_pack" +require "language_pack/base" +require "language_pack/ruby_version" +require "language_pack/helpers/nodebin" +require "language_pack/helpers/node_installer" +require "language_pack/helpers/yarn_installer" +require "language_pack/helpers/layer" +require "language_pack/helpers/binstub_check" +require "language_pack/version" + +# base Ruby Language Pack. 
This is for any base ruby app. +class LanguagePack::Ruby < LanguagePack::Base + NAME = "ruby" + LIBYAML_VERSION = "0.1.7" + LIBYAML_PATH = "libyaml-#{LIBYAML_VERSION}" + RBX_BASE_URL = "http://binaries.rubini.us/heroku" + NODE_BP_PATH = "vendor/node/bin" + + Layer = LanguagePack::Helpers::Layer + + # detects if this is a valid Ruby app + # @return [Boolean] true if it's a Ruby app + def self.use? + instrument "ruby.use" do + File.exist?("Gemfile") + end + end + + def self.bundler + @@bundler ||= LanguagePack::Helpers::BundlerWrapper.new.install + end + + def bundler + self.class.bundler + end + + def initialize(*args) + super(*args) + @fetchers[:mri] = LanguagePack::Fetcher.new(VENDOR_URL, @stack) + @fetchers[:rbx] = LanguagePack::Fetcher.new(RBX_BASE_URL, @stack) + @node_installer = LanguagePack::Helpers::NodeInstaller.new + @yarn_installer = LanguagePack::Helpers::YarnInstaller.new + end + + def name + "Ruby" + end + + def default_addons + instrument "ruby.default_addons" do + add_dev_database_addon + end + end + + def default_config_vars + instrument "ruby.default_config_vars" do + vars = { + "LANG" => env("LANG") || "en_US.UTF-8", + } + + ruby_version.jruby? ? vars.merge({ + "JRUBY_OPTS" => default_jruby_opts + }) : vars + end + end + + def default_process_types + instrument "ruby.default_process_types" do + { + "rake" => "bundle exec rake", + "console" => "bundle exec irb" + } + end + end + + def best_practice_warnings + if bundler.has_gem?("asset_sync") + warn(<<-WARNING) +You are using the `asset_sync` gem. +This is not recommended. +See https://devcenter.heroku.com/articles/please-do-not-use-asset-sync for more information. +WARNING + end + end + + def compile + instrument 'ruby.compile' do + # check for new app at the beginning of the compile + new_app? 
+ Dir.chdir(build_path) + remove_vendor_bundle + warn_bundler_upgrade + warn_bad_binstubs + install_ruby(slug_vendor_ruby, build_ruby_path) + setup_language_pack_environment( + ruby_layer_path: File.expand_path("."), + gem_layer_path: File.expand_path("."), + bundle_path: "vendor/bundle", + bundle_default_without: "development:test" + ) + allow_git do + install_bundler_in_app(slug_vendor_base) + load_bundler_cache + build_bundler + post_bundler + create_database_yml + install_binaries + run_assets_precompile_rake_task + end + config_detect + best_practice_warnings + warn_outdated_ruby + setup_profiled(ruby_layer_path: "$HOME", gem_layer_path: "$HOME") # $HOME is set to /app at run time + setup_export + cleanup + super + end + rescue => e + warn_outdated_ruby + raise e + end + + + def build + new_app? + remove_vendor_bundle + warn_bad_binstubs + ruby_layer = Layer.new(@layer_dir, "ruby", launch: true) + install_ruby("#{ruby_layer.path}/#{slug_vendor_ruby}") + ruby_layer.metadata[:version] = ruby_version.version + ruby_layer.metadata[:patchlevel] = ruby_version.patchlevel if ruby_version.patchlevel + ruby_layer.metadata[:engine] = ruby_version.engine.to_s + ruby_layer.metadata[:engine_version] = ruby_version.engine_version + ruby_layer.write + + gem_layer = Layer.new(@layer_dir, "gems", launch: true, cache: true, build: true) + setup_language_pack_environment( + ruby_layer_path: ruby_layer.path, + gem_layer_path: gem_layer.path, + bundle_path: "#{gem_layer.path}/vendor/bundle", + bundle_default_without: "development:test" + ) + allow_git do + # TODO install bundler in separate layer + topic "Loading Bundler Cache" + gem_layer.validate! 
do |metadata| + valid_bundler_cache?(gem_layer.path, gem_layer.metadata) + end + install_bundler_in_app("#{gem_layer.path}/#{slug_vendor_base}") + build_bundler + # TODO post_bundler might need to be done in a new layer + bundler.clean + gem_layer.metadata[:gems] = Digest::SHA2.hexdigest(File.read("Gemfile.lock")) + gem_layer.metadata[:stack] = @stack + gem_layer.metadata[:ruby_version] = run_stdout(%q(ruby -v)).strip + gem_layer.metadata[:rubygems_version] = run_stdout(%q(gem -v)).strip + gem_layer.metadata[:buildpack_version] = BUILDPACK_VERSION + gem_layer.write + + create_database_yml + # TODO replace this with multibuildpack stuff? put binaries in their own layer? + install_binaries + run_assets_precompile_rake_task + end + setup_profiled(ruby_layer_path: ruby_layer.path, gem_layer_path: gem_layer.path) + setup_export(gem_layer) + config_detect + best_practice_warnings + cleanup + + super + end + + def cleanup + end + + def config_detect + end + +private + + # A bad shebang line looks like this: + # + # ``` + # #!/usr/bin/env ruby2.5 + # ``` + # + # Since `ruby2.5` is not a valid binary name + # + def warn_bad_binstubs + check = LanguagePack::Helpers::BinstubCheck.new(app_root_dir: Dir.pwd, warn_object: self) + check.call + end + + def default_malloc_arena_max? + return true if @metadata.exists?("default_malloc_arena_max") + return @metadata.touch("default_malloc_arena_max") if new_app? + + return false + end + + def warn_bundler_upgrade + old_bundler_version = @metadata.read("bundler_version").strip if @metadata.exists?("bundler_version") + + if old_bundler_version && old_bundler_version != bundler.version + warn(<<-WARNING, inline: true) +Your app was upgraded to bundler #{ bundler.version }. +Previously you had a successful deploy with bundler #{ old_bundler_version }. 
+ +If you see problems related to the bundler version please refer to: +https://devcenter.heroku.com/articles/bundler-version#known-upgrade-issues + +WARNING + end + end + + # For example "vendor/bundle/ruby/2.6.0" + def self.slug_vendor_base + @slug_vendor_base ||= begin + command = %q(ruby -e "require 'rbconfig';puts \"vendor/bundle/#{RUBY_ENGINE}/#{RbConfig::CONFIG['ruby_version']}\"") + out = run_no_pipe(command, user_env: true).strip + error "Problem detecting bundler vendor directory: #{out}" unless $?.success? + out + end + end + + # the relative path to the bundler directory of gems + # @return [String] resulting path + def slug_vendor_base + instrument 'ruby.slug_vendor_base' do + @slug_vendor_base ||= self.class.slug_vendor_base + end + end + + # the relative path to the vendored ruby directory + # @return [String] resulting path + def slug_vendor_ruby + "vendor/#{ruby_version.version_without_patchlevel}" + end + + # the absolute path of the build ruby to use during the buildpack + # @return [String] resulting path + def build_ruby_path + "/tmp/#{ruby_version.version_without_patchlevel}" + end + + # fetch the ruby version from bundler + # @return [String, nil] returns the ruby version if detected or nil if none is detected + def ruby_version + instrument 'ruby.ruby_version' do + return @ruby_version if @ruby_version + new_app = !File.exist?("vendor/heroku") + last_version_file = "buildpack_ruby_version" + last_version = nil + last_version = @metadata.read(last_version_file).strip if @metadata.exists?(last_version_file) + + @ruby_version = LanguagePack::RubyVersion.new(bundler.ruby_version, + is_new: new_app, + last_version: last_version) + return @ruby_version + end + end + + def set_default_web_concurrency + <<-EOF +case $(ulimit -u) in +256) + export HEROKU_RAM_LIMIT_MB=${HEROKU_RAM_LIMIT_MB:-512} + export WEB_CONCURRENCY=${WEB_CONCURRENCY:-2} + ;; +512) + export HEROKU_RAM_LIMIT_MB=${HEROKU_RAM_LIMIT_MB:-1024} + export 
WEB_CONCURRENCY=${WEB_CONCURRENCY:-4} + ;; +16384) + export HEROKU_RAM_LIMIT_MB=${HEROKU_RAM_LIMIT_MB:-2560} + export WEB_CONCURRENCY=${WEB_CONCURRENCY:-8} + ;; +32768) + export HEROKU_RAM_LIMIT_MB=${HEROKU_RAM_LIMIT_MB:-6144} + export WEB_CONCURRENCY=${WEB_CONCURRENCY:-16} + ;; +*) + ;; +esac +EOF + end + + # default JRUBY_OPTS + # return [String] string of JRUBY_OPTS + def default_jruby_opts + "-Xcompile.invokedynamic=false" + end + + # sets up the environment variables for the build process + def setup_language_pack_environment(ruby_layer_path:, gem_layer_path:, bundle_path:, bundle_default_without:) + instrument 'ruby.setup_language_pack_environment' do + if ruby_version.jruby? + ENV["PATH"] += ":bin" + ENV["JRUBY_OPTS"] = env('JRUBY_BUILD_OPTS') || env('JRUBY_OPTS') + end + setup_ruby_install_env(ruby_layer_path) + + # By default Node can address 1.5GB of memory, a limitation it inherits from + # the underlying v8 engine. This can occasionally cause issues during frontend + # builds where memory use can exceed this threshold. + # + # This passes an argument to all Node processes during the build, so that they + # can take advantage of all available memory on the build dynos. + ENV["NODE_OPTIONS"] ||= "--max_old_space_size=2560" + + # TODO when buildpack-env-args rolls out, we can get rid of + # ||= and the manual setting below + default_config_vars.each do |key, value| + ENV[key] ||= value + end + + paths = [] + gem_path = "#{gem_layer_path}/#{slug_vendor_base}" + ENV["GEM_PATH"] = gem_path + ENV["GEM_HOME"] = gem_path + + ENV["DISABLE_SPRING"] = "1" + + # Rails has a binstub for yarn that doesn't work for all applications + # we need to ensure that yarn comes before local bin dir for that case + paths << yarn_preinstall_bin_path if yarn_preinstalled? + + # Need to remove `./bin` folder since it links to the wrong --prefix ruby binstubs breaking require in Ruby 1.9.2 and 1.8.7. 
+ # Because for 1.9.2 and 1.8.7 there is a "build" ruby and a non-"build" Ruby + paths << "#{File.expand_path(".")}/bin" unless ruby_version.ruby_192_or_lower? + + paths << "#{gem_layer_path}/#{bundler_binstubs_path}" # Binstubs from bundler, eg. vendor/bundle/bin + paths << "#{gem_layer_path}/#{slug_vendor_base}/bin" # Binstubs from rubygems, eg. vendor/bundle/ruby/2.6.0/bin + paths << ENV["PATH"] + + ENV["PATH"] = paths.join(":") + + ENV["BUNDLE_WITHOUT"] = env("BUNDLE_WITHOUT") || bundle_default_without + if ENV["BUNDLE_WITHOUT"].include?(' ') + ENV["BUNDLE_WITHOUT"] = ENV["BUNDLE_WITHOUT"].tr(' ', ':') + + warn("Your BUNDLE_WITHOUT contains a space, we are converting it to a colon `:` BUNDLE_WITHOUT=#{ENV["BUNDLE_WITHOUT"]}", inline: true) + end + ENV["BUNDLE_PATH"] = bundle_path + ENV["BUNDLE_BIN"] = bundler_binstubs_path + ENV["BUNDLE_DEPLOYMENT"] = "1" + ENV["BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE"] = "1" if bundler.needs_ruby_global_append_path? + end + end + + # Sets up the environment variables for subsequent processes run by + # muiltibuildpack. We can't use profile.d because $HOME isn't set up + def setup_export(layer = nil) + instrument 'ruby.setup_export' do + if layer + paths = ENV["PATH"] + else + paths = ENV["PATH"].split(":").map do |path| + /^\/.*/ !~ path ? "#{build_path}/#{path}" : path + end.join(":") + end + + # TODO ensure path exported is correct + set_export_path "PATH", paths, layer + + if layer + gem_path = "#{layer.path}/#{slug_vendor_base}" + else + gem_path = "#{build_path}/#{slug_vendor_base}" + end + set_export_path "GEM_PATH", gem_path, layer + set_export_default "LANG", "en_US.UTF-8", layer + + # TODO handle jruby + if ruby_version.jruby? 
+ set_export_default "JRUBY_OPTS", default_jruby_opts + end + + set_export_default "BUNDLE_PATH", ENV["BUNDLE_PATH"], layer + set_export_default "BUNDLE_WITHOUT", ENV["BUNDLE_WITHOUT"], layer + set_export_default "BUNDLE_BIN", ENV["BUNDLE_BIN"], layer + set_export_default "BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE", ENV["BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE"], layer if bundler.needs_ruby_global_append_path? + set_export_default "BUNDLE_DEPLOYMENT", ENV["BUNDLE_DEPLOYMENT"], layer if ENV["BUNDLE_DEPLOYMENT"] # Unset on windows since we delete the Gemfile.lock + end + end + + # sets up the profile.d script for this buildpack + def setup_profiled(ruby_layer_path: , gem_layer_path: ) + instrument 'setup_profiled' do + profiled_path = [] + + # Rails has a binstub for yarn that doesn't work for all applications + # we need to ensure that yarn comes before local bin dir for that case + if yarn_preinstalled? + profiled_path << yarn_preinstall_bin_path.gsub(File.expand_path("."), "$HOME") + elsif has_yarn_binary? + profiled_path << "#{ruby_layer_path}/vendor/#{@yarn_installer.binary_path}" + end + profiled_path << "$HOME/bin" # /app in production + profiled_path << "#{gem_layer_path}/#{bundler_binstubs_path}" # Binstubs from bundler, eg. vendor/bundle/bin + profiled_path << "#{gem_layer_path}/#{slug_vendor_base}/bin" # Binstubs from rubygems, eg. vendor/bundle/ruby/2.6.0/bin + profiled_path << "$PATH" + + set_env_default "LANG", "en_US.UTF-8" + set_env_override "GEM_PATH", "#{gem_layer_path}/#{slug_vendor_base}:$GEM_PATH" + set_env_override "PATH", profiled_path.join(":") + set_env_override "DISABLE_SPRING", "1" + + set_env_default "MALLOC_ARENA_MAX", "2" if default_malloc_arena_max? + + web_concurrency = env("SENSIBLE_DEFAULTS") ? 
set_default_web_concurrency : "" + add_to_profiled(web_concurrency, filename: "WEB_CONCURRENCY.sh", mode: "w") # always write that file, even if its empty (meaning no defaults apply), for interop with other buildpacks - and we overwrite the file rather than appending (which is the default) + + # TODO handle JRUBY + if ruby_version.jruby? + set_env_default "JRUBY_OPTS", default_jruby_opts + end + + set_env_default "BUNDLE_PATH", ENV["BUNDLE_PATH"] + set_env_default "BUNDLE_WITHOUT", ENV["BUNDLE_WITHOUT"] + set_env_default "BUNDLE_BIN", ENV["BUNDLE_BIN"] + set_env_default "BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE", ENV["BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE"] if bundler.needs_ruby_global_append_path? + set_env_default "BUNDLE_DEPLOYMENT", ENV["BUNDLE_DEPLOYMENT"] if ENV["BUNDLE_DEPLOYMENT"] # Unset on windows since we delete the Gemfile.lock + end + end + + def warn_outdated_ruby + return unless defined?(@outdated_version_check) + + @warn_outdated ||= begin + @outdated_version_check.join + + warn_outdated_minor + warn_outdated_eol + warn_stack_upgrade + true + end + end + + def warn_stack_upgrade + return unless defined?(@ruby_download_check) + return unless @ruby_download_check.next_stack(current_stack: stack) + return if @ruby_download_check.exists_on_next_stack?(current_stack: stack) + + warn(<<~WARNING) + Your Ruby version is not present on the next stack + + You are currently using #{ruby_version.version_for_download} on #{stack} stack. + This version does not exist on #{@ruby_download_check.next_stack(current_stack: stack)}. In order to upgrade your stack you will + need to upgrade to a supported Ruby version. 
+ + For a list of supported Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + + For a list of the oldest Ruby versions present on a given stack see: + https://devcenter.heroku.com/articles/ruby-support#oldest-available-runtimes + WARNING + end + + def warn_outdated_eol + return unless @outdated_version_check.maybe_eol? + + if @outdated_version_check.eol? + warn(<<~WARNING) + EOL Ruby Version + + You are using a Ruby version that has reached its End of Life (EOL) + + We strongly suggest you upgrade to Ruby #{@outdated_version_check.suggest_ruby_eol_version} or later + + Your current Ruby version no longer receives security updates from + Ruby Core and may have serious vulnerabilities. While you will continue + to be able to deploy on Heroku with this Ruby version you must upgrade + to a non-EOL version to be eligible to receive support. + + Upgrade your Ruby version as soon as possible. + + For a list of supported Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + WARNING + else + # Maybe EOL + warn(<<~WARNING) + Potential EOL Ruby Version + + You are using a Ruby version that has either reached its End of Life (EOL) + or will reach its End of Life on December 25th of this year. + + We suggest you upgrade to Ruby #{@outdated_version_check.suggest_ruby_eol_version} or later + + Once a Ruby version becomes EOL, it will no longer receive + security updates from Ruby core and may have serious vulnerabilities. + + Please upgrade your Ruby version. + + For a list of supported Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + WARNING + end + end + + def warn_outdated_minor + return if @outdated_version_check.latest_minor_version? + + warn(<<~WARNING) + There is a more recent Ruby version available for you to use: + + #{@outdated_version_check.suggested_ruby_minor_version} + + The latest version will include security and bug fixes. 
We always recommend + running the latest version of your minor release. + + Please upgrade your Ruby version. + + For all available Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + WARNING + end + + # install the vendored ruby + # @return [Boolean] true if it installs the vendored ruby and false otherwise + def install_ruby(install_path, build_ruby_path = nil) + instrument 'ruby.install_ruby' do + # Could do a compare operation to avoid re-downloading ruby + return false unless ruby_version + installer = LanguagePack::Installers::RubyInstaller.installer(ruby_version).new(@stack) + + @ruby_download_check = LanguagePack::Helpers::DownloadPresence.new(ruby_version.file_name) + @ruby_download_check.call + + if ruby_version.build? + installer.fetch_unpack(ruby_version, build_ruby_path, true) + end + + installer.install(ruby_version, install_path) + + @outdated_version_check = LanguagePack::Helpers::OutdatedRubyVersion.new( + current_ruby_version: ruby_version, + fetcher: installer.fetcher + ) + @outdated_version_check.call + + @metadata.write("buildpack_ruby_version", ruby_version.version_for_download) + + topic "Using Ruby version: #{ruby_version.version_for_download}" + if !ruby_version.set + warn(<<~WARNING) + You have not declared a Ruby version in your Gemfile. + + To declare a Ruby version add this line to your Gemfile: + + ``` + ruby "#{LanguagePack::RubyVersion::DEFAULT_VERSION_NUMBER}" + ``` + + For more information see: + https://devcenter.heroku.com/articles/ruby-versions + WARNING + end + + if ruby_version.warn_ruby_26_bundler? + warn(<<~WARNING, inline: true) + There is a known bundler bug with your version of Ruby + + Your version of Ruby contains a problem with the built-in integration of bundler. If + you encounter a bundler error you need to upgrade your Ruby version. 
We suggest you upgrade to: + + #{@outdated_version_check.suggested_ruby_minor_version} + + For more information see: + https://devcenter.heroku.com/articles/bundler-version#known-upgrade-issues + WARNING + end + end + + true + rescue LanguagePack::Fetcher::FetchError + if @ruby_download_check.does_not_exist? + message = <<~ERROR + The Ruby version you are trying to install does not exist: #{ruby_version.version_for_download} + ERROR + else + message = <<~ERROR + The Ruby version you are trying to install does not exist on this stack. + + You are trying to install #{ruby_version.version_for_download} on #{stack}. + + Ruby #{ruby_version.version_for_download} is present on the following stacks: + + - #{@ruby_download_check.valid_stack_list.join("\n - ")} + ERROR + + if env("CI") + message << <<~ERROR + + On Heroku CI you can set your stack in the `app.json`. For example: + + ``` + "stack": "heroku-20" + ``` + ERROR + end + end + + message << <<~ERROR + + Heroku recommends you use the latest supported Ruby version listed here: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + + For more information on syntax for declaring a Ruby version see: + https://devcenter.heroku.com/articles/ruby-versions + ERROR + + error message + end + + # TODO make this compatible with CNB + def new_app? + @new_app ||= !File.exist?("vendor/heroku") + end + + # find the ruby install path for its binstubs during build + # @return [String] resulting path or empty string if ruby is not vendored + def ruby_install_binstub_path(ruby_layer_path = ".") + @ruby_install_binstub_path ||= + if ruby_version.build? 
+ "#{build_ruby_path}/bin" + elsif ruby_version + "#{ruby_layer_path}/#{slug_vendor_ruby}/bin" + else + "" + end + end + + # setup the environment so we can use the vendored ruby + def setup_ruby_install_env(ruby_layer_path = ".") + instrument 'ruby.setup_ruby_install_env' do + ENV["PATH"] = "#{File.expand_path(ruby_install_binstub_path(ruby_layer_path))}:#{ENV["PATH"]}" + end + end + + # installs vendored gems into the slug + def install_bundler_in_app(bundler_dir) + instrument 'ruby.install_language_pack_gems' do + FileUtils.mkdir_p(bundler_dir) + Dir.chdir(bundler_dir) do |dir| + `cp -R #{bundler.bundler_path}/. .` + end + + # write bundler shim, so we can control the version bundler used + # Ruby 2.6.0 started vendoring bundler + write_bundler_shim("vendor/bundle/bin") if ruby_version.vendored_bundler? + end + end + + # default set of binaries to install + # @return [Array] resulting list + def binaries + add_node_js_binary + add_yarn_binary + end + + # vendors binaries into the slug + def install_binaries + instrument 'ruby.install_binaries' do + binaries.each {|binary| install_binary(binary) } + Dir["bin/*"].each {|path| run("chmod +x #{path}") } + end + end + + # vendors individual binary into the slug + # @param [String] name of the binary package from S3. + # Example: https://s3.amazonaws.com/language-pack-ruby/node-0.4.7.tgz, where name is "node-0.4.7" + def install_binary(name) + topic "Installing #{name}" + bin_dir = "bin" + FileUtils.mkdir_p bin_dir + Dir.chdir(bin_dir) do |dir| + if name.match(/^node\-/) + @node_installer.install + # need to set PATH here b/c `node-gyp` can change the CWD, but still depends on executing node. + # the current PATH is relative, but it needs to be absolute for this. 
+ # doing this here also prevents it from being exported during runtime + node_bin_path = File.absolute_path(".") + # this needs to be set after so other binaries in bin/ don't take precedence" + ENV["PATH"] = "#{ENV["PATH"]}:#{node_bin_path}" + elsif name.match(/^yarn\-/) + FileUtils.mkdir_p("../vendor") + Dir.chdir("../vendor") do |vendor_dir| + @yarn_installer.install + yarn_path = File.absolute_path("#{vendor_dir}/#{@yarn_installer.binary_path}") + ENV["PATH"] = "#{yarn_path}:#{ENV["PATH"]}" + end + else + @fetchers[:buildpack].fetch_untar("#{name}.tgz") + end + end + end + + # removes a binary from the slug + # @param [String] relative path of the binary on the slug + def uninstall_binary(path) + FileUtils.rm File.join('bin', File.basename(path)), :force => true + end + + def load_default_cache? + new_app? && ruby_version.default? + end + + # loads a default bundler cache for new apps to speed up initial bundle installs + def load_default_cache + instrument "ruby.load_default_cache" do + if false # load_default_cache? + puts "New app detected loading default bundler cache" + patchlevel = run("ruby -e 'puts RUBY_PATCHLEVEL'").strip + cache_name = "#{LanguagePack::RubyVersion::DEFAULT_VERSION}-p#{patchlevel}-default-cache" + @fetchers[:buildpack].fetch_untar("#{cache_name}.tgz") + end + end + end + + # remove `vendor/bundle` that comes from the git repo + # in case there are native ext. + # users should be using `bundle pack` instead. + # https://github.com/heroku/heroku-buildpack-ruby/issues/21 + def remove_vendor_bundle + if File.exists?("vendor/bundle") + warn(<<-WARNING) +Removing `vendor/bundle`. +Checking in `vendor/bundle` is not supported. Please remove this directory +and add it to your .gitignore. To vendor your gems with Bundler, use +`bundle pack` instead. 
+WARNING + FileUtils.rm_rf("vendor/bundle") + end + end + + def bundler_binstubs_path + "vendor/bundle/bin" + end + + def bundler_path + @bundler_path ||= "#{slug_vendor_base}/gems/#{bundler.dir_name}" + end + + def write_bundler_shim(path) + FileUtils.mkdir_p(path) + shim_path = "#{path}/bundle" + File.open(shim_path, "w") do |file| + file.print <<-BUNDLE +#!/usr/bin/env ruby +require 'rubygems' + +version = "#{bundler.version}" + +if ARGV.first + str = ARGV.first + str = str.dup.force_encoding("BINARY") if str.respond_to? :force_encoding + if str =~ /\A_(.*)_\z/ and Gem::Version.correct?($1) then + version = $1 + ARGV.shift + end +end + +if Gem.respond_to?(:activate_bin_path) +load Gem.activate_bin_path('bundler', 'bundle', version) +else +gem "bundler", version +load Gem.bin_path("bundler", "bundle", version) +end +BUNDLE + end + FileUtils.chmod(0755, shim_path) + end + + # runs bundler to install the dependencies + def build_bundler + instrument 'ruby.build_bundler' do + log("bundle") do + if File.exist?("#{Dir.pwd}/.bundle/config") + warn(<<~WARNING, inline: true) + You have the `.bundle/config` file checked into your repository + It contains local state like the location of the installed bundle + as well as configured git local gems, and other settings that should + not be shared between multiple checkouts of a single repo. Please + remove the `.bundle/` folder from your repo and add it to your `.gitignore` file. + + https://devcenter.heroku.com/articles/bundler-configuration + WARNING + end + + if bundler.windows_gemfile_lock? + log("bundle", "has_windows_gemfile_lock") + + File.unlink("Gemfile.lock") + ENV.delete("BUNDLE_DEPLOYMENT") + + warn(<<~WARNING, inline: true) + Removing `Gemfile.lock` because it was generated on Windows. + Bundler will do a full resolve so native gems are handled properly. + This may result in unexpected gem versions being used in your app. + In rare occasions Bundler may not be able to resolve your dependencies at all. 
+ + https://devcenter.heroku.com/articles/bundler-windows-gemfile + WARNING + end + + bundle_command = String.new("") + bundle_command << "BUNDLE_WITHOUT='#{ENV["BUNDLE_WITHOUT"]}' " + bundle_command << "BUNDLE_PATH=#{ENV["BUNDLE_PATH"]} " + bundle_command << "BUNDLE_BIN=#{ENV["BUNDLE_BIN"]} " + bundle_command << "BUNDLE_DEPLOYMENT=#{ENV["BUNDLE_DEPLOYMENT"]} " if ENV["BUNDLE_DEPLOYMENT"] # Unset on windows since we delete the Gemfile.lock + bundle_command << "BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE=#{ENV["BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE"]} " if bundler.needs_ruby_global_append_path? + bundle_command << "bundle install -j4" + + topic("Installing dependencies using bundler #{bundler.version}") + + bundler_output = String.new("") + bundle_time = nil + env_vars = {} + Dir.mktmpdir("libyaml-") do |tmpdir| + libyaml_dir = "#{tmpdir}/#{LIBYAML_PATH}" + + # need to setup compile environment for the psych gem + yaml_include = File.expand_path("#{libyaml_dir}/include").shellescape + yaml_lib = File.expand_path("#{libyaml_dir}/lib").shellescape + pwd = Dir.pwd + bundler_path = "#{pwd}/#{slug_vendor_base}/gems/#{bundler.dir_name}/lib" + + # we need to set BUNDLE_CONFIG and BUNDLE_GEMFILE for + # codon since it uses bundler. 
+ env_vars["BUNDLE_GEMFILE"] = "#{pwd}/Gemfile" + env_vars["BUNDLE_CONFIG"] = "#{pwd}/.bundle/config" + env_vars["CPATH"] = noshellescape("#{yaml_include}:$CPATH") + env_vars["CPPATH"] = noshellescape("#{yaml_include}:$CPPATH") + env_vars["LIBRARY_PATH"] = noshellescape("#{yaml_lib}:$LIBRARY_PATH") + env_vars["RUBYOPT"] = syck_hack + env_vars["NOKOGIRI_USE_SYSTEM_LIBRARIES"] = "true" + env_vars["BUNDLE_DISABLE_VERSION_CHECK"] = "true" + env_vars["BUNDLER_LIB_PATH"] = "#{bundler_path}" if ruby_version.ruby_version == "1.8.7" + env_vars["BUNDLE_DISABLE_VERSION_CHECK"] = "true" + + puts "Running: #{bundle_command}" + instrument "ruby.bundle_install" do + bundle_time = Benchmark.realtime do + bundler_output << pipe("#{bundle_command} --no-clean", out: "2>&1", env: env_vars, user_env: true) + end + end + end + + if $?.success? + puts "Bundle completed (#{"%.2f" % bundle_time}s)" + log "bundle", :status => "success" + puts "Cleaning up the bundler cache." + instrument "ruby.bundle_clean" do + # Only show bundle clean output when not using default cache + if load_default_cache? + run("bundle clean > /dev/null", user_env: true, env: env_vars) + else + pipe("bundle clean", out: "2> /dev/null", user_env: true, env: env_vars) + end + end + @bundler_cache.store + + # Keep gem cache out of the slug + FileUtils.rm_rf("#{slug_vendor_base}/cache") + else + mcount "fail.bundle.install" + log "bundle", :status => "failure" + error_message = "Failed to install gems via Bundler." 
+ puts "Bundler Output: #{bundler_output}" + if bundler_output.match(/An error occurred while installing sqlite3/) + mcount "fail.sqlite3" + error_message += <<~ERROR + + Detected sqlite3 gem which is not supported on Heroku: + https://devcenter.heroku.com/articles/sqlite3 + ERROR + end + + if bundler_output.match(/but your Gemfile specified/) + mcount "fail.ruby_version_mismatch" + error_message += <<~ERROR + + Detected a mismatch between your Ruby version installed and + Ruby version specified in Gemfile or Gemfile.lock. You can + correct this by running: + + $ bundle update --ruby + $ git add Gemfile.lock + $ git commit -m "update ruby version" + + If this does not solve the issue please see this documentation: + + https://devcenter.heroku.com/articles/ruby-versions#your-ruby-version-is-x-but-your-gemfile-specified-y + ERROR + end + + error error_message + end + end + end + end + + def post_bundler + instrument "ruby.post_bundler" do + Dir[File.join(slug_vendor_base, "**", ".git")].each do |dir| + FileUtils.rm_rf(dir) + end + bundler.clean + end + end + + # RUBYOPT line that requires syck_hack file + # @return [String] require string if needed or else an empty string + def syck_hack + instrument "ruby.syck_hack" do + syck_hack_file = File.expand_path(File.join(File.dirname(__FILE__), "../../vendor/syck_hack")) + rv = run_stdout('ruby -e "puts RUBY_VERSION"').strip + # < 1.9.3 includes syck, so we need to use the syck hack + if Gem::Version.new(rv) < Gem::Version.new("1.9.3") + "-r#{syck_hack_file}" + else + "" + end + end + end + + # writes ERB based database.yml for Rails. The database.yml uses the DATABASE_URL from the environment during runtime. 
+ def create_database_yml + instrument 'ruby.create_database_yml' do + return false unless File.directory?("config") + return false if bundler.has_gem?('activerecord') && bundler.gem_version('activerecord') >= Gem::Version.new('4.1.0.beta1') + + log("create_database_yml") do + topic("Writing config/database.yml to read from DATABASE_URL") + File.open("config/database.yml", "w") do |file| + file.puts <<-DATABASE_YML +<% + +require 'cgi' +require 'uri' + +begin + uri = URI.parse(ENV["DATABASE_URL"]) +rescue URI::InvalidURIError + raise "Invalid DATABASE_URL" +end + +raise "No RACK_ENV or RAILS_ENV found" unless ENV["RAILS_ENV"] || ENV["RACK_ENV"] + +def attribute(name, value, force_string = false) + if value + value_string = + if force_string + '"' + value + '"' + else + value + end + "\#{name}: \#{value_string}" + else + "" + end +end + +adapter = uri.scheme +adapter = "postgresql" if adapter == "postgres" + +database = (uri.path || "").split("/")[1] + +username = uri.user +password = uri.password + +host = uri.host +port = uri.port + +params = CGI.parse(uri.query || "") + +%> + +<%= ENV["RAILS_ENV"] || ENV["RACK_ENV"] %>: + <%= attribute "adapter", adapter %> + <%= attribute "database", database %> + <%= attribute "username", username %> + <%= attribute "password", password, true %> + <%= attribute "host", host %> + <%= attribute "port", port %> + +<% params.each do |key, value| %> + <%= key %>: <%= value.first %> +<% end %> + DATABASE_YML + end + end + end + end + + def rake + @rake ||= begin + rake_gem_available = bundler.has_gem?("rake") || ruby_version.rake_is_vendored? 
+ raise_on_fail = bundler.gem_version('railties') && bundler.gem_version('railties') > Gem::Version.new('3.x') + + topic "Detecting rake tasks" + rake = LanguagePack::Helpers::RakeRunner.new(rake_gem_available) + rake.load_rake_tasks!({ env: rake_env }, raise_on_fail) + rake + end + end + + def rake_env + if database_url + { "DATABASE_URL" => database_url } + else + {} + end.merge(user_env_hash) + end + + def database_url + env("DATABASE_URL") if env("DATABASE_URL") + end + + # executes the block with GIT_DIR environment variable removed since it can mess with the current working directory git thinks it's in + # @param [block] block to be executed in the GIT_DIR free context + def allow_git(&blk) + git_dir = ENV.delete("GIT_DIR") # can mess with bundler + blk.call + ENV["GIT_DIR"] = git_dir + end + + # decides if we need to enable the dev database addon + # @return [Array] the database addon if the pg gem is detected or an empty Array if it isn't. + def add_dev_database_addon + pg_adapters.any? {|a| bundler.has_gem?(a) } ? ['heroku-postgresql'] : [] + end + + def pg_adapters + [ + "pg", + "activerecord-jdbcpostgresql-adapter", + "jdbc-postgres", + "jdbc-postgresql", + "jruby-pg", + "rjack-jdbc-postgres", + "tgbyte-activerecord-jdbcpostgresql-adapter" + ] + end + + # decides if we need to install the node.js binary + # @note execjs will blow up if no JS RUNTIME is detected and is loaded. + # @return [Array] the node.js binary path if we need it or an empty Array + def add_node_js_binary + return [] if node_js_preinstalled? + + if Pathname(build_path).join("package.json").exist? || + bundler.has_gem?('execjs') || + bundler.has_gem?('webpacker') + [@node_installer.binary_path] + else + [] + end + end + + def add_yarn_binary + return [] if yarn_preinstalled? +| + if Pathname(build_path).join("yarn.lock").exist? || bundler.has_gem?('webpacker') + [@yarn_installer.name] + else + [] + end + end + + def has_yarn_binary? + add_yarn_binary.any? 
+ end + + # checks if node.js is installed via the official heroku-buildpack-nodejs using multibuildpack + # @return String if it's detected and false if it isn't + def node_preinstall_bin_path + return @node_preinstall_bin_path if defined?(@node_preinstall_bin_path) + + legacy_path = "#{Dir.pwd}/#{NODE_BP_PATH}" + path = run("which node").strip + if path && $?.success? + @node_preinstall_bin_path = path + elsif run("#{legacy_path}/node -v") && $?.success? + @node_preinstall_bin_path = legacy_path + else + @node_preinstall_bin_path = false + end + end + alias :node_js_preinstalled? :node_preinstall_bin_path + + def node_not_preinstalled? + !node_js_preinstalled? + end + + # Example: tmp/build_8523f77fb96a956101d00988dfeed9d4/.heroku/yarn/bin/ (without the `yarn` at the end) + def yarn_preinstall_bin_path + (yarn_preinstall_binary_path || "").chomp("/yarn") + end + + # Example `tmp/build_8523f77fb96a956101d00988dfeed9d4/.heroku/yarn/bin/yarn` + def yarn_preinstall_binary_path + return @yarn_preinstall_binary_path if defined?(@yarn_preinstall_binary_path) + + path = run("which yarn").strip + if path && $?.success? + @yarn_preinstall_binary_path = path + else + @yarn_preinstall_binary_path = false + end + end + + def yarn_preinstalled? + yarn_preinstall_binary_path + end + + def yarn_not_preinstalled? + !yarn_preinstalled? + end + + def run_assets_precompile_rake_task + instrument 'ruby.run_assets_precompile_rake_task' do + + precompile = rake.task("assets:precompile") + return true unless precompile.is_defined? + + topic "Precompiling assets" + precompile.invoke(env: rake_env) + if precompile.success? 
+ puts "Asset precompilation completed (#{"%.2f" % precompile.time}s)" + else + precompile_fail(precompile.output) + end + end + end + + def precompile_fail(output) + mcount "fail.assets_precompile" + log "assets_precompile", :status => "failure" + msg = "Precompiling assets failed.\n" + if output.match(/(127\.0\.0\.1)|(org\.postgresql\.util)/) + msg << "Attempted to access a nonexistent database:\n" + msg << "https://devcenter.heroku.com/articles/pre-provision-database\n" + end + + sprockets_version = bundler.gem_version('sprockets') + if output.match(/Sprockets::FileNotFound/) && (sprockets_version < Gem::Version.new('4.0.0.beta7') && sprockets_version > Gem::Version.new('4.0.0.beta4')) + mcount "fail.assets_precompile.file_not_found_beta" + msg << "If you have this file in your project\n" + msg << "try upgrading to Sprockets 4.0.0.beta7 or later:\n" + msg << "https://github.com/rails/sprockets/pull/547\n" + end + + error msg + end + + def bundler_cache + "vendor/bundle" + end + + def valid_bundler_cache?(path, metadata) + full_ruby_version = run_stdout(%q(ruby -v)).strip + rubygems_version = run_stdout(%q(gem -v)).strip + old_rubygems_version = nil + + old_rubygems_version = metadata[:ruby_version] + old_stack = metadata[:stack] + old_stack ||= DEFAULT_LEGACY_STACK + + stack_change = old_stack != @stack + if !new_app? && stack_change + return [false, "Purging Cache. Changing stack from #{old_stack} to #{@stack}"] + end + + # fix bug from v37 deploy + if File.exists?("#{path}/vendor/ruby_version") + puts "Broken cache detected. Purging build cache." + cache.clear("vendor") + FileUtils.rm_rf("#{path}/vendor/ruby_version") + return [false, "Broken cache detected. Purging build cache."] + # fix bug introduced in v38 + elsif !metadata.include?(:buildpack_version) && metadata.include?(:ruby_version) + puts "Broken cache detected. Purging build cache." + return [false, "Broken cache detected. Purging build cache."] + elsif (@bundler_cache.exists? 
|| @bundler_cache.old?) && full_ruby_version != metadata[:ruby_version] + return [false, <<-MESSAGE] +Ruby version change detected. Clearing bundler cache. +Old: #{metadata[:ruby_version]} +New: #{full_ruby_version} +MESSAGE + end + + # fix git gemspec bug from Bundler 1.3.0+ upgrade + if File.exists?(bundler_cache) && !metadata.include?(:bundler_version) && !run("find #{path}/vendor/bundle/*/*/bundler/gems/*/ -name *.gemspec").include?("No such file or directory") + return [false, "Old bundler cache detected. Clearing bundler cache."] + end + + # fix for https://github.com/heroku/heroku-buildpack-ruby/issues/86 + if (!metadata.include?(:rubygems_version) || + (old_rubygems_version == "2.0.0" && old_rubygems_version != rubygems_version)) && + metadata.include?(:ruby_version) && metadata[:ruby_version].strip.include?("ruby 2.0.0p0") + return [false, "Updating to rubygems #{rubygems_version}. Clearing bundler cache."] + end + + # fix for https://github.com/sparklemotion/nokogiri/issues/923 + if metadata.include?(:buildpack_version) && (bv = metadata[:buildpack_version].sub('v', '').to_i) && bv != 0 && bv <= 76 + return [false, <<-MESSAGE] +Fixing nokogiri install. Clearing bundler cache. +See https://github.com/sparklemotion/nokogiri/issues/923. +MESSAGE + end + + # recompile nokogiri to use new libyaml + if metadata.include?(:buildpack_version) && (bv = metadata[:buildpack_version].sub('v', '').to_i) && bv != 0 && bv <= 99 && bundler.has_gem?("psych") + return [false, <<-MESSAGE] +Need to recompile psych for CVE-2013-6393. Clearing bundler cache. +See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=737076. 
+MESSAGE + end + + # recompile gems for libyaml 0.1.7 update + if metadata.include?(:buildpack_version) && (bv = metadata[:buildpack_version].sub('v', '').to_i) && bv != 0 && bv <= 147 && + (metadata.include?(:ruby_version) && metadata[:ruby_version].match(/ruby 2\.1\.(9|10)/) || + bundler.has_gem?("psych") + ) + return [false, <<-MESSAGE] +Need to recompile gems for CVE-2014-2014-9130. Clearing bundler cache. +See https://devcenter.heroku.com/changelog-items/1016. +MESSAGE + end + + true + end + + def load_bundler_cache + instrument "ruby.load_bundler_cache" do + cache.load "vendor" + + full_ruby_version = run_stdout(%q(ruby -v)).strip + rubygems_version = run_stdout(%q(gem -v)).strip + heroku_metadata = "vendor/heroku" + old_rubygems_version = nil + ruby_version_cache = "ruby_version" + buildpack_version_cache = "buildpack_version" + bundler_version_cache = "bundler_version" + rubygems_version_cache = "rubygems_version" + stack_cache = "stack" + + # bundle clean does not remove binstubs + FileUtils.rm_rf("vendor/bundler/bin") + + old_rubygems_version = @metadata.read(ruby_version_cache).strip if @metadata.exists?(ruby_version_cache) + old_stack = @metadata.read(stack_cache).strip if @metadata.exists?(stack_cache) + old_stack ||= DEFAULT_LEGACY_STACK + + stack_change = old_stack != @stack + convert_stack = @bundler_cache.old? + @bundler_cache.convert_stack(stack_change) if convert_stack + if !new_app? && stack_change + puts "Purging Cache. Changing stack from #{old_stack} to #{@stack}" + purge_bundler_cache(old_stack) + elsif !new_app? && !convert_stack + @bundler_cache.load + end + + # fix bug from v37 deploy + if File.exists?("vendor/ruby_version") + puts "Broken cache detected. Purging build cache." + cache.clear("vendor") + FileUtils.rm_rf("vendor/ruby_version") + purge_bundler_cache + # fix bug introduced in v38 + elsif !@metadata.include?(buildpack_version_cache) && @metadata.exists?(ruby_version_cache) + puts "Broken cache detected. Purging build cache." 
+ purge_bundler_cache + elsif (@bundler_cache.exists? || @bundler_cache.old?) && @metadata.exists?(ruby_version_cache) && full_ruby_version != @metadata.read(ruby_version_cache).strip + puts "Ruby version change detected. Clearing bundler cache." + puts "Old: #{@metadata.read(ruby_version_cache).strip}" + puts "New: #{full_ruby_version}" + purge_bundler_cache + end + + # fix git gemspec bug from Bundler 1.3.0+ upgrade + if File.exists?(bundler_cache) && !@metadata.exists?(bundler_version_cache) && !run("find vendor/bundle/*/*/bundler/gems/*/ -name *.gemspec").include?("No such file or directory") + puts "Old bundler cache detected. Clearing bundler cache." + purge_bundler_cache + end + + # fix for https://github.com/heroku/heroku-buildpack-ruby/issues/86 + if (!@metadata.exists?(rubygems_version_cache) || + (old_rubygems_version == "2.0.0" && old_rubygems_version != rubygems_version)) && + @metadata.exists?(ruby_version_cache) && @metadata.read(ruby_version_cache).strip.include?("ruby 2.0.0p0") + puts "Updating to rubygems #{rubygems_version}. Clearing bundler cache." + purge_bundler_cache + end + + # fix for https://github.com/sparklemotion/nokogiri/issues/923 + if @metadata.exists?(buildpack_version_cache) && (bv = @metadata.read(buildpack_version_cache).sub('v', '').to_i) && bv != 0 && bv <= 76 + puts "Fixing nokogiri install. Clearing bundler cache." + puts "See https://github.com/sparklemotion/nokogiri/issues/923." + purge_bundler_cache + end + + # recompile nokogiri to use new libyaml + if @metadata.exists?(buildpack_version_cache) && (bv = @metadata.read(buildpack_version_cache).sub('v', '').to_i) && bv != 0 && bv <= 99 && bundler.has_gem?("psych") + puts "Need to recompile psych for CVE-2013-6393. Clearing bundler cache." + puts "See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=737076." 
+ purge_bundler_cache + end + + # recompile gems for libyaml 0.1.7 update + if @metadata.exists?(buildpack_version_cache) && (bv = @metadata.read(buildpack_version_cache).sub('v', '').to_i) && bv != 0 && bv <= 147 && + (@metadata.exists?(ruby_version_cache) && @metadata.read(ruby_version_cache).strip.match(/ruby 2\.1\.(9|10)/) || + bundler.has_gem?("psych") + ) + puts "Need to recompile gems for CVE-2014-2014-9130. Clearing bundler cache." + puts "See https://devcenter.heroku.com/changelog-items/1016." + purge_bundler_cache + end + + FileUtils.mkdir_p(heroku_metadata) + @metadata.write(ruby_version_cache, full_ruby_version, false) + @metadata.write(buildpack_version_cache, BUILDPACK_VERSION, false) + @metadata.write(bundler_version_cache, bundler.version, false) + @metadata.write(rubygems_version_cache, rubygems_version, false) + @metadata.write(stack_cache, @stack, false) + @metadata.save + end + end + + def purge_bundler_cache(stack = nil) + instrument "ruby.purge_bundler_cache" do + @bundler_cache.clear(stack) + # need to reinstall language pack gems + install_bundler_in_app(slug_vendor_base) + end + end +end diff --git a/spec/syntax_suggest/fixtures/syntax_tree.rb.txt b/spec/syntax_suggest/fixtures/syntax_tree.rb.txt new file mode 100644 index 00000000000000..1c110783f97e57 --- /dev/null +++ b/spec/syntax_suggest/fixtures/syntax_tree.rb.txt @@ -0,0 +1,9234 @@ +# frozen_string_literal: true + +require 'ripper' +require_relative 'syntax_tree/version' + +class SyntaxTree < Ripper + # Represents a line in the source. If this class is being used, it means that + # every character in the string is 1 byte in length, so we can just return the + # start of the line + the index. + class SingleByteString + def initialize(start) + @start = start + end + + def [](byteindex) + @start + byteindex + end + end + + # Represents a line in the source. 
If this class is being used, it means that + # there are characters in the string that are multi-byte, so we will build up + # an array of indices, such that array[byteindex] will be equal to the index + # of the character within the string. + class MultiByteString + def initialize(start, line) + @indices = [] + + line + .each_char + .with_index(start) do |char, index| + char.bytesize.times { @indices << index } + end + end + + def [](byteindex) + @indices[byteindex] + end + end + + # Represents the location of a node in the tree from the source code. + class Location + attr_reader :start_line, :start_char, :end_line, :end_char + + def initialize(start_line:, start_char:, end_line:, end_char:) + @start_line = start_line + @start_char = start_char + @end_line = end_line + @end_char = end_char + end + + def ==(other) + other.is_a?(Location) && start_line == other.start_line && + start_char == other.start_char && end_line == other.end_line && + end_char == other.end_char + end + + def to(other) + Location.new( + start_line: start_line, + start_char: start_char, + end_line: other.end_line, + end_char: other.end_char + ) + end + + def to_json(*opts) + [start_line, start_char, end_line, end_char].to_json(*opts) + end + + def self.token(line:, char:, size:) + new( + start_line: line, + start_char: char, + end_line: line, + end_char: char + size + ) + end + + def self.fixed(line:, char:) + new(start_line: line, start_char: char, end_line: line, end_char: char) + end + end + + # A special parser error so that we can get nice syntax displays on the error + # message when prettier prints out the results. + class ParseError < StandardError + attr_reader :lineno, :column + + def initialize(error, lineno, column) + super(error) + @lineno = lineno + @column = column + end + end + + attr_reader :source, :lines, :tokens + + # This is an attr_accessor so Stmts objects can grab comments out of this + # array and attach them to themselves. 
+ attr_accessor :comments + + def initialize(source, *) + super + + # We keep the source around so that we can refer back to it when we're + # generating the AST. Sometimes it's easier to just reference the source + # string when you want to check if it contains a certain character, for + # example. + @source = source + + # Similarly, we keep the lines of the source string around to be able to + # check if certain lines contain certain characters. For example, we'll use + # this to generate the content that goes after the __END__ keyword. Or we'll + # use this to check if a comment has other content on its line. + @lines = source.split("\n") + + # This is the full set of comments that have been found by the parser. It's + # a running list. At the end of every block of statements, they will go in + # and attempt to grab any comments that are on their own line and turn them + # into regular statements. So at the end of parsing the only comments left + # in here will be comments on lines that also contain code. + @comments = [] + + # This is the current embdoc (comments that start with =begin and end with + # =end). Since they can't be nested, there's no need for a stack here, as + # there can only be one active. These end up getting dumped into the + # comments list before getting picked up by the statements that surround + # them. + @embdoc = nil + + # This is an optional node that can be present if the __END__ keyword is + # used in the file. In that case, this will represent the content after that + # keyword. + @__end__ = nil + + # Heredocs can actually be nested together if you're using interpolation, so + # this is a stack of heredoc nodes that are currently being created. When we + # get to the token that finishes off a heredoc node, we pop the top + # one off. If there are others surrounding it, then the body events will now + # be added to the correct nodes. + @heredocs = [] + + # This is a running list of tokens that have fired. 
It's useful + # mostly for maintaining location information. For example, if you're inside + # the handle of a def event, then in order to determine where the AST node + # started, you need to look backward in the tokens to find a def + # keyword. Most of the time, when a parser event consumes one of these + # events, it will be deleted from the list. So ideally, this list stays + # pretty short over the course of parsing a source string. + @tokens = [] + + # Here we're going to build up a list of SingleByteString or MultiByteString + # objects. They're each going to represent a string in the source. They are + # used by the `char_pos` method to determine where we are in the source + # string. + @line_counts = [] + last_index = 0 + + @source.lines.each do |line| + if line.size == line.bytesize + @line_counts << SingleByteString.new(last_index) + else + @line_counts << MultiByteString.new(last_index, line) + end + + last_index += line.size + end + end + + def self.parse(source) + parser = new(source) + response = parser.parse + response unless parser.error? + end + + private + + # ---------------------------------------------------------------------------- + # :section: Helper methods + # The following methods are used by the ripper event handlers to either + # determine their bounds or query other nodes. + # ---------------------------------------------------------------------------- + + # This represents the current place in the source string that we've gotten to + # so far. We have a memoized line_counts object that we can use to get the + # number of characters that we've had to go through to get to the beginning of + # this line, then we add the number of columns into this line that we've gone + # through. + def char_pos + @line_counts[lineno - 1][column] + end + + # As we build up a list of tokens, we'll periodically need to go backwards and + # find the ones that we've already hit in order to determine the location + # information for nodes that use them. 
For example, if you have a module node + # then you'll look backward for a kw token to determine your start location. + # + # This works with nesting since we're deleting tokens from the list once + # they've been used up. For example if you had nested module declarations then + # the innermost declaration would grab the last kw node that matches "module" + # (which would happen to be the innermost keyword). Then the outer one would + # only be able to grab the first one. In this way all of the tokens act as + # their own stack. + def find_token(type, value = :any, consume: true) + index = + tokens.rindex do |token| + token.is_a?(type) && (value == :any || (token.value == value)) + end + + if consume + # If we're expecting to be able to find a token and consume it, + # but can't actually find it, then we need to raise an error. This is + # _usually_ caused by a syntax error in the source that we're printing. It + # could also be caused by accidentally attempting to consume a token twice + # by two different parser event handlers. + unless index + message = "Cannot find expected #{value == :any ? type : value}" + raise ParseError.new(message, lineno, column) + end + + tokens.delete_at(index) + elsif index + tokens[index] + end + end + + # A helper function to find a :: operator. We do special handling instead of + # using find_token here because we don't pop off all of the :: + # operators so you could end up getting the wrong information if you have for + # instance ::X::Y::Z. + def find_colon2_before(const) + index = + tokens.rindex do |token| + token.is_a?(Op) && token.value == '::' && + token.location.start_char < const.location.start_char + end + + tokens[index] + end + + # Finds the next position in the source string that begins a statement. This + # is used to bind statements lists and make sure they don't include a + # preceding comment. 
For example, we want the following comment to be attached + # to the class node and not the statement node: + # + # class Foo # :nodoc: + # ... + # end + # + # By finding the next non-space character, we can make sure that the bounds of + # the statement list are correct. + def find_next_statement_start(position) + remaining = source[position..-1] + + if remaining.sub(/\A +/, '')[0] == '#' + return position + remaining.index("\n") + end + + position + end + + # ---------------------------------------------------------------------------- + # :section: Ripper event handlers + # The following methods all handle a dispatched ripper event. + # ---------------------------------------------------------------------------- + + # BEGINBlock represents the use of the +BEGIN+ keyword, which hooks into the + # lifecycle of the interpreter. Whatever is inside the block will get executed + # when the program starts. + # + # BEGIN { + # } + # + # Interestingly, the BEGIN keyword doesn't allow the do and end keywords for + # the block. Only braces are permitted. 
+ class BEGINBlock + # [LBrace] the left brace that is seen after the keyword + attr_reader :lbrace + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(lbrace:, statements:, location:) + @lbrace = lbrace + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('BEGIN') + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :BEGIN, + lbrace: lbrace, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_BEGIN: (Statements statements) -> BEGINBlock + def on_BEGIN(statements) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) + + statements.bind( + find_next_statement_start(lbrace.location.end_char), + rbrace.location.start_char + ) + + keyword = find_token(Kw, 'BEGIN') + + BEGINBlock.new( + lbrace: lbrace, + statements: statements, + location: keyword.location.to(rbrace.location) + ) + end + + # CHAR represents a single codepoint in the script encoding. + # + # ?a + # + # In the example above, the CHAR node represents the string literal "a". You + # can use control characters with this as well, as in ?\C-a.
+ class CHAR + # [String] the value of the character literal + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('CHAR') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :CHAR, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_CHAR: (String value) -> CHAR + def on_CHAR(value) + node = + CHAR.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # ENDBlock represents the use of the +END+ keyword, which hooks into the + # lifecycle of the interpreter. Whatever is inside the block will get executed + # when the program ends. + # + # END { + # } + # + # Interestingly, the END keyword doesn't allow the do and end keywords for the + # block. Only braces are permitted. + class ENDBlock + # [LBrace] the left brace that is seen after the keyword + attr_reader :lbrace + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(lbrace:, statements:, location:) + @lbrace = lbrace + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('END') + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { type: :END, lbrace: lbrace, stmts: statements, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_END: (Statements statements) -> ENDBlock + def on_END(statements) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) + + statements.bind( + find_next_statement_start(lbrace.location.end_char), + rbrace.location.start_char + ) + + keyword = find_token(Kw, 'END') + + ENDBlock.new( + lbrace: lbrace, + statements: statements, + location: keyword.location.to(rbrace.location) + ) 
+ end + + # EndContent represents the use of __END__ syntax, which allows individual + # scripts to keep content after the main ruby code that can be read through + # the DATA constant. + # + # puts DATA.read + # + # __END__ + # some other content that is not executed by the program + # + class EndContent + # [String] the content after the script + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('__end__') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :__end__, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on___end__: (String value) -> EndContent + def on___end__(value) + @__end__ = + EndContent.new( + value: lines[lineno..-1].join("\n"), + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + end + + # Alias represents the use of the +alias+ keyword with regular arguments (not + # global variables). The +alias+ keyword is used to make a method respond to + # another name as well as the current one. + # + # alias aliased_name name + # + # For the example above, in the current context you can now call aliased_name + # and it will execute the name method. When you're aliasing two methods, you + # can either provide bare words (like the example above) or you can provide + # symbols (note that this includes dynamic symbols like + # :"left-#{middle}-right"). 
+ class Alias + # [DynaSymbol | SymbolLiteral] the new name of the method + attr_reader :left + + # [DynaSymbol | SymbolLiteral] the old name of the method + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('alias') + q.breakable + q.pp(left) + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { type: :alias, left: left, right: right, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_alias: ( + # (DynaSymbol | SymbolLiteral) left, + # (DynaSymbol | SymbolLiteral) right + # ) -> Alias + def on_alias(left, right) + keyword = find_token(Kw, 'alias') + + Alias.new( + left: left, + right: right, + location: keyword.location.to(right.location) + ) + end + + # ARef represents when you're pulling a value out of a collection at a + # specific index. Put another way, it's any time you're calling the method + # #[]. + # + # collection[index] + # + # The node usually contains two children, the collection and the index. In + # some cases, you don't necessarily have the second child node, because you + # can call procs with a pretty esoteric syntax.
In the following example, you + # wouldn't have a second child node: + # + # collection[] + # + class ARef + # [untyped] the value being indexed + attr_reader :collection + + # [nil | Args | ArgsAddBlock] the value being passed within the brackets + attr_reader :index + + # [Location] the location of this node + attr_reader :location + + def initialize(collection:, index:, location:) + @collection = collection + @index = index + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('aref') + q.breakable + q.pp(collection) + q.breakable + q.pp(index) + end + end + + def to_json(*opts) + { + type: :aref, + collection: collection, + index: index, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_aref: (untyped collection, (nil | Args | ArgsAddBlock) index) -> ARef + def on_aref(collection, index) + find_token(LBracket) + rbracket = find_token(RBracket) + + ARef.new( + collection: collection, + index: index, + location: collection.location.to(rbracket.location) + ) + end + + # ARefField represents assigning values into collections at specific indices. + # Put another way, it's any time you're calling the method #[]=. The + # ARefField node itself is just the left side of the assignment, and they're + # always wrapped in assign nodes. 
+ # + # collection[index] = value + # + class ARefField + # [untyped] the value being indexed + attr_reader :collection + + # [nil | ArgsAddBlock] the value being passed within the brackets + attr_reader :index + + # [Location] the location of this node + attr_reader :location + + def initialize(collection:, index:, location:) + @collection = collection + @index = index + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('aref_field') + q.breakable + q.pp(collection) + q.breakable + q.pp(index) + end + end + + def to_json(*opts) + { + type: :aref_field, + collection: collection, + index: index, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_aref_field: ( + # untyped collection, + # (nil | ArgsAddBlock) index + # ) -> ARefField + def on_aref_field(collection, index) + find_token(LBracket) + rbracket = find_token(RBracket) + + ARefField.new( + collection: collection, + index: index, + location: collection.location.to(rbracket.location) + ) + end + + # def on_arg_ambiguous(value) + # value + # end + + # ArgParen represents wrapping arguments to a method inside a set of + # parentheses. + # + # method(argument) + # + # In the example above, there would be an ArgParen node around the + # ArgsAddBlock node that represents the set of arguments being sent to the + # method method. 
The argument child node can be +nil+ if no arguments were + # passed, as in: + # + # method() + # + class ArgParen + # [nil | Args | ArgsAddBlock | ArgsForward] the arguments inside the + # parentheses + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('arg_paren') + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :arg_paren, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_arg_paren: ( + # (nil | Args | ArgsAddBlock | ArgsForward) arguments + # ) -> ArgParen + def on_arg_paren(arguments) + lparen = find_token(LParen) + rparen = find_token(RParen) + + # If the arguments exceed the ending of the parentheses, then we know we + # have a heredoc in the arguments, and we need to use the bounds of the + # arguments to determine how large the arg_paren is. + ending = + if arguments && arguments.location.end_line > rparen.location.end_line + arguments + else + rparen + end + + ArgParen.new( + arguments: arguments, + location: lparen.location.to(ending.location) + ) + end + + # Args represents a list of arguments being passed to a method call or array + # literal. 
+ # + # method(first, second, third) + # + class Args + # [Array[ untyped ]] the arguments that this node wraps + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('args') + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :args, parts: parts, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_args_add: (Args arguments, untyped argument) -> Args + def on_args_add(arguments, argument) + if arguments.parts.empty? + # If this is the first argument being passed into the list of arguments, + # then we're going to use the bounds of the argument to override the + # parent node's location since this will be more accurate. + Args.new(parts: [argument], location: argument.location) + else + # Otherwise we're going to update the existing list with the argument + # being added as well as the new end bounds. + Args.new( + parts: arguments.parts << argument, + location: arguments.location.to(argument.location) + ) + end + end + + # ArgsAddBlock represents a list of arguments and potentially a block + # argument. ArgsAddBlock is commonly seen being passed to any method where you + # use parentheses (wrapped in an ArgParen node). It’s also used to pass + # arguments to the various control-flow keywords like +return+. 
+ # + # method(argument, &block) + # + class ArgsAddBlock + # [Args] the arguments before the optional block + attr_reader :arguments + + # [nil | untyped] the optional block argument + attr_reader :block + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, block:, location:) + @arguments = arguments + @block = block + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('args_add_block') + q.breakable + q.pp(arguments) + q.breakable + q.pp(block) + end + end + + def to_json(*opts) + { + type: :args_add_block, + args: arguments, + block: block, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_args_add_block: ( + # Args arguments, + # (false | untyped) block + # ) -> ArgsAddBlock + def on_args_add_block(arguments, block) + ending = block || arguments + + ArgsAddBlock.new( + arguments: arguments, + block: block || nil, + location: arguments.location.to(ending.location) + ) + end + + # ArgStar represents using a splat operator on an expression. + # + # method(*arguments) + # + class ArgStar + # [untyped] the expression being splatted + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('arg_star') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :arg_star, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_args_add_star: (Args arguments, untyped star) -> Args + def on_args_add_star(arguments, argument) + beginning = find_token(Op, '*') + ending = argument || beginning + + location = + if arguments.parts.empty?
+ ending.location + else + arguments.location.to(ending.location) + end + + arg_star = + ArgStar.new( + value: argument, + location: beginning.location.to(ending.location) + ) + + Args.new(parts: arguments.parts << arg_star, location: location) + end + + # ArgsForward represents forwarding all kinds of arguments onto another method + # call. + # + # def request(method, path, **headers, &block); end + # + # def get(...) + # request(:GET, ...) + # end + # + # def post(...) + # request(:POST, ...) + # end + # + # In the example above, both the get and post methods are forwarding all of + # their arguments (positional, keyword, and block) on to the request method. + # The ArgsForward node appears in both the caller (the request method calls) + # and the callee (the get and post definitions). + class ArgsForward + # [String] the value of the operator + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('args_forward') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :args_forward, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_args_forward: () -> ArgsForward + def on_args_forward + op = find_token(Op, '...') + + ArgsForward.new(value: op.value, location: op.location) + end + + # :call-seq: + # on_args_new: () -> Args + def on_args_new + Args.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + end + + # ArrayLiteral represents any form of an array literal, and contains myriad + # child nodes because of the special array literal syntax like %w and %i. + # + # [] + # [one, two, three] + # [*one_two_three] + # %i[one two three] + # %w[one two three] + # %I[one two three] + # %W[one two three] + # + # Every line in the example above produces an ArrayLiteral node. 
In order, the + # child contents node of this ArrayLiteral node would be nil, Args, QSymbols, + # QWords, Symbols, and Words. + class ArrayLiteral + # [nil | Args | QSymbols | QWords | Symbols | Words] the + # contents of the array + attr_reader :contents + + # [Location] the location of this node + attr_reader :location + + def initialize(contents:, location:) + @contents = contents + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('array') + q.breakable + q.pp(contents) + end + end + + def to_json(*opts) + { type: :array, cnts: contents, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_array: ( + # (nil | Args | QSymbols | QWords | Symbols | Words) contents + # ) -> ArrayLiteral + def on_array(contents) + if !contents || contents.is_a?(Args) + lbracket = find_token(LBracket) + rbracket = find_token(RBracket) + + ArrayLiteral.new( + contents: contents, + location: lbracket.location.to(rbracket.location) + ) + else + tstring_end = find_token(TStringEnd) + contents = + contents.class.new( + elements: contents.elements, + location: contents.location.to(tstring_end.location) + ) + + ArrayLiteral.new(contents: contents, location: contents.location) + end + end + + # AryPtn represents matching against an array pattern using the Ruby 2.7+ + # pattern matching syntax. It’s one of the more complicated nodes, because + # the four parameters that it accepts can almost all be nil. + # + # case [1, 2, 3] + # in [Integer, Integer] + # "matched" + # in Container[Integer, Integer] + # "matched" + # in [Integer, *, Integer] + # "matched" + # end + # + # An AryPtn node is created with four parameters: an optional constant + # wrapper, an array of positional matches, an optional splat with identifier, + # and an optional array of positional matches that occur after the splat. + # All of the in clauses above would create an AryPtn node. 
+ class AryPtn + # [nil | VarRef] the optional constant wrapper + attr_reader :constant + + # [Array[ untyped ]] the regular positional arguments that this array + # pattern is matching against + attr_reader :requireds + + # [nil | VarField] the optional starred identifier that grabs up a list of + # positional arguments + attr_reader :rest + + # [Array[ untyped ]] the list of positional arguments occurring after the + # optional star if there is one + attr_reader :posts + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, requireds:, rest:, posts:, location:) + @constant = constant + @requireds = requireds + @rest = rest + @posts = posts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('aryptn') + + if constant + q.breakable + q.pp(constant) + end + + if requireds.any? + q.breakable + q.group(2, '(', ')') do + q.seplist(requireds) { |required| q.pp(required) } + end + end + + if rest + q.breakable + q.pp(rest) + end + + if posts.any? + q.breakable + q.group(2, '(', ')') { q.seplist(posts) { |post| q.pp(post) } } + end + end + end + + def to_json(*opts) + { + type: :aryptn, + constant: constant, + reqs: requireds, + rest: rest, + posts: posts, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_aryptn: ( + # (nil | VarRef) constant, + # (nil | Array[untyped]) requireds, + # (nil | VarField) rest, + # (nil | Array[untyped]) posts + # ) -> AryPtn + def on_aryptn(constant, requireds, rest, posts) + parts = [constant, *requireds, rest, *posts].compact + + AryPtn.new( + constant: constant, + requireds: requireds || [], + rest: rest, + posts: posts || [], + location: parts[0].location.to(parts[-1].location) + ) + end + + # Assign represents assigning something to a variable or constant. Generally, + # the left side of the assignment is going to be any node that ends with the + # name "Field". 
+ # + # variable = value + # + class Assign + # [ARefField | ConstPathField | Field | TopConstField | VarField] the target + # to assign the result of the expression to + attr_reader :target + + # [untyped] the expression to be assigned + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(target:, value:, location:) + @target = target + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assign') + q.breakable + q.pp(target) + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :assign, target: target, value: value, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_assign: ( + # (ARefField | ConstPathField | Field | TopConstField | VarField) target, + # untyped value + # ) -> Assign + def on_assign(target, value) + Assign.new( + target: target, + value: value, + location: target.location.to(value.location) + ) + end + + # Assoc represents a key-value pair within a hash. It is a child node of + # either an AssocListFromArgs or a BareAssocHash. + # + # { key1: value1, key2: value2 } + # + # In the above example, there would be two Assoc nodes.
+ class Assoc + # [untyped] the key of this pair + attr_reader :key + + # [untyped] the value of this pair + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(key:, value:, location:) + @key = key + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assoc') + q.breakable + q.pp(key) + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :assoc, key: key, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_assoc_new: (untyped key, untyped value) -> Assoc + def on_assoc_new(key, value) + Assoc.new( + key: key, + value: value, + location: key.location.to(value.location) + ) + end + + # AssocSplat represents double-splatting a value into a hash (either a hash + # literal or a bare hash in a method call). + # + # { **pairs } + # + class AssocSplat + # [untyped] the expression that is being splatted + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assoc_splat') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :assoc_splat, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_assoc_splat: (untyped value) -> AssocSplat + def on_assoc_splat(value) + operator = find_token(Op, '**') + + AssocSplat.new(value: value, location: operator.location.to(value.location)) + end + + # AssocListFromArgs represents the key-value pairs of a hash literal. Its + # parent node is always a hash. 
+ # + # { key1: value1, key2: value2 } + # + class AssocListFromArgs + # [Array[ AssocNew | AssocSplat ]] + attr_reader :assocs + + # [Location] the location of this node + attr_reader :location + + def initialize(assocs:, location:) + @assocs = assocs + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assoclist_from_args') + q.breakable + q.group(2, '(', ')') { q.seplist(assocs) { |assoc| q.pp(assoc) } } + end + end + + def to_json(*opts) + { type: :assoclist_from_args, assocs: assocs, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_assoclist_from_args: ( + # Array[AssocNew | AssocSplat] assocs + # ) -> AssocListFromArgs + def on_assoclist_from_args(assocs) + AssocListFromArgs.new( + assocs: assocs, + location: assocs[0].location.to(assocs[-1].location) + ) + end + + # Backref represents a global variable referencing a matched value. It comes + # in the form of a $ followed by a positive integer. + # + # $1 + # + class Backref + # [String] the name of the global backreference variable + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('backref') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :backref, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_backref: (String value) -> Backref + def on_backref(value) + node = + Backref.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Backtick represents the use of the ` operator. It's usually found being used + # for an XStringLiteral, but could also be found as the name of a method being + # defined. 
+ class Backtick + # [String] the backtick in the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('backtick') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :backtick, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_backtick: (String value) -> Backtick + def on_backtick(value) + node = + Backtick.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # BareAssocHash represents a hash of contents being passed as a method + # argument (and therefore has omitted braces). It's very similar to an + # AssocListFromArgs node. + # + # method(key1: value1, key2: value2) + # + class BareAssocHash + # [Array[ AssocNew | AssocSplat ]] + attr_reader :assocs + + # [Location] the location of this node + attr_reader :location + + def initialize(assocs:, location:) + @assocs = assocs + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('bare_assoc_hash') + q.breakable + q.group(2, '(', ')') { q.seplist(assocs) { |assoc| q.pp(assoc) } } + end + end + + def to_json(*opts) + { type: :bare_assoc_hash, assocs: assocs, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_bare_assoc_hash: (Array[AssocNew | AssocSplat] assocs) -> BareAssocHash + def on_bare_assoc_hash(assocs) + BareAssocHash.new( + assocs: assocs, + location: assocs[0].location.to(assocs[-1].location) + ) + end + + # Begin represents a begin..end chain. 
+ # + # begin + # value + # end + # + class Begin + # [BodyStmt] the bodystmt that contains the contents of this begin block + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(bodystmt:, location:) + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('begin') + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { type: :begin, bodystmt: bodystmt, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_begin: (BodyStmt bodystmt) -> Begin + def on_begin(bodystmt) + keyword = find_token(Kw, 'begin') + end_char = + if bodystmt.rescue_clause || bodystmt.ensure_clause || + bodystmt.else_clause + bodystmt.location.end_char + else + find_token(Kw, 'end').location.end_char + end + + bodystmt.bind(keyword.location.end_char, end_char) + + Begin.new( + bodystmt: bodystmt, + location: keyword.location.to(bodystmt.location) + ) + end + + # Binary represents any expression that involves two sub-expressions with an + # operator in between. 
This can be something that looks like a mathematical + # operation: + # + # 1 + 1 + # + # but can also be something like pushing a value onto an array: + # + # array << value + # + class Binary + # [untyped] the left-hand side of the expression + attr_reader :left + + # [String] the operator used between the two expressions + attr_reader :operator + + # [untyped] the right-hand side of the expression + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, operator:, right:, location:) + @left = left + @operator = operator + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('binary') + q.breakable + q.pp(left) + q.breakable + q.text(operator) + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { + type: :binary, + left: left, + op: operator, + right: right, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_binary: (untyped left, (Op | Symbol) operator, untyped right) -> Binary + def on_binary(left, operator, right) + # On most Ruby implementations, operator is a Symbol that represents that + # operation being performed. For instance in the example `1 < 2`, the + # `operator` object would be `:<`. However, on JRuby, it's an `@op` node, + # so here we're going to explicitly convert it into the same normalized + # form. + operator = tokens.delete(operator).value unless operator.is_a?(Symbol) + + Binary.new( + left: left, + operator: operator, + right: right, + location: left.location.to(right.location) + ) + end + + # BlockVar represents the parameters being declared for a block. Effectively + # this node is everything contained within the pipes. This includes all of the + # various parameter types, as well as block-local variable declarations. 
+ # + # method do |positional, optional = value, keyword:, &block; local| + # end + # + class BlockVar + # [Params] the parameters being declared with the block + attr_reader :params + + # [Array[ Ident ]] the list of block-local variable declarations + attr_reader :locals + + # [Location] the location of this node + attr_reader :location + + def initialize(params:, locals:, location:) + @params = params + @locals = locals + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('block_var') + q.breakable + q.pp(params) + + if locals.any? + q.breakable + q.group(2, '(', ')') { q.seplist(locals) { |local| q.pp(local) } } + end + end + end + + def to_json(*opts) + { + type: :block_var, + params: params, + locals: locals, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar + def on_block_var(params, locals) + index = + tokens.rindex do |node| + node.is_a?(Op) && %w[| ||].include?(node.value) && + node.location.start_char < params.location.start_char + end + + beginning = tokens[index] + ending = tokens[-1] + + BlockVar.new( + params: params, + locals: locals || [], + location: beginning.location.to(ending.location) + ) + end + + # BlockArg represents declaring a block parameter on a method definition.
+ # + # def method(&block); end + # + class BlockArg + # [Ident] the name of the block argument + attr_reader :name + + # [Location] the location of this node + attr_reader :location + + def initialize(name:, location:) + @name = name + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('blockarg') + q.breakable + q.pp(name) + end + end + + def to_json(*opts) + { type: :blockarg, name: name, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_blockarg: (Ident name) -> BlockArg + def on_blockarg(name) + operator = find_token(Op, '&') + + BlockArg.new(name: name, location: operator.location.to(name.location)) + end + + # bodystmt can't actually determine its bounds appropriately because it + # doesn't necessarily know where it started. So the parent node needs to + # report back down into this one where it goes. + class BodyStmt + # [Statements] the list of statements inside the begin clause + attr_reader :statements + + # [nil | Rescue] the optional rescue chain attached to the begin clause + attr_reader :rescue_clause + + # [nil | Statements] the optional set of statements inside the else clause + attr_reader :else_clause + + # [nil | Ensure] the optional ensure clause + attr_reader :ensure_clause + + # [Location] the location of this node + attr_reader :location + + def initialize( + statements:, + rescue_clause:, + else_clause:, + ensure_clause:, + location: + ) + @statements = statements + @rescue_clause = rescue_clause + @else_clause = else_clause + @ensure_clause = ensure_clause + @location = location + end + + def bind(start_char, end_char) + @location = + Location.new( + start_line: location.start_line, + start_char: start_char, + end_line: location.end_line, + end_char: end_char + ) + + parts = [rescue_clause, else_clause, ensure_clause] + + # Here we're going to determine the bounds for the statements + consequent = parts.compact.first + statements.bind( + start_char, + consequent ? 
consequent.location.start_char : end_char + ) + + # Next we're going to determine the rescue clause if there is one + if rescue_clause + consequent = parts.drop(1).compact.first + rescue_clause.bind_end( + consequent ? consequent.location.start_char : end_char + ) + end + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('bodystmt') + q.breakable + q.pp(statements) + + if rescue_clause + q.breakable + q.pp(rescue_clause) + end + + if else_clause + q.breakable + q.pp(else_clause) + end + + if ensure_clause + q.breakable + q.pp(ensure_clause) + end + end + end + + def to_json(*opts) + { + type: :bodystmt, + stmts: statements, + rsc: rescue_clause, + els: else_clause, + ens: ensure_clause, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_bodystmt: ( + # Statements statements, + # (nil | Rescue) rescue_clause, + # (nil | Statements) else_clause, + # (nil | Ensure) ensure_clause + # ) -> BodyStmt + def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + BodyStmt.new( + statements: statements, + rescue_clause: rescue_clause, + else_clause: else_clause, + ensure_clause: ensure_clause, + location: Location.fixed(line: lineno, char: char_pos) + ) + end + + # BraceBlock represents passing a block to a method call using the { } + # operators. 
+ # + # method { |variable| variable + 1 } + # + class BraceBlock + # [LBrace] the left brace that opens this block + attr_reader :lbrace + + # [nil | BlockVar] the optional set of parameters to the block + attr_reader :block_var + + # [Statements] the list of expressions to evaluate within the block + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(lbrace:, block_var:, statements:, location:) + @lbrace = lbrace + @block_var = block_var + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('brace_block') + + if block_var + q.breakable + q.pp(block_var) + end + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :brace_block, + lbrace: lbrace, + block_var: block_var, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_brace_block: ( + # (nil | BlockVar) block_var, + # Statements statements + # ) -> BraceBlock + def on_brace_block(block_var, statements) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) + + statements.bind( + find_next_statement_start((block_var || lbrace).location.end_char), + rbrace.location.start_char + ) + + location = + Location.new( + start_line: lbrace.location.start_line, + start_char: lbrace.location.start_char, + end_line: [rbrace.location.end_line, statements.location.end_line].max, + end_char: rbrace.location.end_char + ) + + BraceBlock.new( + lbrace: lbrace, + block_var: block_var, + statements: statements, + location: location + ) + end + + # Break represents using the +break+ keyword. 
+ # + # break + # + # It can also optionally accept arguments, as in: + # + # break 1 + # + class Break + # [Args | ArgsAddBlock] the arguments being sent to the keyword + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('break') + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :break, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_break: ((Args | ArgsAddBlock) arguments) -> Break + def on_break(arguments) + keyword = find_token(Kw, 'break') + + location = keyword.location + location = location.to(arguments.location) unless arguments.is_a?(Args) + + Break.new(arguments: arguments, location: location) + end + + # Call represents a method call. This node doesn't contain the arguments being + # passed (if arguments are passed, this node will get nested under a + # MethodAddArg node). 
+ # + # receiver.message + # + class Call + # [untyped] the receiver of the method call + attr_reader :receiver + + # [:"::" | Op | Period] the operator being used to send the message + attr_reader :operator + + # [:call | Backtick | Const | Ident | Op] the message being sent + attr_reader :message + + # [Location] the location of this node + attr_reader :location + + def initialize(receiver:, operator:, message:, location:) + @receiver = receiver + @operator = operator + @message = message + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('call') + q.breakable + q.pp(receiver) + q.breakable + q.pp(operator) + q.breakable + q.pp(message) + end + end + + def to_json(*opts) + { + type: :call, + receiver: receiver, + op: operator, + message: message, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_call: ( + # untyped receiver, + # (:"::" | Op | Period) operator, + # (:call | Backtick | Const | Ident | Op) message + # ) -> Call + def on_call(receiver, operator, message) + ending = message + ending = operator if message == :call + + Call.new( + receiver: receiver, + operator: operator, + message: message, + location: + Location.new( + start_line: receiver.location.start_line, + start_char: receiver.location.start_char, + end_line: [ending.location.end_line, receiver.location.end_line].max, + end_char: ending.location.end_char + ) + ) + end + + # Case represents the beginning of a case chain. 
+ # + # case value + # when 1 + # "one" + # when 2 + # "two" + # else + # "number" + # end + # + class Case + # [nil | untyped] optional value being switched on + attr_reader :value + + # [In | When] the next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, consequent:, location:) + @value = value + @consequent = consequent + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('case') + + if value + q.breakable + q.pp(value) + end + + q.breakable + q.pp(consequent) + end + end + + def to_json(*opts) + { type: :case, value: value, cons: consequent, loc: location }.to_json( + *opts + ) + end + end + + # RAssign represents a single-line pattern match. + # + # value in pattern + # value => pattern + # + class RAssign + # [untyped] the left-hand expression + attr_reader :value + + # [Kw | Op] the operator being used to match against the pattern, which is + # either => or in + attr_reader :operator + + # [untyped] the pattern on the right-hand side of the expression + attr_reader :pattern + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, operator:, pattern:, location:) + @value = value + @operator = operator + @pattern = pattern + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('rassign') + + q.breakable + q.pp(value) + + q.breakable + q.pp(operator) + + q.breakable + q.pp(pattern) + end + end + + def to_json(*opts) + { + type: :rassign, + value: value, + op: operator, + pattern: pattern, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_case: (untyped value, untyped consequent) -> Case | RAssign + def on_case(value, consequent) + if keyword = find_token(Kw, 'case', consume: false) + tokens.delete(keyword) + + Case.new( + value: value, + consequent: consequent, + location: keyword.location.to(consequent.location) + ) + else + operator = 
find_token(Kw, 'in', consume: false) || find_token(Op, '=>') + + RAssign.new( + value: value, + operator: operator, + pattern: consequent, + location: value.location.to(consequent.location) + ) + end + end + + # Class represents defining a class using the +class+ keyword. + # + # class Container + # end + # + # Classes can have path names as their class name in case it's being nested + # under a namespace, as in: + # + # class Namespace::Container + # end + # + # Classes can also be defined as a top-level path, in the case that it's + # already in a namespace but you want to define it at the top-level instead, + # as in: + # + # module OtherNamespace + # class ::Namespace::Container + # end + # end + # + # All of these declarations can also have an optional superclass reference, as + # in: + # + # class Child < Parent + # end + # + # That superclass can actually be any Ruby expression, it doesn't necessarily + # need to be a constant, as in: + # + # class Child < method + # end + # + class ClassDeclaration + # [ConstPathRef | ConstRef | TopConstRef] the name of the class being + # defined + attr_reader :constant + + # [nil | untyped] the optional superclass declaration + attr_reader :superclass + + # [BodyStmt] the expressions to execute within the context of the class + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, superclass:, bodystmt:, location:) + @constant = constant + @superclass = superclass + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('class') + + q.breakable + q.pp(constant) + + if superclass + q.breakable + q.pp(superclass) + end + + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { + type: :class, + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_class: ( + # (ConstPathRef | ConstRef | TopConstRef) constant, + 
# untyped superclass, + # BodyStmt bodystmt + # ) -> ClassDeclaration + def on_class(constant, superclass, bodystmt) + beginning = find_token(Kw, 'class') + ending = find_token(Kw, 'end') + + bodystmt.bind( + find_next_statement_start((superclass || constant).location.end_char), + ending.location.start_char + ) + + ClassDeclaration.new( + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + location: beginning.location.to(ending.location) + ) + end + + # Comma represents the use of the , operator. + class Comma + # [String] the comma in the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_comma: (String value) -> Comma + def on_comma(value) + node = + Comma.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Command represents a method call with arguments and no parentheses. Note + # that Command nodes only happen when there is no explicit receiver for this + # method. 
+ # + # method argument + # + class Command + # [Const | Ident] the message being sent to the implicit receiver + attr_reader :message + + # [Args | ArgsAddBlock] the arguments being sent with the message + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(message:, arguments:, location:) + @message = message + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('command') + + q.breakable + q.pp(message) + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { + type: :command, + message: message, + args: arguments, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_command: ( + # (Const | Ident) message, + # (Args | ArgsAddBlock) arguments + # ) -> Command + def on_command(message, arguments) + Command.new( + message: message, + arguments: arguments, + location: message.location.to(arguments.location) + ) + end + + # CommandCall represents a method call on an object with arguments and no + # parentheses. 
+ # + # object.method argument + # + class CommandCall + # [untyped] the receiver of the message + attr_reader :receiver + + # [:"::" | Op | Period] the operator used to send the message + attr_reader :operator + + # [Const | Ident | Op] the message being send + attr_reader :message + + # [Args | ArgsAddBlock] the arguments going along with the message + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(receiver:, operator:, message:, arguments:, location:) + @receiver = receiver + @operator = operator + @message = message + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('command_call') + + q.breakable + q.pp(receiver) + + q.breakable + q.pp(operator) + + q.breakable + q.pp(message) + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { + type: :command_call, + receiver: receiver, + op: operator, + message: message, + args: arguments, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_command_call: ( + # untyped receiver, + # (:"::" | Op | Period) operator, + # (Const | Ident | Op) message, + # (Args | ArgsAddBlock) arguments + # ) -> CommandCall + def on_command_call(receiver, operator, message, arguments) + ending = arguments || message + + CommandCall.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + location: receiver.location.to(ending.location) + ) + end + + # Comment represents a comment in the source. + # + # # comment + # + class Comment + # [String] the contents of the comment + attr_reader :value + + # [boolean] whether or not there is code on the same line as this comment. + # If there is, then inline will be true. + attr_reader :inline + alias inline? 
inline + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, inline:, location:) + @value = value + @inline = inline + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('comment') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { + type: :comment, + value: value.force_encoding('UTF-8'), + inline: inline, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_comment: (String value) -> Comment + def on_comment(value) + line = lineno + comment = + Comment.new( + value: value[1..-1].chomp, + inline: value.strip != lines[line - 1], + location: + Location.token(line: line, char: char_pos, size: value.size - 1) + ) + + @comments << comment + comment + end + + # Const represents a literal value that _looks_ like a constant. This could + # actually be a reference to a constant: + # + # Constant + # + # It could also be something that looks like a constant in another context, as + # in a method call to a capitalized method: + # + # object.Constant + # + # or a symbol that starts with a capital letter: + # + # :Constant + # + class Const + # [String] the name of the constant + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :const, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_const: (String value) -> Const + def on_const(value) + node = + Const.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # ConstPathField represents the child node of some kind of assignment. It + # represents when you're assigning to a constant that is being referenced as + # a child of another variable. 
+ # + # object::Const = value + # + class ConstPathField + # [untyped] the source of the constant + attr_reader :parent + + # [Const] the constant itself + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(parent:, constant:, location:) + @parent = parent + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const_path_field') + + q.breakable + q.pp(parent) + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { + type: :const_path_field, + parent: parent, + constant: constant, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_const_path_field: (untyped parent, Const constant) -> ConstPathField + def on_const_path_field(parent, constant) + ConstPathField.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + end + + # ConstPathRef represents referencing a constant by a path. + # + # object::Const + # + class ConstPathRef + # [untyped] the source of the constant + attr_reader :parent + + # [Const] the constant itself + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(parent:, constant:, location:) + @parent = parent + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const_path_ref') + + q.breakable + q.pp(parent) + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { + type: :const_path_ref, + parent: parent, + constant: constant, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_const_path_ref: (untyped parent, Const constant) -> ConstPathRef + def on_const_path_ref(parent, constant) + ConstPathRef.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + end + + # ConstRef represents the name of the constant being used in a class or module + # declaration. 
+ # + # class Container + # end + # + class ConstRef + # [Const] the constant itself + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, location:) + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const_ref') + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { type: :const_ref, constant: constant, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_const_ref: (Const constant) -> ConstRef + def on_const_ref(constant) + ConstRef.new(constant: constant, location: constant.location) + end + + # CVar represents the use of a class variable. + # + # @@variable + # + class CVar + # [String] the name of the class variable + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('cvar') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :cvar, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_cvar: (String value) -> CVar + def on_cvar(value) + node = + CVar.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Def represents defining a regular method on the current self object. 
+ # + # def method(param) result end + # + class Def + # [Backtick | Const | Ident | Kw | Op] the name of the method + attr_reader :name + + # [Params | Paren] the parameter declaration for the method + attr_reader :params + + # [BodyStmt] the expressions to be executed by the method + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(name:, params:, bodystmt:, location:) + @name = name + @params = params + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('def') + + q.breakable + q.pp(name) + + q.breakable + q.pp(params) + + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { + type: :def, + name: name, + params: params, + bodystmt: bodystmt, + loc: location + }.to_json(*opts) + end + end + + # DefEndless represents defining a single-line method since Ruby 3.0+. + # + # def method = result + # + class DefEndless + # [Backtick | Const | Ident | Kw | Op] the name of the method + attr_reader :name + + # [Paren] the parameter declaration for the method + attr_reader :paren + + # [untyped] the expression to be executed by the method + attr_reader :statement + + # [Location] the location of this node + attr_reader :location + + def initialize(name:, paren:, statement:, location:) + @name = name + @paren = paren + @statement = statement + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('def_endless') + + q.breakable + q.pp(name) + + q.breakable + q.pp(paren) + + q.breakable + q.pp(statement) + end + end + + def to_json(*opts) + { + type: :def_endless, + name: name, + paren: paren, + stmt: statement, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_def: ( + # (Backtick | Const | Ident | Kw | Op) name, + # (Params | Paren) params, + # untyped bodystmt + # ) -> Def | DefEndless + def on_def(name, params, bodystmt) + # Make sure to delete this token in case you're defining something 
like def + # class which would lead to this being a kw and causing all kinds of trouble + tokens.delete(name) + + # Find the beginning of the method definition, which works for single-line + # and normal method definitions. + beginning = find_token(Kw, 'def') + + # If we don't have a bodystmt node, then we have a single-line method + unless bodystmt.is_a?(BodyStmt) + node = + DefEndless.new( + name: name, + paren: params, + statement: bodystmt, + location: beginning.location.to(bodystmt.location) + ) + + return node + end + + # If there aren't any params then we need to correct the params node + # location information + if params.is_a?(Params) && params.empty? + end_char = name.location.end_char + location = + Location.new( + start_line: params.location.start_line, + start_char: end_char, + end_line: params.location.end_line, + end_char: end_char + ) + + params = Params.new(location: location) + end + + ending = find_token(Kw, 'end') + bodystmt.bind( + find_next_statement_start(params.location.end_char), + ending.location.start_char + ) + + Def.new( + name: name, + params: params, + bodystmt: bodystmt, + location: beginning.location.to(ending.location) + ) + end + + # Defined represents the use of the +defined?+ operator. It can be used with + # and without parentheses. 
+ # + # defined?(variable) + # + class Defined + # [untyped] the value being sent to the keyword + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('defined') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :defined, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_defined: (untyped value) -> Defined + def on_defined(value) + beginning = find_token(Kw, 'defined?') + ending = value + + range = beginning.location.end_char...value.location.start_char + if source[range].include?('(') + find_token(LParen) + ending = find_token(RParen) + end + + Defined.new(value: value, location: beginning.location.to(ending.location)) + end + + # Defs represents defining a singleton method on an object. + # + # def object.method(param) result end + # + class Defs + # [untyped] the target where the method is being defined + attr_reader :target + + # [Op | Period] the operator being used to declare the method + attr_reader :operator + + # [Backtick | Const | Ident | Kw | Op] the name of the method + attr_reader :name + + # [Params | Paren] the parameter declaration for the method + attr_reader :params + + # [BodyStmt] the expressions to be executed by the method + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(target:, operator:, name:, params:, bodystmt:, location:) + @target = target + @operator = operator + @name = name + @params = params + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('defs') + + q.breakable + q.pp(target) + + q.breakable + q.pp(operator) + + q.breakable + q.pp(name) + + q.breakable + q.pp(params) + + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { + type: :defs, + target: target, + op: 
operator, + name: name, + params: params, + bodystmt: bodystmt, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_defs: ( + # untyped target, + # (Op | Period) operator, + # (Backtick | Const | Ident | Kw | Op) name, + # (Params | Paren) params, + # BodyStmt bodystmt + # ) -> Defs + def on_defs(target, operator, name, params, bodystmt) + # Make sure to delete this token in case you're defining something + # like def class which would lead to this being a kw and causing all kinds + # of trouble + tokens.delete(name) + + # If there aren't any params then we need to correct the params node + # location information + if params.is_a?(Params) && params.empty? + end_char = name.location.end_char + location = + Location.new( + start_line: params.location.start_line, + start_char: end_char, + end_line: params.location.end_line, + end_char: end_char + ) + + params = Params.new(location: location) + end + + beginning = find_token(Kw, 'def') + ending = find_token(Kw, 'end') + + bodystmt.bind( + find_next_statement_start(params.location.end_char), + ending.location.start_char + ) + + Defs.new( + target: target, + operator: operator, + name: name, + params: params, + bodystmt: bodystmt, + location: beginning.location.to(ending.location) + ) + end + + # DoBlock represents passing a block to a method call using the +do+ and +end+ + # keywords. 
+ # + # method do |value| + # end + # + class DoBlock + # [Kw] the do keyword that opens this block + attr_reader :keyword + + # [nil | BlockVar] the optional variable declaration within this block + attr_reader :block_var + + # [BodyStmt] the expressions to be executed within this block + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(keyword:, block_var:, bodystmt:, location:) + @keyword = keyword + @block_var = block_var + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('do_block') + + if block_var + q.breakable + q.pp(block_var) + end + + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { + type: :do_block, + keyword: keyword, + block_var: block_var, + bodystmt: bodystmt, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_do_block: (BlockVar block_var, BodyStmt bodystmt) -> DoBlock + def on_do_block(block_var, bodystmt) + beginning = find_token(Kw, 'do') + ending = find_token(Kw, 'end') + + bodystmt.bind( + find_next_statement_start((block_var || beginning).location.end_char), + ending.location.start_char + ) + + DoBlock.new( + keyword: beginning, + block_var: block_var, + bodystmt: bodystmt, + location: beginning.location.to(ending.location) + ) + end + + # Dot2 represents using the .. operator between two expressions. Usually this + # is to create a range object. + # + # 1..2 + # + # Sometimes this operator is used to create a flip-flop. + # + # if value == 5 .. value == 10 + # end + # + # One of the sides of the expression may be nil, but not both. 
+ class Dot2 + # [nil | untyped] the left side of the expression + attr_reader :left + + # [nil | untyped] the right side of the expression + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('dot2') + + if left + q.breakable + q.pp(left) + end + + if right + q.breakable + q.pp(right) + end + end + end + + def to_json(*opts) + { type: :dot2, left: left, right: right, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_dot2: ((nil | untyped) left, (nil | untyped) right) -> Dot2 + def on_dot2(left, right) + operator = find_token(Op, '..') + + beginning = left || operator + ending = right || operator + + Dot2.new( + left: left, + right: right, + location: beginning.location.to(ending.location) + ) + end + + # Dot3 represents using the ... operator between two expressions. Usually this + # is to create a range object. It's effectively the same event as the Dot2 + # node but with this operator you're asking Ruby to omit the final value. + # + # 1...2 + # + # Like Dot2 it can also be used to create a flip-flop. + # + # if value == 5 ... value == 10 + # end + # + # One of the sides of the expression may be nil, but not both. 
+ class Dot3 + # [nil | untyped] the left side of the expression + attr_reader :left + + # [nil | untyped] the right side of the expression + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('dot3') + + if left + q.breakable + q.pp(left) + end + + if right + q.breakable + q.pp(right) + end + end + end + + def to_json(*opts) + { type: :dot3, left: left, right: right, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_dot3: ((nil | untyped) left, (nil | untyped) right) -> Dot3 + def on_dot3(left, right) + operator = find_token(Op, '...') + + beginning = left || operator + ending = right || operator + + Dot3.new( + left: left, + right: right, + location: beginning.location.to(ending.location) + ) + end + + # DynaSymbol represents a symbol literal that uses quotes to dynamically + # define its value. 
+ # + # :"#{variable}" + # + # They can also be used as a special kind of dynamic hash key, as in: + # + # { "#{key}": value } + # + class DynaSymbol + # [Array[ StringDVar | StringEmbExpr | TStringContent ]] the parts of the + # dynamic symbol + attr_reader :parts + + # [String] the quote used to delimit the dynamic symbol + attr_reader :quote + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, quote:, location:) + @parts = parts + @quote = quote + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('dyna_symbol') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :dyna_symbol, parts: parts, quote: quote, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_dyna_symbol: (StringContent string_content) -> DynaSymbol + def on_dyna_symbol(string_content) + if find_token(SymBeg, consume: false) + # A normal dynamic symbol + symbeg = find_token(SymBeg) + tstring_end = find_token(TStringEnd) + + DynaSymbol.new( + quote: symbeg.value, + parts: string_content.parts, + location: symbeg.location.to(tstring_end.location) + ) + else + # A dynamic symbol as a hash key + tstring_beg = find_token(TStringBeg) + label_end = find_token(LabelEnd) + + DynaSymbol.new( + parts: string_content.parts, + quote: label_end.value[0], + location: tstring_beg.location.to(label_end.location) + ) + end + end + + # Else represents the end of an +if+, +unless+, or +case+ chain. 
#
#     if variable
#     else
#     end
#
class Else
  # [Statements] the expressions run by this clause
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(statements:, location:)
    @statements, @location = statements, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('else')
      q.breakable
      q.pp(statements)
    end
  end

  def to_json(*opts)
    fields = { type: :else, stmts: statements, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_else: (Statements statements) -> Else
def on_else(statements)
  keyword = find_token(Kw, 'else')

  # The clause is closed either by an end keyword, which belongs to us and is
  # consumed here, or by an ensure keyword, which is left in place for the
  # ensure handler.
  position =
    tokens.rindex do |token|
      token.is_a?(Kw) && %w[end ensure].include?(token.value)
    end

  terminator = tokens[position]
  tokens.delete_at(position) if terminator.value == 'end'

  statements.bind(keyword.location.end_char, terminator.location.start_char)

  Else.new(
    statements: statements,
    location: keyword.location.to(terminator.location)
  )
end

# Elsif represents another clause in an +if+ or +unless+ chain.
#
#     if variable
#     elsif other_variable
#     end
#
class Elsif
  # [untyped] the condition being tested
  attr_reader :predicate

  # [Statements] the expressions run by this clause
  attr_reader :statements

  # [nil | Elsif | Else] the clause that follows this one, if any
  attr_reader :consequent

  # [Location] the location of this node
  attr_reader :location

  def initialize(predicate:, statements:, consequent:, location:)
    @predicate = predicate
    @statements = statements
    @consequent = consequent
    @location = location
  end

  # Prints (elsif predicate statements [consequent]).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('elsif')

      [predicate, statements].each do |child|
        q.breakable
        q.pp(child)
      end

      if consequent
        q.breakable
        q.pp(consequent)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :elsif,
      pred: predicate,
      stmts: statements,
      cons: consequent,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_elsif: (
#     untyped predicate,
#     Statements statements,
#     (nil | Elsif | Else) consequent
#   ) -> Elsif
def on_elsif(predicate, statements, consequent)
  keyword = find_token(Kw, 'elsif')

  # The end keyword is only looked up (and consumed) when no later clause
  # closes this one.
  closing = consequent || find_token(Kw, 'end')

  statements.bind(predicate.location.end_char, closing.location.start_char)

  Elsif.new(
    predicate: predicate,
    statements: statements,
    consequent: consequent,
    location: keyword.location.to(closing.location)
  )
end

# EmbDoc represents a multi-line comment.
#
#     =begin
#     first line
#     second line
#     =end
#
class EmbDoc
  # [String] the text of the comment
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Always false for this kind of comment.
  def inline?
    false
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('embdoc')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :embdoc, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# Appends +value+ to the comment currently being collected.
#
# :call-seq:
#   on_embdoc: (String value) -> EmbDoc
def on_embdoc(value)
  @embdoc.tap { |embdoc| embdoc.value << value }
end

# Starts collecting a new multi-line comment at the current position.
#
# :call-seq:
#   on_embdoc_beg: (String value) -> EmbDoc
def on_embdoc_beg(value)
  location = Location.fixed(line: lineno, char: char_pos)
  @embdoc = EmbDoc.new(value: value, location: location)
end

# Finishes the comment being collected, records it in the list of comments,
# and clears the in-progress state.
#
# :call-seq:
#   on_embdoc_end: (String value) -> EmbDoc
def on_embdoc_end(value)
  opening = @embdoc.location
  contents = @embdoc.value << value.chomp

  span =
    Location.new(
      start_line: opening.start_line,
      start_char: opening.start_char,
      end_line: lineno,
      end_char: char_pos + value.length - 1
    )

  finished = EmbDoc.new(value: contents, location: span)

  @comments << finished
  @embdoc = nil

  finished
end

# EmbExprBeg represents the beginning token for using interpolation inside of
# a parent node that accepts string content (like a string or regular
# expression).
#
#     "Hello, #{person}!"
#
class EmbExprBeg
  # [String] the #{ used in the string
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_embexpr_beg: (String value) -> EmbExprBeg
def on_embexpr_beg(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  EmbExprBeg.new(value: value, location: location).tap { |node| tokens << node }
end

# EmbExprEnd represents the ending token for using interpolation inside of a
# parent node that accepts string content (like a string or regular
# expression).
#
#     "Hello, #{person}!"
#
class EmbExprEnd
  # [String] the } used in the string
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_embexpr_end: (String value) -> EmbExprEnd
def on_embexpr_end(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  EmbExprEnd.new(value: value, location: location).tap { |node| tokens << node }
end

# EmbVar represents the use of shorthand interpolation for an instance, class,
# or global variable into a parent node that accepts string content (like a
# string or regular expression).
#
#     "#@variable"
#
# In the example above, an EmbVar node represents the # because it forces
# @variable to be interpolated.
class EmbVar
  # [String] the # used in the string
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_embvar: (String value) -> EmbVar
def on_embvar(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  EmbVar.new(value: value, location: location).tap { |node| tokens << node }
end

# Ensure represents the use of the +ensure+ keyword and its subsequent
# statements.
#
#     begin
#     ensure
#     end
#
class Ensure
  # [Kw] the ensure keyword that opens this node
  attr_reader :keyword

  # [Statements] the expressions run by this clause
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(keyword:, statements:, location:)
    @keyword, @statements, @location = keyword, statements, location
  end

  # Prints (ensure statements); the keyword itself is not printed.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ensure')
      q.breakable
      q.pp(statements)
    end
  end

  def to_json(*opts)
    fields = {
      type: :ensure,
      keyword: keyword,
      stmts: statements,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_ensure: (Statements statements) -> Ensure
def on_ensure(statements)
  keyword = find_token(Kw, 'ensure')

  # The end keyword is only peeked at: consuming the :@kw event here would
  # break def..ensure..end chains.
  closing = find_token(Kw, 'end', consume: false)

  body_start = find_next_statement_start(keyword.location.end_char)
  statements.bind(body_start, closing.location.start_char)

  Ensure.new(
    keyword: keyword,
    statements: statements,
    location: keyword.location.to(closing.location)
  )
end

# ExcessedComma represents a trailing comma in a list of block parameters. It
# changes the block parameters such that they will destructure.
#
#     [[1, 2, 3], [2, 3, 4]].each do |first, second,|
#     end
#
# In the above example, an ExcessedComma node would appear in the third
# position of the Params node that is used to declare that block. The third
# position typically represents a rest-type parameter, but in this case is
# used to indicate that a trailing comma was used.
class ExcessedComma
  # [String] the comma
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('excessed_comma')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :excessed_comma, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# The handler for this event accepts no parameters (though in previous
# versions of Ruby it accepted a string literal with a value of ",").
#
# :call-seq:
#   on_excessed_comma: () -> ExcessedComma
def on_excessed_comma(*)
  # The node simply mirrors the comma token that was just consumed.
  token = find_token(Comma)

  ExcessedComma.new(value: token.value, location: token.location)
end

# FCall represents the piece of a method call that comes before any arguments
# (i.e., just the name of the method). It is used in places where the parser
# is sure that it is a method call and not potentially a local variable.
#
#     method(argument)
#
# In the above example, it's referring to the +method+ segment.
class FCall
  # [Const | Ident] the name of the method
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('fcall')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :fcall, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# Wraps the method name, reusing the name's own location for the node.
#
# :call-seq:
#   on_fcall: ((Const | Ident) value) -> FCall
def on_fcall(value)
  location = value.location
  FCall.new(value: value, location: location)
end

# Field is always the child of an assignment. It represents assigning to a
# “field” on an object.
#
#     object.variable = value
#
class Field
  # [untyped] the object that owns the field being assigned
  attr_reader :parent

  # [:"::" | Op | Period] the operator used to reach the field
  attr_reader :operator

  # [Const | Ident] the name of the field being assigned
  attr_reader :name

  # [Location] the location of this node
  attr_reader :location

  def initialize(parent:, operator:, name:, location:)
    @parent = parent
    @operator = operator
    @name = name
    @location = location
  end

  # Prints (field parent operator name).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('field')

      [parent, operator, name].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :field,
      parent: parent,
      op: operator,
      name: name,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# The node spans from the start of the parent to the end of the field name.
#
# :call-seq:
#   on_field: (
#     untyped parent,
#     (:"::" | Op | Period) operator,
#     (Const | Ident) name
#   ) -> Field
def on_field(parent, operator, name)
  span = parent.location.to(name.location)

  Field.new(parent: parent, operator: operator, name: name, location: span)
end

# FloatLiteral represents a floating point number literal.
#
#     1.0
#
class FloatLiteral
  # [String] the source text of the floating point literal
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value = value
    @location = location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('float')

      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    { type: :float, value: value, loc: location }.to_json(*opts)
  end
end

# Builds the literal at the current position and records it in the list of
# pending tokens.
#
# :call-seq:
#   on_float: (String value) -> FloatLiteral
def on_float(value)
  node =
    FloatLiteral.new(
      value: value,
      location: Location.token(line: lineno, char: char_pos, size: value.size)
    )

  tokens << node
  node
end

# FndPtn represents matching against a pattern where you find a pattern in an
# array using the Ruby 3.0+ pattern matching syntax.
#
#     case value
#     in [*, 7, *]
#     end
#
class FndPtn
  # [nil | untyped] the optional constant wrapper
  attr_reader :constant

  # [VarField] the splat on the left-hand side
  attr_reader :left

  # [Array[ untyped ]] the list of positional expressions in the pattern that
  # are being matched
  attr_reader :values

  # [VarField] the splat on the right-hand side
  attr_reader :right

  # [Location] the location of this node
  attr_reader :location

  def initialize(constant:, left:, values:, right:, location:)
    @constant = constant
    @left = left
    @values = values
    @right = right
    @location = location
  end

  # Prints (fndptn [constant] left (values...) right).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('fndptn')

      if constant
        q.breakable
        q.pp(constant)
      end

      q.breakable
      q.pp(left)

      q.breakable
      q.group(2, '(', ')') { q.seplist(values) { |value| q.pp(value) } }

      q.breakable
      q.pp(right)
    end
  end

  def to_json(*opts)
    {
      type: :fndptn,
      constant: constant,
      left: left,
      values: values,
      right: right,
      loc: location
    }.to_json(*opts)
  end
end

# :call-seq:
#   on_fndptn: (
#     (nil | untyped) constant,
#     VarField left,
#     Array[untyped] values,
#     VarField right
#   ) -> FndPtn
def on_fndptn(constant, left, values, right)
  # The opening bracket is only consumed when there is no constant wrapper to
  # start the location from.
  beginning = constant || find_token(LBracket)
  ending = find_token(RBracket)

  FndPtn.new(
    constant: constant,
    left: left,
    values: values,
    right: right,
    location: beginning.location.to(ending.location)
  )
end

# For represents using a +for+ loop.
#
#     for value in list do
#     end
#
class For
  # [MLHS | MLHSAddStar | VarField] the variable declaration being used to
  # pull values out of the object being enumerated
  attr_reader :index

  # [untyped] the object being enumerated in the loop
  attr_reader :collection

  # [Statements] the statements to be executed
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(index:, collection:, statements:, location:)
    @index = index
    @collection = collection
    @statements = statements
    @location = location
  end

  # Prints (for index collection statements).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('for')

      q.breakable
      q.pp(index)

      q.breakable
      q.pp(collection)

      q.breakable
      q.pp(statements)
    end
  end

  def to_json(*opts)
    {
      type: :for,
      index: index,
      collection: collection,
      stmts: statements,
      loc: location
    }.to_json(*opts)
  end
end

# :call-seq:
#   on_for: (
#     (MLHS | MLHSAddStar | VarField) value,
#     untyped collection,
#     Statements statements
#   ) -> For
def on_for(index, collection, statements)
  beginning = find_token(Kw, 'for')
  ending = find_token(Kw, 'end')

  # Consume the do keyword if it exists so that it doesn't get confused for
  # some other block. The most recent do token only belongs to this loop when
  # it sits between the collection and the closing end; any other do token
  # (for instance the one opening an enclosing block) is left untouched and
  # must not be used to compute where the loop body starts.
  keyword = find_token(Kw, 'do', consume: false)
  if keyword && keyword.location.start_char > collection.location.end_char &&
       keyword.location.end_char < ending.location.start_char
    tokens.delete(keyword)
  else
    keyword = nil
  end

  # The body starts after the loop's own do keyword when there is one, and
  # after the collection otherwise.
  statements.bind(
    (keyword || collection).location.end_char,
    ending.location.start_char
  )

  For.new(
    index: index,
    collection: collection,
    statements: statements,
    location: beginning.location.to(ending.location)
  )
end

# GVar represents a global variable literal.
#
#     $variable
#
class GVar
  # [String] the name of the global variable
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value = value
    @location = location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('gvar')

      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    { type: :gvar, value: value, loc: location }.to_json(*opts)
  end
end

# Builds the node at the current position and records it in the list of
# pending tokens.
#
# :call-seq:
#   on_gvar: (String value) -> GVar
def on_gvar(value)
  node =
    GVar.new(
      value: value,
      location: Location.token(line: lineno, char: char_pos, size: value.size)
    )

  tokens << node
  node
end

# HashLiteral represents a hash literal.
#
#     { key => value }
#
class HashLiteral
  # [nil | AssocListFromArgs] the contents of the hash
  attr_reader :contents

  # [Location] the location of this node
  attr_reader :location

  def initialize(contents:, location:)
    @contents = contents
    @location = location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('hash')

      q.breakable
      q.pp(contents)
    end
  end

  def to_json(*opts)
    { type: :hash, cnts: contents, loc: location }.to_json(*opts)
  end
end

# :call-seq:
#   on_hash: ((nil | AssocListFromArgs) contents) -> HashLiteral
def on_hash(contents)
  lbrace = find_token(LBrace)
  rbrace = find_token(RBrace)

  if contents
    # Here we're going to expand out the location information for the contents
    # node so that it can grab up any remaining comments inside the hash. The
    # contents node is rebuilt rather than mutated, stretching its character
    # range to everything between the two braces.
    location =
      Location.new(
        start_line: contents.location.start_line,
        start_char: lbrace.location.end_char,
        end_line: contents.location.end_line,
        end_char: rbrace.location.start_char
      )

    contents = contents.class.new(assocs: contents.assocs, location: location)
  end

  HashLiteral.new(
    contents: contents,
    location: lbrace.location.to(rbrace.location)
  )
end

# Heredoc represents a heredoc string literal.
#
#     <<~DOC
#       contents
#     DOC
#
class Heredoc
  # [HeredocBeg] the opening of the heredoc
  attr_reader :beginning

  # [String] the ending of the heredoc
  attr_reader :ending

  # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the
  # heredoc string literal
  attr_reader :parts

  # [Location] the location of this node
  attr_reader :location

  # The ending and the parts default to nil and an empty list so that a
  # heredoc can be created from its opening declaration alone.
  def initialize(beginning:, ending: nil, parts: [], location:)
    @beginning = beginning
    @ending = ending
    @parts = parts
    @location = location
  end

  # Prints (heredoc (part, part, ...)).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('heredoc')

      q.breakable
      q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } }
    end
  end

  # NOTE(review): the opening is serialized under the key "beging"; it is
  # left as-is here since consumers of the JSON may rely on that spelling.
  def to_json(*opts)
    {
      type: :heredoc,
      beging: beginning,
      ending: ending,
      parts: parts,
      loc: location
    }.to_json(*opts)
  end
end

# HeredocBeg represents the beginning declaration of a heredoc.
#
#     <<~DOC
#       contents
#     DOC
#
# In the example above the HeredocBeg node represents <<~DOC.
class HeredocBeg
  # [String] the opening declaration of the heredoc
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('heredoc_beg')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :heredoc_beg, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_heredoc_beg: (String value) -> HeredocBeg
def on_heredoc_beg(value)
  span = Location.token(line: lineno, char: char_pos, size: value.size + 1)

  # An extra node type is created artificially for the declaration so that
  # comments following the declaration of a heredoc still get printed. A
  # heredoc sharing the same location is pushed onto the stack of open
  # heredocs, to be filled in by the later heredoc events.
  opener = HeredocBeg.new(value: value, location: span)
  @heredocs << Heredoc.new(beginning: opener, location: span)

  opener
end

# Replaces the innermost open heredoc with a copy whose parts come from the
# given string content.
#
# :call-seq:
#   on_heredoc_dedent: (StringContent string, Integer width) -> Heredoc
def on_heredoc_dedent(string, width)
  current = @heredocs[-1]

  dedented =
    Heredoc.new(
      beginning: current.beginning,
      ending: current.ending,
      parts: string.parts,
      location: current.location
    )

  @heredocs[-1] = dedented
end

# Replaces the innermost open heredoc with a copy that records its ending and
# whose location stretches to the current position.
#
# :call-seq:
#   on_heredoc_end: (String value) -> Heredoc
def on_heredoc_end(value)
  current = @heredocs[-1]
  opening = current.beginning
  terminator = value.chomp
  pieces = current.parts

  span =
    Location.new(
      start_line: current.location.start_line,
      start_char: current.location.start_char,
      end_line: lineno,
      end_char: char_pos
    )

  @heredocs[-1] =
    Heredoc.new(
      beginning: opening,
      ending: terminator,
      parts: pieces,
      location: span
    )
end

# HshPtn represents matching against a hash pattern using the Ruby 2.7+
# pattern matching syntax.
#
#     case value
#     in { key: }
#     end
#
class HshPtn
  # [nil | untyped] the optional constant wrapper
  attr_reader :constant

  # [Array[ [Label, untyped] ]] the label/value pairs that the pattern
  # matches against
  attr_reader :keywords

  # [nil | VarField] an optional parameter to gather up all remaining keywords
  attr_reader :keyword_rest

  # [Location] the location of this node
  attr_reader :location

  def initialize(constant:, keywords:, keyword_rest:, location:)
    @constant = constant
    @keywords = keywords
    @keyword_rest = keyword_rest
    @location = location
  end

  # Prints (hshptn [constant] [(keywords...)] [keyword_rest]), leaving out
  # whatever is absent or empty.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('hshptn')

      if constant
        q.breakable
        q.pp(constant)
      end

      if keywords.any?
        q.breakable
        q.group(2, '(', ')') do
          q.seplist(keywords) { |pair| q.pp(pair) }
        end
      end

      if keyword_rest
        q.breakable
        q.pp(keyword_rest)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :hshptn,
      constant: constant,
      keywords: keywords,
      kwrest: keyword_rest,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_hshptn: (
#     (nil | untyped) constant,
#     Array[[Label, untyped]] keywords,
#     (nil | VarField) keyword_rest
#   ) -> HshPtn
def on_hshptn(constant, keywords, keyword_rest)
  # Every node that is actually present, in source order; the pattern spans
  # from the first of them to the last.
  present = [constant, keywords, keyword_rest].flatten(2).compact
  span = present.first.location.to(present.last.location)

  HshPtn.new(
    constant: constant,
    keywords: keywords,
    keyword_rest: keyword_rest,
    location: span
  )
end

# Ident represents an identifier anywhere in code. It can represent a very
# large number of things, depending on where it is in the syntax tree.
#
#     value
#
class Ident
  # [String] the text of the identifier
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ident')
      q.breakable
      q.pp(value)
    end
  end

  # The identifier text is re-tagged as UTF-8 (in place) before being
  # serialized.
  def to_json(*opts)
    fields = {
      type: :ident,
      value: value.force_encoding('UTF-8'),
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_ident: (String value) -> Ident
def on_ident(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Ident.new(value: value, location: location).tap { |node| tokens << node }
end

# If represents the first clause in an +if+ chain.
#
#     if predicate
#     end
#
class If
  # [untyped] the condition being tested
  attr_reader :predicate

  # [Statements] the expressions run by this clause
  attr_reader :statements

  # [nil | Elsif | Else] the clause that follows this one, if any
  attr_reader :consequent

  # [Location] the location of this node
  attr_reader :location

  def initialize(predicate:, statements:, consequent:, location:)
    @predicate = predicate
    @statements = statements
    @consequent = consequent
    @location = location
  end

  # Prints (if predicate statements [consequent]).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('if')

      [predicate, statements].each do |child|
        q.breakable
        q.pp(child)
      end

      if consequent
        q.breakable
        q.pp(consequent)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :if,
      pred: predicate,
      stmts: statements,
      cons: consequent,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_if: (
#     untyped predicate,
#     Statements statements,
#     (nil | Elsif | Else) consequent
#   ) -> If
def on_if(predicate, statements, consequent)
  keyword = find_token(Kw, 'if')

  # The end keyword is only looked up (and consumed) when no later clause
  # closes this one.
  closing = consequent || find_token(Kw, 'end')

  statements.bind(predicate.location.end_char, closing.location.start_char)

  If.new(
    predicate: predicate,
    statements: statements,
    consequent: consequent,
    location: keyword.location.to(closing.location)
  )
end

# IfOp represents a ternary clause.
#
#     predicate ? truthy : falsy
#
class IfOp
  # [untyped] the condition being tested
  attr_reader :predicate

  # [untyped] the expression evaluated when the predicate is truthy
  attr_reader :truthy

  # [untyped] the expression evaluated when the predicate is falsy
  attr_reader :falsy

  # [Location] the location of this node
  attr_reader :location

  def initialize(predicate:, truthy:, falsy:, location:)
    @predicate = predicate
    @truthy = truthy
    @falsy = falsy
    @location = location
  end

  # Prints (ifop predicate truthy falsy).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ifop')

      [predicate, truthy, falsy].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :ifop,
      pred: predicate,
      tthy: truthy,
      flsy: falsy,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# The node spans from the start of the predicate to the end of the falsy
# branch.
#
# :call-seq:
#   on_ifop: (untyped predicate, untyped truthy, untyped falsy) -> IfOp
def on_ifop(predicate, truthy, falsy)
  span = predicate.location.to(falsy.location)

  IfOp.new(predicate: predicate, truthy: truthy, falsy: falsy, location: span)
end

# IfMod represents the modifier form of an +if+ statement.
#
#     expression if predicate
#
class IfMod
  # [untyped] the expression that runs when the condition holds
  attr_reader :statement

  # [untyped] the condition being tested
  attr_reader :predicate

  # [Location] the location of this node
  attr_reader :location

  def initialize(statement:, predicate:, location:)
    @statement, @predicate, @location = statement, predicate, location
  end

  # Prints (if_mod statement predicate).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('if_mod')

      [statement, predicate].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :if_mod,
      stmt: statement,
      pred: predicate,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_if_mod: (untyped predicate, untyped statement) -> IfMod
def on_if_mod(predicate, statement)
  # The keyword is consumed only to drop it from the pending tokens; the node
  # itself spans from the statement to the predicate.
  find_token(Kw, 'if')

  span = statement.location.to(predicate.location)
  IfMod.new(statement: statement, predicate: predicate, location: span)
end

# def on_ignored_nl(value)
#   value
# end

# def on_ignored_sp(value)
#   value
# end

# Imaginary represents an imaginary number literal.
#
#     1i
#
class Imaginary
  # [String] the source text of the imaginary number literal
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('imaginary')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :imaginary, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_imaginary: (String value) -> Imaginary
def on_imaginary(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Imaginary.new(value: value, location: location).tap { |node| tokens << node }
end

# In represents using the +in+ keyword within the Ruby 2.7+ pattern matching
# syntax.
#
#     case value
#     in pattern
#     end
#
class In
  # [untyped] the pattern being matched against
  attr_reader :pattern

  # [Statements] the expressions run when the pattern matches
  attr_reader :statements

  # [nil | In | Else] the clause that follows this one, if any
  attr_reader :consequent

  # [Location] the location of this node
  attr_reader :location

  def initialize(pattern:, statements:, consequent:, location:)
    @pattern = pattern
    @statements = statements
    @consequent = consequent
    @location = location
  end

  # Prints (in pattern statements [consequent]).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('in')

      [pattern, statements].each do |child|
        q.breakable
        q.pp(child)
      end

      if consequent
        q.breakable
        q.pp(consequent)
      end
    end
  end

  def to_json(*opts)
    fields = {
      type: :in,
      pattern: pattern,
      stmts: statements,
      cons: consequent,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_in: (RAssign pattern, nil statements, nil consequent) -> RAssign
#        | (
#            untyped pattern,
#            Statements statements,
#            (nil | In | Else) consequent
#          ) -> In
def on_in(pattern, statements, consequent)
  if statements
    keyword = find_token(Kw, 'in')

    # The end keyword is only looked up (and consumed) when no later clause
    # closes this one.
    closing = consequent || find_token(Kw, 'end')

    statements.bind(keyword.location.end_char, closing.location.start_char)

    In.new(
      pattern: pattern,
      statements: statements,
      consequent: consequent,
      location: keyword.location.to(closing.location)
    )
  else
    # Without statements this is a rightward assignment, and the pattern is
    # handed back untouched.
    pattern
  end
end

# Int represents an integer number literal.
#
#     1
#
class Int
  # [String] the source text of the integer
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('int')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :int, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_int: (String value) -> Int
def on_int(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Int.new(value: value, location: location).tap { |node| tokens << node }
end

# IVar represents an instance variable literal.
#
#     @variable
#
class IVar
  # [String] the name of the instance variable
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ivar')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :ivar, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_ivar: (String value) -> IVar
def on_ivar(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  IVar.new(value: value, location: location).tap { |node| tokens << node }
end

# Kw represents the use of a keyword. It can be almost anywhere in the syntax
# tree, so you end up seeing it quite a lot.
#
#     if value
#     end
#
# In the above example, there would be two Kw nodes: one for the if and one
# for the end. Note that anything that matches the list of keywords in Ruby
# will use a Kw, so if you use a keyword in a symbol literal for instance:
#
#     :if
#
# then the contents of the symbol node will contain a Kw node.
class Kw
  # [String] the text of the keyword
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('kw')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    fields = { type: :kw, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_kw: (String value) -> Kw
def on_kw(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Kw.new(value: value, location: location).tap { |node| tokens << node }
end

# KwRestParam represents defining a parameter in a method definition that
# accepts all remaining keyword parameters.
#
#     def method(**kwargs) end
#
class KwRestParam
  # [nil | Ident] the name of the parameter
  attr_reader :name

  # [Location] the location of this node
  attr_reader :location

  def initialize(name:, location:)
    @name, @location = name, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('kwrest_param')
      q.breakable
      q.pp(name)
    end
  end

  def to_json(*opts)
    fields = { type: :kwrest_param, name: name, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_kwrest_param: ((nil | Ident) name) -> KwRestParam
def on_kwrest_param(name)
  # An anonymous parameter covers just the ** operator; a named one extends
  # through the end of the name.
  operator = find_token(Op, '**').location
  span = name ? operator.to(name.location) : operator

  KwRestParam.new(name: name, location: span)
end

# Label represents the use of an identifier to associate with an object. You
# can find it in a hash key, as in:
#
#     { key: value }
#
# In this case "key:" would be the body of the label. You can also find it in
# pattern matching, as in:
#
#     case value
#     in key:
#     end
#
# In this case "key:" would be the body of the label.
class Label
  # [String] the text of the label
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints (label :name), moving the colon in front of the label text and
  # dropping the label's final character.
  def pretty_print(q)
    name = value[0...-1]

    q.group(2, '(', ')') do
      q.text('label')
      q.breakable
      q.text(':')
      q.text(name)
    end
  end

  def to_json(*opts)
    fields = { type: :label, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_label: (String value) -> Label
def on_label(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Label.new(value: value, location: location).tap { |node| tokens << node }
end

# LabelEnd represents the end of a dynamic symbol.
#
#     { "key": value }
#
# In the example above, LabelEnd represents the "\":" token at the end of the
# hash key. This node is important for determining the type of quote being
# used by the label.
class LabelEnd
  # [String] the end of the label
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_label_end: (String value) -> LabelEnd
def on_label_end(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  LabelEnd.new(value: value, location: location).tap { |node| tokens << node }
end

# Lambda represents using a lambda literal (not the lambda method call).
+  #
+  #     ->(value) { value * 2 }
+  #
+  class Lambda
+    # [Params | Paren] the parameter declaration for this lambda
+    attr_reader :params
+
+    # [BodyStmt | Statements] the expressions to be executed in this lambda
+    attr_reader :statements
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(params:, statements:, location:)
+      @params = params
+      @statements = statements
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('lambda')
+
+        q.breakable
+        q.pp(params)
+
+        q.breakable
+        q.pp(statements)
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :lambda,
+        params: params,
+        stmts: statements,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds a Lambda node spanning from the TLambda token to the closing
+  # delimiter. When a TLamBeg token is found the body is delimited by it and an
+  # RBrace; otherwise the +do+ and +end+ keywords are used.
+  #
+  # :call-seq:
+  #   on_lambda: (
+  #     (Params | Paren) params,
+  #     (BodyStmt | Statements) statements
+  #   ) -> Lambda
+  def on_lambda(params, statements)
+    beginning = find_token(TLambda)
+
+    # The TLamBeg lookup is done with consume: false, so the token is removed
+    # from the tokens list explicitly once it has been found.
+    if token = find_token(TLamBeg, consume: false)
+      opening = tokens.delete(token)
+      closing = find_token(RBrace)
+    else
+      opening = find_token(Kw, 'do')
+      closing = find_token(Kw, 'end')
+    end
+
+    # Bind the statements to the range between the two delimiters.
+    statements.bind(opening.location.end_char, closing.location.start_char)
+
+    Lambda.new(
+      params: params,
+      statements: statements,
+      location: beginning.location.to(closing.location)
+    )
+  end
+
+  # LBrace represents the use of a left brace, i.e., {.
+  class LBrace
+    # [String] the left brace
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('lbrace')
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :lbrace, value: value, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds an LBrace node and appends it to the tokens list before returning
+  # it.
+  #
+  # :call-seq:
+  #   on_lbrace: (String value) -> LBrace
+  def on_lbrace(value)
+    node =
+      LBrace.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # LBracket represents the use of a left bracket, i.e., [.
+  class LBracket
+    # [String] the left bracket
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+  end
+
+  # Builds an LBracket node and appends it to the tokens list before returning
+  # it.
+  #
+  # :call-seq:
+  #   on_lbracket: (String value) -> LBracket
+  def on_lbracket(value)
+    node =
+      LBracket.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # LParen represents the use of a left parenthesis, i.e., (.
+  class LParen
+    # [String] the left parenthesis
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('lparen')
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :lparen, value: value, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds an LParen node and appends it to the tokens list before returning
+  # it.
+  #
+  # :call-seq:
+  #   on_lparen: (String value) -> LParen
+  def on_lparen(value)
+    node =
+      LParen.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # def on_magic_comment(key, value)
+  #   [key, value]
+  # end
+
+  # MAssign is a parent node of any kind of multiple assignment. This includes
+  # splitting out variables on the left like:
+  #
+  #     first, second, third = value
+  #
+  # as well as splitting out variables on the right, as in:
+  #
+  #     value = first, second, third
+  #
+  # Both sides support splats, as well as variables following them. There's also
+  # destructuring behavior that you can achieve with the following:
+  #
+  #     first, = value
+  #
+  class MAssign
+    # [MLHS | MLHSAddPost | MLHSAddStar | MLHSParen] the target of the multiple
+    # assignment
+    attr_reader :target
+
+    # [untyped] the value being assigned
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(target:, value:, location:)
+      @target = target
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('massign')
+
+        q.breakable
+        q.pp(target)
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :massign, target: target, value: value, loc: location }.to_json(
+        *opts
+      )
+    end
+  end
+
+  # Builds an MAssign node spanning from the target to the value. If the source
+  # text between the end of the target and the start of the value begins with a
+  # comma (as in "first, = value"), the target is flagged as having a trailing
+  # comma.
+  #
+  # :call-seq:
+  #   on_massign: (
+  #     (MLHS | MLHSAddPost | MLHSAddStar | MLHSParen) target,
+  #     untyped value
+  #   ) -> MAssign
+  def on_massign(target, value)
+    comma_range = target.location.end_char...value.location.start_char
+    target.comma = true if source[comma_range].strip.start_with?(',')
+
+    MAssign.new(
+      target: target,
+      value: value,
+      location: target.location.to(value.location)
+    )
+  end
+
+  # MethodAddArg represents a method call with arguments and parentheses.
+  #
+  #     method(argument)
+  #
+  # MethodAddArg can also represent calling a method on an object, as in:
+  #
+  #     object.method(argument)
+  #
+  # Finally, MethodAddArg can represent calling a method with no receiver that
+  # ends in a ?. In this case, the parser knows it's a method call and not a
+  # local variable, so it uses a MethodAddArg node as opposed to a VCall node,
+  # as in:
+  #
+  #     method?
+  #
+  class MethodAddArg
+    # [Call | FCall] the method call
+    attr_reader :call
+
+    # [ArgParen | Args | ArgsAddBlock] the arguments to the method call
+    attr_reader :arguments
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(call:, arguments:, location:)
+      @call = call
+      @arguments = arguments
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('method_add_arg')
+
+        q.breakable
+        q.pp(call)
+
+        q.breakable
+        q.pp(arguments)
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :method_add_arg,
+        call: call,
+        args: arguments,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds a MethodAddArg node. The location starts as the call's location and
+  # is extended through the arguments unless the arguments node is an Args
+  # instance.
+  #
+  # :call-seq:
+  #   on_method_add_arg: (
+  #     (Call | FCall) call,
+  #     (ArgParen | Args | ArgsAddBlock) arguments
+  #   ) -> MethodAddArg
+  def on_method_add_arg(call, arguments)
+    location = call.location
+
+    location = location.to(arguments.location) unless arguments.is_a?(Args)
+
+    MethodAddArg.new(call: call, arguments: arguments, location: location)
+  end
+
+  # MethodAddBlock represents a method call with a block argument.
+  #
+  #     method {}
+  #
+  class MethodAddBlock
+    # [Call | Command | CommandCall | FCall | MethodAddArg] the method call
+    attr_reader :call
+
+    # [BraceBlock | DoBlock] the block being sent with the method call
+    attr_reader :block
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(call:, block:, location:)
+      @call = call
+      @block = block
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('method_add_block')
+
+        q.breakable
+        q.pp(call)
+
+        q.breakable
+        q.pp(block)
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :method_add_block,
+        call: call,
+        block: block,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds a MethodAddBlock node spanning from the call through the block.
+  #
+  # :call-seq:
+  #   on_method_add_block: (
+  #     (Call | Command | CommandCall | FCall | MethodAddArg) call,
+  #     (BraceBlock | DoBlock) block
+  #   ) -> MethodAddBlock
+  def on_method_add_block(call, block)
+    MethodAddBlock.new(
+      call: call,
+      block: block,
+      location: call.location.to(block.location)
+    )
+  end
+
+  # MLHS represents a list of values being destructured on the left-hand side
+  # of a multiple assignment.
+  #
+  #     first, second, third = value
+  #
+  class MLHS
+    # [Array[ ARefField | Field | Ident | MLHSParen | VarField ]] the parts of
+    # the left-hand side of a multiple assignment
+    attr_reader :parts
+
+    # [boolean] whether or not there is a trailing comma at the end of this
+    # list, which impacts destructuring. It's an attr_accessor so that while
+    # the syntax tree is being built it can be set by its parent node
+    attr_accessor :comma
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(parts:, comma: false, location:)
+      @parts = parts
+      @comma = comma
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mlhs')
+
+        q.breakable
+        q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } }
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mlhs, parts: parts, comma: comma, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Returns a new MLHS node that includes the given part. When the incoming
+  # list is empty the new node takes the part's location; otherwise the part is
+  # appended to the existing parts array (which is mutated by <<) and the
+  # location is extended through the part.
+  #
+  # :call-seq:
+  #   on_mlhs_add: (
+  #     MLHS mlhs,
+  #     (ARefField | Field | Ident | MLHSParen | VarField) part
+  #   ) -> MLHS
+  def on_mlhs_add(mlhs, part)
+    if mlhs.parts.empty?
+      MLHS.new(parts: [part], location: part.location)
+    else
+      MLHS.new(
+        parts: mlhs.parts << part,
+        location: mlhs.location.to(part.location)
+      )
+    end
+  end
+
+  # MLHSAddPost represents adding another set of variables onto a list of
+  # assignments after a splat variable within a multiple assignment.
+  #
+  #     left, *middle, right = values
+  #
+  class MLHSAddPost
+    # [MLHSAddStar] the value being starred
+    attr_reader :star
+
+    # [MLHS] the values after the star
+    attr_reader :mlhs
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(star:, mlhs:, location:)
+      @star = star
+      @mlhs = mlhs
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mlhs_add_post')
+
+        q.breakable
+        q.pp(star)
+
+        q.breakable
+        q.pp(mlhs)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mlhs_add_post, star: star, mlhs: mlhs, loc: location }.to_json(
+        *opts
+      )
+    end
+  end
+
+  # Builds an MLHSAddPost node spanning from the starred node through the
+  # trailing list.
+  #
+  # :call-seq:
+  #   on_mlhs_add_post: (MLHSAddStar star, MLHS mlhs) -> MLHSAddPost
+  def on_mlhs_add_post(star, mlhs)
+    MLHSAddPost.new(
+      star: star,
+      mlhs: mlhs,
+      location: star.location.to(mlhs.location)
+    )
+  end
+
+  # MLHSAddStar represents a splatted variable inside of a multiple assignment
+  # on the left hand side.
+  #
+  #     first, *rest = values
+  #
+  class MLHSAddStar
+    # [MLHS] the values before the starred expression
+    attr_reader :mlhs
+
+    # [nil | ARefField | Field | Ident | VarField] the expression being
+    # splatted
+    attr_reader :star
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(mlhs:, star:, location:)
+      @mlhs = mlhs
+      @star = star
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mlhs_add_star')
+
+        q.breakable
+        q.pp(mlhs)
+
+        q.breakable
+        q.pp(star)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mlhs_add_star, mlhs: mlhs, star: star, loc: location }.to_json(
+        *opts
+      )
+    end
+  end
+
+  # Builds an MLHSAddStar node. The location starts at the * operator token and
+  # ends at the splatted part, or at the operator itself when the part is nil.
+  #
+  # :call-seq:
+  #   on_mlhs_add_star: (
+  #     MLHS mlhs,
+  #     (nil | ARefField | Field | Ident | VarField) part
+  #   ) -> MLHSAddStar
+  def on_mlhs_add_star(mlhs, part)
+    beginning = find_token(Op, '*')
+    ending = part || beginning
+
+    MLHSAddStar.new(
+      mlhs: mlhs,
+      star: part,
+      location: beginning.location.to(ending.location)
+    )
+  end
+
+  # Builds an empty MLHS node located at the current scanner position.
+  #
+  # :call-seq:
+  #   on_mlhs_new: () -> MLHS
+  def on_mlhs_new
+    MLHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
+  end
+
+  # MLHSParen represents parentheses being used to destructure values in a
+  # multiple assignment on the left hand side.
+  #
+  #     (left, right) = value
+  #
+  class MLHSParen
+    # [MLHS | MLHSAddPost | MLHSAddStar | MLHSParen] the contents inside of the
+    # parentheses
+    attr_reader :contents
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(contents:, location:)
+      @contents = contents
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mlhs_paren')
+
+        q.breakable
+        q.pp(contents)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mlhs_paren, cnts: contents, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds an MLHSParen node spanning the two parentheses. If the source text
+  # between the parentheses ends with a comma, the contents are flagged as
+  # having a trailing comma.
+  #
+  # :call-seq:
+  #   on_mlhs_paren: (
+  #     (MLHS | MLHSAddPost | MLHSAddStar | MLHSParen) contents
+  #   ) -> MLHSParen
+  def on_mlhs_paren(contents)
+    lparen = find_token(LParen)
+    rparen = find_token(RParen)
+
+    comma_range = lparen.location.end_char...rparen.location.start_char
+    contents.comma = true if source[comma_range].strip.end_with?(',')
+
+    MLHSParen.new(
+      contents: contents,
+      location: lparen.location.to(rparen.location)
+    )
+  end
+
+  # ModuleDeclaration represents defining a module using the +module+ keyword.
+  #
+  #     module Namespace
+  #     end
+  #
+  class ModuleDeclaration
+    # [ConstPathRef | ConstRef | TopConstRef] the name of the module
+    attr_reader :constant
+
+    # [BodyStmt] the expressions to be executed in the context of the module
+    attr_reader :bodystmt
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(constant:, bodystmt:, location:)
+      @constant = constant
+      @bodystmt = bodystmt
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('module')
+
+        q.breakable
+        q.pp(constant)
+
+        q.breakable
+        q.pp(bodystmt)
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :module,
+        constant: constant,
+        bodystmt: bodystmt,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds a ModuleDeclaration node spanning from the +module+ keyword to the
+  # +end+ keyword. The body is bound to the range that starts at the next
+  # statement start after the constant and stops at the +end+ keyword.
+  #
+  # :call-seq:
+  #   on_module: (
+  #     (ConstPathRef | ConstRef | TopConstRef) constant,
+  #     BodyStmt bodystmt
+  #   ) -> ModuleDeclaration
+  def on_module(constant, bodystmt)
+    beginning = find_token(Kw, 'module')
+    ending = find_token(Kw, 'end')
+
+    bodystmt.bind(
+      find_next_statement_start(constant.location.end_char),
+      ending.location.start_char
+    )
+
+    ModuleDeclaration.new(
+      constant: constant,
+      bodystmt: bodystmt,
+      location: beginning.location.to(ending.location)
+    )
+  end
+
+  # MRHS represents the values that are being assigned on the right-hand side of
+  # a multiple assignment.
+  #
+  #     values = first, second, third
+  #
+  class MRHS
+    # [Array[ untyped ]] the parts that are being assigned
+    attr_reader :parts
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(parts:, location:)
+      @parts = parts
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mrhs')
+
+        q.breakable
+        q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } }
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mrhs, parts: parts, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds an empty MRHS node located at the current scanner position.
+  #
+  # :call-seq:
+  #   on_mrhs_new: () -> MRHS
+  def on_mrhs_new
+    MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
+  end
+
+  # Returns a new MRHS node that includes the given part. Three cases are
+  # handled:
+  #
+  # * the incoming node is an MRHSNewFromArgs, in which case its argument parts
+  #   are copied into a fresh list followed by the new part
+  # * the incoming MRHS is empty, in which case the new node holds only the
+  #   part and keeps the incoming node's location
+  # * otherwise the part is appended to the existing parts array (which is
+  #   mutated by <<) and the location is extended through the part
+  #
+  # :call-seq:
+  #   on_mrhs_add: ((MRHS | MRHSNewFromArgs) mrhs, untyped part) -> MRHS
+  def on_mrhs_add(mrhs, part)
+    if mrhs.is_a?(MRHSNewFromArgs)
+      MRHS.new(
+        parts: [*mrhs.arguments.parts, part],
+        location: mrhs.location.to(part.location)
+      )
+    elsif mrhs.parts.empty?
+      MRHS.new(parts: [part], location: mrhs.location)
+    else
+      # MRHS#initialize takes the keyword +location:+; passing +loc:+ here
+      # would raise an ArgumentError.
+      MRHS.new(
+        parts: mrhs.parts << part,
+        location: mrhs.location.to(part.location)
+      )
+    end
+  end
+
+  # MRHSAddStar represents using the splat operator to expand out a value on the
+  # right hand side of a multiple assignment.
+  #
+  #     values = first, *rest
+  #
+  class MRHSAddStar
+    # [MRHS | MRHSNewFromArgs] the values before the splatted expression
+    attr_reader :mrhs
+
+    # [untyped] the splatted expression
+    attr_reader :star
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(mrhs:, star:, location:)
+      @mrhs = mrhs
+      @star = star
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mrhs_add_star')
+
+        q.breakable
+        q.pp(mrhs)
+
+        q.breakable
+        q.pp(star)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mrhs_add_star, mrhs: mrhs, star: star, loc: location }.to_json(
+        *opts
+      )
+    end
+  end
+
+  # Builds an MRHSAddStar node. The location starts at the * operator token and
+  # ends at the splatted expression, or at the operator itself when the
+  # expression is nil.
+  #
+  # :call-seq:
+  #   on_mrhs_add_star: (
+  #     (MRHS | MRHSNewFromArgs) mrhs,
+  #     untyped star
+  #   ) -> MRHSAddStar
+  def on_mrhs_add_star(mrhs, star)
+    beginning = find_token(Op, '*')
+    ending = star || beginning
+
+    MRHSAddStar.new(
+      mrhs: mrhs,
+      star: star,
+      location: beginning.location.to(ending.location)
+    )
+  end
+
+  # MRHSNewFromArgs represents the shorthand of a multiple assignment that
+  # allows you to assign values using just commas as opposed to assigning from
+  # an array.
+  #
+  #     values = first, second, third
+  #
+  class MRHSNewFromArgs
+    # [Args] the arguments being used in the assignment
+    attr_reader :arguments
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(arguments:, location:)
+      @arguments = arguments
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('mrhs_new_from_args')
+
+        q.breakable
+        q.pp(arguments)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :mrhs_new_from_args, args: arguments, loc: location }.to_json(
+        *opts
+      )
+    end
+  end
+
+  # Wraps the given arguments in an MRHSNewFromArgs node that shares their
+  # location.
+  #
+  # :call-seq:
+  #   on_mrhs_new_from_args: (Args arguments) -> MRHSNewFromArgs
+  def on_mrhs_new_from_args(arguments)
+    MRHSNewFromArgs.new(arguments: arguments, location: arguments.location)
+  end
+
+  # Next represents using the +next+ keyword.
+  #
+  #     next
+  #
+  # The +next+ keyword can also optionally be called with an argument:
+  #
+  #     next value
+  #
+  # +next+ can even be called with multiple arguments, but only if parentheses
+  # are omitted, as in:
+  #
+  #     next first, second, third
+  #
+  # If a single value is being given, parentheses can be used, as in:
+  #
+  #     next(value)
+  #
+  class Next
+    # [Args | ArgsAddBlock] the arguments passed to the next keyword
+    attr_reader :arguments
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(arguments:, location:)
+      @arguments = arguments
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('next')
+
+        q.breakable
+        q.pp(arguments)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :next, args: arguments, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds a Next node. The location starts as the +next+ keyword's location
+  # and is extended through the arguments unless the arguments node is an Args
+  # instance.
+  #
+  # :call-seq:
+  #   on_next: ((Args | ArgsAddBlock) arguments) -> Next
+  def on_next(arguments)
+    keyword = find_token(Kw, 'next')
+
+    location = keyword.location
+    location = location.to(arguments.location) unless arguments.is_a?(Args)
+
+    Next.new(arguments: arguments, location: location)
+  end
+
+  # def on_nl(value)
+  #   value
+  # end
+
+  # def on_nokw_param(value)
+  #   value
+  # end
+
+  # Op represents an operator literal in the source.
+  #
+  #     1 + 2
+  #
+  # In the example above, the Op node represents the + operator.
+  class Op
+    # [String] the operator
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('op')
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :op, value: value, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds an Op node and appends it to the tokens list before returning it.
+  #
+  # :call-seq:
+  #   on_op: (String value) -> Op
+  def on_op(value)
+    node =
+      Op.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # OpAssign represents assigning a value to a variable or constant using an
+  # operator like += or ||=.
+  #
+  #     variable += value
+  #
+  class OpAssign
+    # [ARefField | ConstPathField | Field | TopConstField | VarField] the target
+    # to assign the result of the expression to
+    attr_reader :target
+
+    # [Op] the operator being used for the assignment
+    attr_reader :operator
+
+    # [untyped] the expression to be assigned
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(target:, operator:, value:, location:)
+      @target = target
+      @operator = operator
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('opassign')
+
+        q.breakable
+        q.pp(target)
+
+        q.breakable
+        q.pp(operator)
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :opassign,
+        target: target,
+        op: operator,
+        value: value,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds an OpAssign node spanning from the target through the value.
+  #
+  # :call-seq:
+  #   on_opassign: (
+  #     (ARefField | ConstPathField | Field | TopConstField | VarField) target,
+  #     Op operator,
+  #     untyped value
+  #   ) -> OpAssign
+  def on_opassign(target, operator, value)
+    OpAssign.new(
+      target: target,
+      operator: operator,
+      value: value,
+      location: target.location.to(value.location)
+    )
+  end
+
+  # def on_operator_ambiguous(value)
+  #   value
+  # end
+
+  # Params represents defining parameters on a method or lambda.
+  #
+  #     def method(param) end
+  #
+  class Params
+    # [Array[ Ident ]] any required parameters
+    attr_reader :requireds
+
+    # [Array[ [ Ident, untyped ] ]] any optional parameters and their default
+    # values
+    attr_reader :optionals
+
+    # [nil | ArgsForward | ExcessedComma | RestParam] the optional rest
+    # parameter
+    attr_reader :rest
+
+    # [Array[ Ident ]] any positional parameters that exist after a rest
+    # parameter
+    attr_reader :posts
+
+    # [Array[ [ Ident, nil | untyped ] ]] any keyword parameters and their
+    # optional default values
+    attr_reader :keywords
+
+    # [nil | :nil | KwRestParam] the optional keyword rest parameter
+    attr_reader :keyword_rest
+
+    # [nil | BlockArg] the optional block parameter
+    attr_reader :block
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(
+      requireds: [],
+      optionals: [],
+      rest: nil,
+      posts: [],
+      keywords: [],
+      keyword_rest: nil,
+      block: nil,
+      location:
+    )
+      @requireds = requireds
+      @optionals = optionals
+      @rest = rest
+      @posts = posts
+      @keywords = keywords
+      @keyword_rest = keyword_rest
+      @block = block
+      @location = location
+    end
+
+    # Params nodes are the most complicated in the tree. Occasionally you want
+    # to know if they are "empty", which means not having any parameters
+    # declared. This logic accesses every kind of parameter and determines if
+    # it's missing.
+    def empty?
+      requireds.empty? && optionals.empty? && !rest && posts.empty? &&
+        keywords.empty? && !keyword_rest && !block
+    end
+
+    # Prints each kind of parameter that is present, in declaration order.
+    # Optional parameters are printed as name=default, and keyword parameters
+    # are printed the same way only when they have a default value.
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('params')
+
+        if requireds.any?
+          q.breakable
+          q.group(2, '(', ')') { q.seplist(requireds) { |name| q.pp(name) } }
+        end
+
+        if optionals.any?
+          q.breakable
+          q.group(2, '(', ')') do
+            q.seplist(optionals) do |(name, default)|
+              q.pp(name)
+              q.text('=')
+              q.group(2) do
+                q.breakable('')
+                q.pp(default)
+              end
+            end
+          end
+        end
+
+        if rest
+          q.breakable
+          q.pp(rest)
+        end
+
+        if posts.any?
+          q.breakable
+          q.group(2, '(', ')') { q.seplist(posts) { |value| q.pp(value) } }
+        end
+
+        if keywords.any?
+          q.breakable
+          q.group(2, '(', ')') do
+            q.seplist(keywords) do |(name, default)|
+              q.pp(name)
+
+              if default
+                q.text('=')
+                q.group(2) do
+                  q.breakable('')
+                  q.pp(default)
+                end
+              end
+            end
+          end
+        end
+
+        if keyword_rest
+          q.breakable
+          q.pp(keyword_rest)
+        end
+
+        if block
+          q.breakable
+          q.pp(block)
+        end
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :params,
+        reqs: requireds,
+        opts: optionals,
+        rest: rest,
+        posts: posts,
+        keywords: keywords,
+        kwrest: keyword_rest,
+        block: block,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds a Params node. Every parameter that was given is flattened into a
+  # single list (nil entries and a :nil keyword rest are left out) and the
+  # location spans from the first entry to the last one. When no parameters
+  # were given the location is fixed at the current scanner position. Array
+  # arguments that are nil are stored as empty arrays.
+  #
+  # :call-seq:
+  #   on_params: (
+  #     (nil | Array[Ident]) requireds,
+  #     (nil | Array[[Ident, untyped]]) optionals,
+  #     (nil | ArgsForward | ExcessedComma | RestParam) rest,
+  #     (nil | Array[Ident]) posts,
+  #     (nil | Array[[Ident, nil | untyped]]) keywords,
+  #     (nil | :nil | KwRestParam) keyword_rest,
+  #     (nil | BlockArg) block
+  #   ) -> Params
+  def on_params(
+    requireds,
+    optionals,
+    rest,
+    posts,
+    keywords,
+    keyword_rest,
+    block
+  )
+    parts = [
+      *requireds,
+      *optionals&.flatten(1),
+      rest,
+      *posts,
+      *keywords&.flat_map { |(key, value)| [key, value || nil] },
+      (keyword_rest if keyword_rest != :nil),
+      block
+    ].compact
+
+    location =
+      if parts.any?
+        parts[0].location.to(parts[-1].location)
+      else
+        Location.fixed(line: lineno, char: char_pos)
+      end
+
+    Params.new(
+      requireds: requireds || [],
+      optionals: optionals || [],
+      rest: rest,
+      posts: posts || [],
+      keywords: keywords || [],
+      keyword_rest: keyword_rest,
+      block: block,
+      location: location
+    )
+  end
+
+  # Paren represents using balanced parentheses in a couple places in a Ruby
+  # program. In general parentheses can be used anywhere a Ruby expression can
+  # be used.
+  #
+  #     (1 + 2)
+  #
+  class Paren
+    # [LParen] the left parenthesis that opened this statement
+    attr_reader :lparen
+
+    # [untyped] the expression inside the parentheses
+    attr_reader :contents
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(lparen:, contents:, location:)
+      @lparen = lparen
+      @contents = contents
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('paren')
+
+        q.breakable
+        q.pp(contents)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :paren, lparen: lparen, cnts: contents, loc: location }.to_json(
+        *opts
+      )
+    end
+  end
+
+  # Builds a Paren node spanning the two parentheses. When the contents are a
+  # Params node, they are replaced by a copy whose location keeps the original
+  # start and end lines but whose character range runs from the next statement
+  # start after the left parenthesis up to the right parenthesis.
+  #
+  # :call-seq:
+  #   on_paren: (untyped contents) -> Paren
+  def on_paren(contents)
+    lparen = find_token(LParen)
+    rparen = find_token(RParen)
+
+    if contents && contents.is_a?(Params)
+      location = contents.location
+      location =
+        Location.new(
+          start_line: location.start_line,
+          start_char: find_next_statement_start(lparen.location.end_char),
+          end_line: location.end_line,
+          end_char: rparen.location.start_char
+        )
+
+      contents =
+        Params.new(
+          requireds: contents.requireds,
+          optionals: contents.optionals,
+          rest: contents.rest,
+          posts: contents.posts,
+          keywords: contents.keywords,
+          keyword_rest: contents.keyword_rest,
+          block: contents.block,
+          location: location
+        )
+    end
+
+    Paren.new(
+      lparen: lparen,
+      contents: contents,
+      location: lparen.location.to(rparen.location)
+    )
+  end
+
+  # If we encounter a parse error, just immediately bail out so that our runner
+  # can catch it. The same handler is aliased to the other error events listed
+  # below.
+  def on_parse_error(error, *)
+    raise ParseError.new(error, lineno, column)
+  end
+  alias on_alias_error on_parse_error
+  alias on_assign_error on_parse_error
+  alias on_class_name_error on_parse_error
+  alias on_param_error on_parse_error
+
+  # Period represents the use of the +.+ operator. It is usually found in method
+  # calls.
+  class Period
+    # [String] the period
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('period')
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :period, value: value, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds a Period node. Unlike most scanner events in this file, the node is
+  # returned without being appended to the tokens list.
+  #
+  # :call-seq:
+  #   on_period: (String value) -> Period
+  def on_period(value)
+    Period.new(
+      value: value,
+      location: Location.token(line: lineno, char: char_pos, size: value.size)
+    )
+  end
+
+  # Program represents the overall syntax tree.
+  class Program
+    # [Statements] the top-level expressions of the program
+    attr_reader :statements
+
+    # [Array[ Comment | EmbDoc ]] the comments inside the program
+    attr_reader :comments
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(statements:, comments:, location:)
+      @statements = statements
+      @comments = comments
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('program')
+
+        q.breakable
+        q.pp(statements)
+      end
+    end
+
+    def to_json(*opts)
+      {
+        type: :program,
+        stmts: statements,
+        comments: comments,
+        loc: location
+      }.to_json(*opts)
+    end
+  end
+
+  # Builds the root Program node, whose location covers the whole source (line
+  # 1, character 0 through the last line and character). If @__end__ is set it
+  # is appended to the statements body, and the statements are then bound to
+  # the full source range.
+  #
+  # :call-seq:
+  #   on_program: (Statements statements) -> Program
+  def on_program(statements)
+    location =
+      Location.new(
+        start_line: 1,
+        start_char: 0,
+        end_line: lines.length,
+        end_char: source.length
+      )
+
+    statements.body << @__end__ if @__end__
+    statements.bind(0, source.length)
+
+    Program.new(statements: statements, comments: @comments, location: location)
+  end
+
+  # QSymbols represents a symbol literal array without interpolation.
+  #
+  #     %i[one two three]
+  #
+  class QSymbols
+    # [Array[ TStringContent ]] the elements of the array
+    attr_reader :elements
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(elements:, location:)
+      @elements = elements
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('qsymbols')
+
+        q.breakable
+        q.group(2, '(', ')') { q.seplist(elements) { |element| q.pp(element) } }
+      end
+    end
+
+    def to_json(*opts)
+      { type: :qsymbols, elems: elements, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Returns a new QSymbols node whose location is extended through the added
+  # element. The element is appended to the existing elements array (which is
+  # mutated by <<).
+  #
+  # :call-seq:
+  #   on_qsymbols_add: (QSymbols qsymbols, TStringContent element) -> QSymbols
+  def on_qsymbols_add(qsymbols, element)
+    QSymbols.new(
+      elements: qsymbols.elements << element,
+      location: qsymbols.location.to(element.location)
+    )
+  end
+
+  # QSymbolsBeg represents the beginning of a symbol literal array.
+  #
+  #     %i[one two three]
+  #
+  # In the snippet above, QSymbolsBeg represents the "%i[" token. Note that
+  # these kinds of arrays can start with a lot of different delimiter types
+  # (e.g., %i| or %i<).
+  class QSymbolsBeg
+    # [String] the beginning of the array literal
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+  end
+
+  # Builds a QSymbolsBeg node and appends it to the tokens list before
+  # returning it.
+  #
+  # :call-seq:
+  #   on_qsymbols_beg: (String value) -> QSymbolsBeg
+  def on_qsymbols_beg(value)
+    node =
+      QSymbolsBeg.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # Builds an empty QSymbols node located at the opening QSymbolsBeg token.
+  #
+  # :call-seq:
+  #   on_qsymbols_new: () -> QSymbols
+  def on_qsymbols_new
+    qsymbols_beg = find_token(QSymbolsBeg)
+
+    QSymbols.new(elements: [], location: qsymbols_beg.location)
+  end
+
+  # QWords represents a string literal array without interpolation.
+  #
+  #     %w[one two three]
+  #
+  class QWords
+    # [Array[ TStringContent ]] the elements of the array
+    attr_reader :elements
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(elements:, location:)
+      @elements = elements
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('qwords')
+
+        q.breakable
+        q.group(2, '(', ')') { q.seplist(elements) { |element| q.pp(element) } }
+      end
+    end
+
+    def to_json(*opts)
+      { type: :qwords, elems: elements, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Returns a new QWords node whose location is extended through the added
+  # element. The element is appended to the existing elements array (which is
+  # mutated by <<).
+  #
+  # :call-seq:
+  #   on_qwords_add: (QWords qwords, TStringContent element) -> QWords
+  def on_qwords_add(qwords, element)
+    QWords.new(
+      elements: qwords.elements << element,
+      location: qwords.location.to(element.location)
+    )
+  end
+
+  # QWordsBeg represents the beginning of a string literal array.
+  #
+  #     %w[one two three]
+  #
+  # In the snippet above, QWordsBeg represents the "%w[" token. Note that these
+  # kinds of arrays can start with a lot of different delimiter types (e.g.,
+  # %w| or %w<).
+  class QWordsBeg
+    # [String] the beginning of the array literal
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+  end
+
+  # Builds a QWordsBeg node and appends it to the tokens list before returning
+  # it.
+  #
+  # :call-seq:
+  #   on_qwords_beg: (String value) -> QWordsBeg
+  def on_qwords_beg(value)
+    node =
+      QWordsBeg.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # Builds an empty QWords node located at the opening QWordsBeg token.
+  #
+  # :call-seq:
+  #   on_qwords_new: () -> QWords
+  def on_qwords_new
+    qwords_beg = find_token(QWordsBeg)
+
+    QWords.new(elements: [], location: qwords_beg.location)
+  end
+
+  # RationalLiteral represents the use of a rational number literal.
+  #
+  #     1r
+  #
+  class RationalLiteral
+    # [String] the rational number literal
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+
+    def pretty_print(q)
+      q.group(2, '(', ')') do
+        q.text('rational')
+
+        q.breakable
+        q.pp(value)
+      end
+    end
+
+    def to_json(*opts)
+      { type: :rational, value: value, loc: location }.to_json(*opts)
+    end
+  end
+
+  # Builds a RationalLiteral node and appends it to the tokens list before
+  # returning it.
+  #
+  # :call-seq:
+  #   on_rational: (String value) -> RationalLiteral
+  def on_rational(value)
+    node =
+      RationalLiteral.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # RBrace represents the use of a right brace, i.e., +}+.
+  class RBrace
+    # [String] the right brace
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+  end
+
+  # Builds an RBrace node and appends it to the tokens list before returning
+  # it.
+  #
+  # :call-seq:
+  #   on_rbrace: (String value) -> RBrace
+  def on_rbrace(value)
+    node =
+      RBrace.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # RBracket represents the use of a right bracket, i.e., +]+.
+  class RBracket
+    # [String] the right bracket
+    attr_reader :value
+
+    # [Location] the location of this node
+    attr_reader :location
+
+    def initialize(value:, location:)
+      @value = value
+      @location = location
+    end
+  end
+
+  # Builds an RBracket node and appends it to the tokens list before returning
+  # it.
+  #
+  # :call-seq:
+  #   on_rbracket: (String value) -> RBracket
+  def on_rbracket(value)
+    node =
+      RBracket.new(
+        value: value,
+        location: Location.token(line: lineno, char: char_pos, size: value.size)
+      )
+
+    tokens << node
+    node
+  end
+
+  # Redo represents the use of the +redo+ keyword.
+ # + # redo + # + class Redo + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('redo') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :redo, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_redo: () -> Redo + def on_redo + keyword = find_token(Kw, 'redo') + + Redo.new(value: keyword.value, location: keyword.location) + end + + # RegexpContent represents the body of a regular expression. + # + # /.+ #{pattern} .+/ + # + # In the example above, a RegexpContent node represents everything contained + # within the forward slashes. + class RegexpContent + # [String] the opening of the regular expression + attr_reader :beginning + + # [Array[ StringDVar | StringEmbExpr | TStringContent ]] the parts of the + # regular expression + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(beginning:, parts:, location:) + @beginning = beginning + @parts = parts + @location = location + end + end + + # :call-seq: + # on_regexp_add: ( + # RegexpContent regexp_content, + # (StringDVar | StringEmbExpr | TStringContent) part + # ) -> RegexpContent + def on_regexp_add(regexp_content, part) + RegexpContent.new( + beginning: regexp_content.beginning, + parts: regexp_content.parts << part, + location: regexp_content.location.to(part.location) + ) + end + + # RegexpBeg represents the start of a regular expression literal. + # + # /.+/ + # + # In the example above, RegexpBeg represents the first / token. 
Regular + # expression literals can also be declared using the %r syntax, as in: + # + # %r{.+} + # + class RegexpBeg + # [String] the beginning of the regular expression + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_regexp_beg: (String value) -> RegexpBeg + def on_regexp_beg(value) + node = + RegexpBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # RegexpEnd represents the end of a regular expression literal. + # + # /.+/m + # + # In the example above, the RegexpEnd event represents the /m at the end of + # the regular expression literal. You can also declare regular expression + # literals using %r, as in: + # + # %r{.+}m + # + class RegexpEnd + # [String] the end of the regular expression + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_regexp_end: (String value) -> RegexpEnd + def on_regexp_end(value) + RegexpEnd.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + end + + # RegexpLiteral represents a regular expression literal. 
+ # + # /.+/ + # + class RegexpLiteral + # [String] the beginning of the regular expression literal + attr_reader :beginning + + # [String] the ending of the regular expression literal + attr_reader :ending + + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # regular expression literal + attr_reader :parts + + # [Locatione] the location of this node + attr_reader :location + + def initialize(beginning:, ending:, parts:, location:) + @beginning = beginning + @ending = ending + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('regexp_literal') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { + type: :regexp_literal, + beging: beginning, + ending: ending, + parts: parts, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_regexp_literal: ( + # RegexpContent regexp_content, + # RegexpEnd ending + # ) -> RegexpLiteral + def on_regexp_literal(regexp_content, ending) + RegexpLiteral.new( + beginning: regexp_content.beginning, + ending: ending.value, + parts: regexp_content.parts, + location: regexp_content.location.to(ending.location) + ) + end + + # :call-seq: + # on_regexp_new: () -> RegexpContent + def on_regexp_new + regexp_beg = find_token(RegexpBeg) + + RegexpContent.new( + beginning: regexp_beg.value, + parts: [], + location: regexp_beg.location + ) + end + + # RescueEx represents the list of exceptions being rescued in a rescue clause. 
+ # + # begin + # rescue Exception => exception + # end + # + class RescueEx + # [untyped] the list of exceptions being rescued + attr_reader :exceptions + + # [nil | Field | VarField] the expression being used to capture the raised + # exception + attr_reader :variable + + # [Location] the location of this node + attr_reader :location + + def initialize(exceptions:, variable:, location:) + @exceptions = exceptions + @variable = variable + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('rescue_ex') + + q.breakable + q.pp(exceptions) + + q.breakable + q.pp(variable) + end + end + + def to_json(*opts) + { + type: :rescue_ex, + extns: exceptions, + var: variable, + loc: location + }.to_json(*opts) + end + end + + # Rescue represents the use of the rescue keyword inside of a BodyStmt node. + # + # begin + # rescue + # end + # + class Rescue + # [RescueEx] the exceptions being rescued + attr_reader :exception + + # [Statements] the expressions to evaluate when an error is rescued + attr_reader :statements + + # [nil | Rescue] the optional next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(exception:, statements:, consequent:, location:) + @exception = exception + @statements = statements + @consequent = consequent + @location = location + end + + def bind_end(end_char) + @location = + Location.new( + start_line: location.start_line, + start_char: location.start_char, + end_line: location.end_line, + end_char: end_char + ) + + if consequent + consequent.bind_end(end_char) + statements.bind_end(consequent.location.start_char) + else + statements.bind_end(end_char) + end + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('rescue') + + if exception + q.breakable + q.pp(exception) + end + + q.breakable + q.pp(statements) + + if consequent + q.breakable + q.pp(consequent) + end + end + end + + def to_json(*opts) + { + type: :rescue, + 
extn: exception, + stmts: statements, + cons: consequent, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_rescue: ( + # (nil | [untyped] | MRHS | MRHSAddStar) exceptions, + # (nil | Field | VarField) variable, + # Statements statements, + # (nil | Rescue) consequent + # ) -> Rescue + def on_rescue(exceptions, variable, statements, consequent) + keyword = find_token(Kw, 'rescue') + exceptions = exceptions[0] if exceptions.is_a?(Array) + + last_node = variable || exceptions || keyword + statements.bind( + find_next_statement_start(last_node.location.end_char), + char_pos + ) + + # We add an additional inner node here that ripper doesn't provide so that + # we have a nice place to attach inline comments. But we only need it if we + # have an exception or a variable that we're rescuing. + rescue_ex = + if exceptions || variable + RescueEx.new( + exceptions: exceptions, + variable: variable, + location: + Location.new( + start_line: keyword.location.start_line, + start_char: keyword.location.end_char + 1, + end_line: last_node.location.end_line, + end_char: last_node.location.end_char + ) + ) + end + + Rescue.new( + exception: rescue_ex, + statements: statements, + consequent: consequent, + location: + Location.new( + start_line: keyword.location.start_line, + start_char: keyword.location.start_char, + end_line: lineno, + end_char: char_pos + ) + ) + end + + # RescueMod represents the use of the modifier form of a +rescue+ clause. 
+ # + # expression rescue value + # + class RescueMod + # [untyped] the expression to execute + attr_reader :statement + + # [untyped] the value to use if the executed expression raises an error + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, value:, location:) + @statement = statement + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('rescue_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { + type: :rescue_mod, + stmt: statement, + value: value, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_rescue_mod: (untyped statement, untyped value) -> RescueMod + def on_rescue_mod(statement, value) + find_token(Kw, 'rescue') + + RescueMod.new( + statement: statement, + value: value, + location: statement.location.to(value.location) + ) + end + + # RestParam represents defining a parameter in a method definition that + # accepts all remaining positional parameters. + # + # def method(*rest) end + # + class RestParam + # [nil | Ident] the name of the parameter + attr_reader :name + + # [Location] the location of this node + attr_reader :location + + def initialize(name:, location:) + @name = name + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('rest_param') + + q.breakable + q.pp(name) + end + end + + def to_json(*opts) + { type: :rest_param, name: name, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_rest_param: ((nil | Ident) name) -> RestParam + def on_rest_param(name) + location = find_token(Op, '*').location + location = location.to(name.location) if name + + RestParam.new(name: name, location: location) + end + + # Retry represents the use of the +retry+ keyword. 
+ # + # retry + # + class Retry + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('retry') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :retry, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_retry: () -> Retry + def on_retry + keyword = find_token(Kw, 'retry') + + Retry.new(value: keyword.value, location: keyword.location) + end + + # Return represents using the +return+ keyword with arguments. + # + # return value + # + class Return + # [Args | ArgsAddBlock] the arguments being passed to the keyword + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('return') + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :return, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_return: ((Args | ArgsAddBlock) arguments) -> Return + def on_return(arguments) + keyword = find_token(Kw, 'return') + + Return.new( + arguments: arguments, + location: keyword.location.to(arguments.location) + ) + end + + # Return0 represents the bare +return+ keyword with no arguments. 
+ # + # return + # + class Return0 + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('return0') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :return0, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_return0: () -> Return0 + def on_return0 + keyword = find_token(Kw, 'return') + + Return0.new(value: keyword.value, location: keyword.location) + end + + # RParen represents the use of a right parenthesis, i.e., +)+. + class RParen + # [String] the parenthesis + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_rparen: (String value) -> RParen + def on_rparen(value) + node = + RParen.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # SClass represents a block of statements that should be evaluated within the + # context of the singleton class of an object. It's frequently used to define + # singleton methods. 
+ # + # class << self + # end + # + class SClass + # [untyped] the target of the singleton class to enter + attr_reader :target + + # [BodyStmt] the expressions to be executed + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(target:, bodystmt:, location:) + @target = target + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('sclass') + + q.breakable + q.pp(target) + + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { + type: :sclass, + target: target, + bodystmt: bodystmt, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_sclass: (untyped target, BodyStmt bodystmt) -> SClass + def on_sclass(target, bodystmt) + beginning = find_token(Kw, 'class') + ending = find_token(Kw, 'end') + + bodystmt.bind( + find_next_statement_start(target.location.end_char), + ending.location.start_char + ) + + SClass.new( + target: target, + bodystmt: bodystmt, + location: beginning.location.to(ending.location) + ) + end + + # def on_semicolon(value) + # value + # end + + # def on_sp(value) + # value + # end + + # stmts_add is a parser event that represents a single statement inside a + # list of statements within any lexical block. It accepts as arguments the + # parent stmts node as well as an stmt which can be any expression in + # Ruby. + def on_stmts_add(statements, statement) + statements << statement + end + + # Everything that has a block of code inside of it has a list of statements. + # Normally we would just track those as a node that has an array body, but we + # have some special handling in order to handle empty statement lists. They + # need to have the right location information, so all of the parent node of + # stmts nodes will report back down the location information. We then + # propagate that onto void_stmt nodes inside the stmts in order to make sure + # all comments get printed appropriately. 
+ class Statements + # [SyntaxTree] the parser that created this node + attr_reader :parser + + # [Array[ untyped ]] the list of expressions contained within this node + attr_reader :body + + # [Location] the location of this node + attr_reader :location + + def initialize(parser:, body:, location:) + @parser = parser + @body = body + @location = location + end + + def bind(start_char, end_char) + @location = + Location.new( + start_line: location.start_line, + start_char: start_char, + end_line: location.end_line, + end_char: end_char + ) + + if body[0].is_a?(VoidStmt) + location = body[0].location + location = + Location.new( + start_line: location.start_line, + start_char: start_char, + end_line: location.end_line, + end_char: start_char + ) + + body[0] = VoidStmt.new(location: location) + end + + attach_comments(start_char, end_char) + end + + def bind_end(end_char) + @location = + Location.new( + start_line: location.start_line, + start_char: location.start_char, + end_line: location.end_line, + end_char: end_char + ) + end + + def <<(statement) + @location = + body.any? ? location.to(statement.location) : statement.location + + body << statement + self + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('statements') + + q.breakable + q.seplist(body) { |statement| q.pp(statement) } + end + end + + def to_json(*opts) + { type: :statements, body: body, loc: location }.to_json(*opts) + end + + private + + def attach_comments(start_char, end_char) + attachable = + parser.comments.select do |comment| + !comment.inline? && start_char <= comment.location.start_char && + end_char >= comment.location.end_char && + !comment.value.include?('prettier-ignore') + end + + return if attachable.empty? + + parser.comments -= attachable + @body = (body + attachable).sort_by! 
{ |node| node.location.start_char } + end + end + + # :call-seq: + # on_stmts_new: () -> Statements + def on_stmts_new + Statements.new( + parser: self, + body: [], + location: Location.fixed(line: lineno, char: char_pos) + ) + end + + # StringContent represents the contents of a string-like value. + # + # "string" + # + class StringContent + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # string + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + end + + # :call-seq: + # on_string_add: ( + # String string, + # (StringEmbExpr | StringDVar | TStringContent) part + # ) -> StringContent + def on_string_add(string, part) + location = + string.parts.any? ? string.location.to(part.location) : part.location + + StringContent.new(parts: string.parts << part, location: location) + end + + # StringConcat represents concatenating two strings together using a backward + # slash. 
+ # + # "first" \ + # "second" + # + class StringConcat + # [StringConcat | StringLiteral] the left side of the concatenation + attr_reader :left + + # [StringLiteral] the right side of the concatenation + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('string_concat') + + q.breakable + q.pp(left) + + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { type: :string_concat, left: left, right: right, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_string_concat: ( + # (StringConcat | StringLiteral) left, + # StringLiteral right + # ) -> StringConcat + def on_string_concat(left, right) + StringConcat.new( + left: left, + right: right, + location: left.location.to(right.location) + ) + end + + # :call-seq: + # on_string_content: () -> StringContent + def on_string_content + StringContent.new( + parts: [], + location: Location.fixed(line: lineno, char: char_pos) + ) + end + + # StringDVar represents shorthand interpolation of a variable into a string. + # It allows you to take an instance variable, class variable, or global + # variable and omit the braces when interpolating. 
+ # + # "#@variable" + # + class StringDVar + # [Backref | VarRef] the variable being interpolated + attr_reader :variable + + # [Location] the location of this node + attr_reader :location + + def initialize(variable:, location:) + @variable = variable + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('string_dvar') + + q.breakable + q.pp(variable) + end + end + + def to_json(*opts) + { type: :string_dvar, var: variable, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_string_dvar: ((Backref | VarRef) variable) -> StringDVar + def on_string_dvar(variable) + embvar = find_token(EmbVar) + + StringDVar.new( + variable: variable, + location: embvar.location.to(variable.location) + ) + end + + # StringEmbExpr represents interpolated content. It can be contained within a + # couple of different parent nodes, including regular expressions, strings, + # and dynamic symbols. + # + # "string #{expression}" + # + class StringEmbExpr + # [Statements] the expressions to be interpolated + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(statements:, location:) + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('string_embexpr') + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { type: :string_embexpr, stmts: statements, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_string_embexpr: (Statements statements) -> StringEmbExpr + def on_string_embexpr(statements) + embexpr_beg = find_token(EmbExprBeg) + embexpr_end = find_token(EmbExprEnd) + + statements.bind( + embexpr_beg.location.end_char, + embexpr_end.location.start_char + ) + + StringEmbExpr.new( + statements: statements, + location: embexpr_beg.location.to(embexpr_end.location) + ) + end + + # StringLiteral represents a string literal. 
+ # + # "string" + # + class StringLiteral + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # string literal + attr_reader :parts + + # [String] which quote was used by the string literal + attr_reader :quote + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, quote:, location:) + @parts = parts + @quote = quote + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('string_literal') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { + type: :string_literal, + parts: parts, + quote: quote, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_string_literal: (String string) -> Heredoc | StringLiteral + def on_string_literal(string) + heredoc = @heredocs[-1] + + if heredoc && heredoc.ending + heredoc = @heredocs.pop + + Heredoc.new( + beginning: heredoc.beginning, + ending: heredoc.ending, + parts: string.parts, + location: heredoc.location + ) + else + tstring_beg = find_token(TStringBeg) + tstring_end = find_token(TStringEnd) + + StringLiteral.new( + parts: string.parts, + quote: tstring_beg.value, + location: tstring_beg.location.to(tstring_end.location) + ) + end + end + + # Super represents using the +super+ keyword with arguments. It can optionally + # use parentheses. 
+ # + # super(value) + # + class Super + # [ArgParen | Args | ArgsAddBlock] the arguments to the keyword + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('super') + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :super, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_super: ((ArgParen | Args | ArgsAddBlock) arguments) -> Super + def on_super(arguments) + keyword = find_token(Kw, 'super') + + Super.new( + arguments: arguments, + location: keyword.location.to(arguments.location) + ) + end + + # SymBeg represents the beginning of a symbol literal. + # + # :symbol + # + # SymBeg is also used for dynamic symbols, as in: + # + # :"symbol" + # + # Finally, SymBeg is also used for symbols using the %s syntax, as in: + # + # %s[symbol] + # + # The value of this node is a string. In most cases (as in the first example + # above) it will contain just ":". In the case of dynamic symbols it will + # contain ":'" or ":\"". In the case of %s symbols, it will contain the start + # of the symbol including the %s and the delimiter. + class SymBeg + # [String] the beginning of the symbol + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # symbeg is a token that represents the beginning of a symbol literal. + # In most cases it will contain just ":" as in the value, but if its a dynamic + # symbol being defined it will contain ":'" or ":\"". 
+ def on_symbeg(value) + node = + SymBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # SymbolContent represents symbol contents and is always the child of a + # SymbolLiteral node. + # + # :symbol + # + class SymbolContent + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the + # symbol + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_symbol: ( + # (Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op) value + # ) -> SymbolContent + def on_symbol(value) + tokens.pop + + SymbolContent.new(value: value, location: value.location) + end + + # SymbolLiteral represents a symbol in the system with no interpolation + # (as opposed to a DynaSymbol which has interpolation). + # + # :symbol + # + class SymbolLiteral + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the + # symbol + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('symbol_literal') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :symbol_literal, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_symbol_literal: ( + # ( + # Backtick | Const | CVar | GVar | Ident | + # IVar | Kw | Op | SymbolContent + # ) value + # ) -> SymbolLiteral + def on_symbol_literal(value) + if tokens[-1] == value + SymbolLiteral.new(value: tokens.pop, location: value.location) + else + symbeg = find_token(SymBeg) + + SymbolLiteral.new( + value: value.value, + location: symbeg.location.to(value.location) + ) + end + end + + # Symbols represents a symbol array literal with interpolation. 
+ # + # %I[one two three] + # + class Symbols + # [Array[ Word ]] the words in the symbol array literal + attr_reader :elements + + # [Location] the location of this node + attr_reader :location + + def initialize(elements:, location:) + @elements = elements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('symbols') + + q.breakable + q.group(2, '(', ')') { q.seplist(elements) { |element| q.pp(element) } } + end + end + + def to_json(*opts) + { type: :symbols, elems: elements, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_symbols_add: (Symbols symbols, Word word) -> Symbols + def on_symbols_add(symbols, word) + Symbols.new( + elements: symbols.elements << word, + location: symbols.location.to(word.location) + ) + end + + # SymbolsBeg represents the start of a symbol array literal with + # interpolation. + # + # %I[one two three] + # + # In the snippet above, SymbolsBeg represents the "%I[" token. Note that these + # kinds of arrays can start with a lot of different delimiter types + # (e.g., %I| or %I<). + class SymbolsBeg + # [String] the beginning of the symbol literal array + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_symbols_beg: (String value) -> SymbolsBeg + def on_symbols_beg(value) + node = + SymbolsBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # :call-seq: + # on_symbols_new: () -> Symbols + def on_symbols_new + symbols_beg = find_token(SymbolsBeg) + + Symbols.new(elements: [], location: symbols_beg.location) + end + + # TLambda represents the beginning of a lambda literal. + # + # -> { value } + # + # In the example above the TLambda represents the +->+ operator. 
+ class TLambda + # [String] the beginning of the lambda literal + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tlambda: (String value) -> TLambda + def on_tlambda(value) + node = + TLambda.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # TLamBeg represents the beginning of the body of a lambda literal using + # braces. + # + # -> { value } + # + # In the example above the TLamBeg represents the +{+ operator. + class TLamBeg + # [String] the beginning of the body of the lambda literal + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tlambeg: (String value) -> TLamBeg + def on_tlambeg(value) + node = + TLamBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # TopConstField is always the child node of some kind of assignment. It + # represents when you're assigning to a constant that is being referenced at + # the top level. 
+ # + # ::Constant = value + # + class TopConstField + # [Const] the constant being assigned + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, location:) + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('top_const_field') + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { type: :top_const_field, constant: constant, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_top_const_field: (Const constant) -> TopConstRef + def on_top_const_field(constant) + operator = find_colon2_before(constant) + + TopConstField.new( + constant: constant, + location: operator.location.to(constant.location) + ) + end + + # TopConstRef is very similar to TopConstField except that it is not involved + # in an assignment. + # + # ::Constant + # + class TopConstRef + # [Const] the constant being referenced + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, location:) + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('top_const_ref') + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { type: :top_const_ref, constant: constant, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_top_const_ref: (Const constant) -> TopConstRef + def on_top_const_ref(constant) + operator = find_colon2_before(constant) + + TopConstRef.new( + constant: constant, + location: operator.location.to(constant.location) + ) + end + + # TStringBeg represents the beginning of a string literal. + # + # "string" + # + # In the example above, TStringBeg represents the first set of quotes. Strings + # can also use single quotes. 
They can also be declared using the +%q+ and + # +%Q+ syntax, as in: + # + # %q{string} + # + class TStringBeg + # [String] the beginning of the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tstring_beg: (String value) -> TStringBeg + def on_tstring_beg(value) + node = + TStringBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # TStringContent represents plain characters inside of an entity that accepts + # string content like a string, heredoc, command string, or regular + # expression. + # + # "string" + # + # In the example above, TStringContent represents the +string+ token contained + # within the string. + class TStringContent + # [String] the content of the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('tstring_content') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { + type: :tstring_content, + value: value.force_encoding('UTF-8'), + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_tstring_content: (String value) -> TStringContent + def on_tstring_content(value) + TStringContent.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + end + + # TStringEnd represents the end of a string literal. + # + # "string" + # + # In the example above, TStringEnd represents the second set of quotes. + # Strings can also use single quotes. 
They can also be declared using the +%q+ + # and +%Q+ syntax, as in: + # + # %q{string} + # + class TStringEnd + # [String] the end of the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tstring_end: (String value) -> TStringEnd + def on_tstring_end(value) + node = + TStringEnd.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Not represents the unary +not+ method being called on an expression. + # + # not value + # + class Not + # [untyped] the statement on which to operate + attr_reader :statement + + # [boolean] whether or not parentheses were used + attr_reader :parentheses + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, parentheses:, location:) + @statement = statement + @parentheses = parentheses + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('not') + + q.breakable + q.pp(statement) + end + end + + def to_json(*opts) + { + type: :not, + value: statement, + paren: parentheses, + loc: location + }.to_json(*opts) + end + end + + # Unary represents a unary method being called on an expression, as in +!+ or + # +~+. 
+ # + # !value + # + class Unary + # [String] the operator being used + attr_reader :operator + + # [untyped] the statement on which to operate + attr_reader :statement + + # [Location] the location of this node + attr_reader :location + + def initialize(operator:, statement:, location:) + @operator = operator + @statement = statement + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('unary') + + q.breakable + q.pp(operator) + + q.breakable + q.pp(statement) + end + end + + def to_json(*opts) + { type: :unary, op: operator, value: statement, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_unary: (:not operator, untyped statement) -> Not + # | (Symbol operator, untyped statement) -> Unary + def on_unary(operator, statement) + if operator == :not + # We have somewhat special handling of the not operator since if it has + # parentheses they don't get reported as a paren node for some reason. + + beginning = find_token(Kw, 'not') + ending = statement + + range = beginning.location.end_char...statement.location.start_char + paren = source[range].include?('(') + + if paren + find_token(LParen) + ending = find_token(RParen) + end + + Not.new( + statement: statement, + parentheses: paren, + location: beginning.location.to(ending.location) + ) + else + # Special case instead of using find_token here. It turns out that + # if you have a range that goes from a negative number to a negative + # number then you can end up with a .. or a ... that's higher in the + # stack. So we need to explicitly disallow those operators. + index = + tokens.rindex do |token| + token.is_a?(Op) && + token.location.start_char < statement.location.start_char && + !%w[.. 
...].include?(token.value) + end + + beginning = tokens.delete_at(index) + + Unary.new( + operator: operator[0], # :+@ -> "+" + statement: statement, + location: beginning.location.to(statement.location) + ) + end + end + + # Undef represents the use of the +undef+ keyword. + # + # undef method + # + class Undef + # [Array[ DynaSymbol | SymbolLiteral ]] the symbols to undefine + attr_reader :symbols + + # [Location] the location of this node + attr_reader :location + + def initialize(symbols:, location:) + @symbols = symbols + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('undef') + + q.breakable + q.group(2, '(', ')') { q.seplist(symbols) { |symbol| q.pp(symbol) } } + end + end + + def to_json(*opts) + { type: :undef, syms: symbols, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_undef: (Array[DynaSymbol | SymbolLiteral] symbols) -> Undef + def on_undef(symbols) + keyword = find_token(Kw, 'undef') + + Undef.new( + symbols: symbols, + location: keyword.location.to(symbols.last.location) + ) + end + + # Unless represents the first clause in an +unless+ chain. 
+ # + # unless predicate + # end + # + class Unless + # [untyped] the expression to be checked + attr_reader :predicate + + # [Statements] the expressions to be executed + attr_reader :statements + + # [nil, Elsif, Else] the next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(predicate:, statements:, consequent:, location:) + @predicate = predicate + @statements = statements + @consequent = consequent + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('unless') + + q.breakable + q.pp(predicate) + + q.breakable + q.pp(statements) + + if consequent + q.breakable + q.pp(consequent) + end + end + end + + def to_json(*opts) + { + type: :unless, + pred: predicate, + stmts: statements, + cons: consequent, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_unless: ( + # untyped predicate, + # Statements statements, + # ((nil | Elsif | Else) consequent) + # ) -> Unless + def on_unless(predicate, statements, consequent) + beginning = find_token(Kw, 'unless') + ending = consequent || find_token(Kw, 'end') + + statements.bind(predicate.location.end_char, ending.location.start_char) + + Unless.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: beginning.location.to(ending.location) + ) + end + + # UnlessMod represents the modifier form of an +unless+ statement. 
+ # + # expression unless predicate + # + class UnlessMod + # [untyped] the expression to be executed + attr_reader :statement + + # [untyped] the expression to be checked + attr_reader :predicate + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, predicate:, location:) + @statement = statement + @predicate = predicate + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('unless_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(predicate) + end + end + + def to_json(*opts) + { + type: :unless_mod, + stmt: statement, + pred: predicate, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_unless_mod: (untyped predicate, untyped statement) -> UnlessMod + def on_unless_mod(predicate, statement) + find_token(Kw, 'unless') + + UnlessMod.new( + statement: statement, + predicate: predicate, + location: statement.location.to(predicate.location) + ) + end + + # Until represents an +until+ loop. 
+ # + # until predicate + # end + # + class Until + # [untyped] the expression to be checked + attr_reader :predicate + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(predicate:, statements:, location:) + @predicate = predicate + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('until') + + q.breakable + q.pp(predicate) + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :until, + pred: predicate, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_until: (untyped predicate, Statements statements) -> Until + def on_until(predicate, statements) + beginning = find_token(Kw, 'until') + ending = find_token(Kw, 'end') + + # Consume the do keyword if it exists so that it doesn't get confused for + # some other block + keyword = find_token(Kw, 'do', consume: false) + if keyword && keyword.location.start_char > predicate.location.end_char && + keyword.location.end_char < ending.location.start_char + tokens.delete(keyword) + end + + # Update the Statements location information + statements.bind(predicate.location.end_char, ending.location.start_char) + + Until.new( + predicate: predicate, + statements: statements, + location: beginning.location.to(ending.location) + ) + end + + # UntilMod represents the modifier form of a +until+ loop. 
+ # + # expression until predicate + # + class UntilMod + # [untyped] the expression to be executed + attr_reader :statement + + # [untyped] the expression to be checked + attr_reader :predicate + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, predicate:, location:) + @statement = statement + @predicate = predicate + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('until_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(predicate) + end + end + + def to_json(*opts) + { + type: :until_mod, + stmt: statement, + pred: predicate, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_until_mod: (untyped predicate, untyped statement) -> UntilMod + def on_until_mod(predicate, statement) + find_token(Kw, 'until') + + UntilMod.new( + statement: statement, + predicate: predicate, + location: statement.location.to(predicate.location) + ) + end + + # VarAlias represents when you're using the +alias+ keyword with global + # variable arguments. 
+ # + # alias $new $old + # + class VarAlias + # [GVar] the new alias of the variable + attr_reader :left + + # [Backref | GVar] the current name of the variable to be aliased + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('var_alias') + + q.breakable + q.pp(left) + + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { type: :var_alias, left: left, right: right, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_var_alias: (GVar left, (Backref | GVar) right) -> VarAlias + def on_var_alias(left, right) + keyword = find_token(Kw, 'alias') + + VarAlias.new( + left: left, + right: right, + location: keyword.location.to(right.location) + ) + end + + # VarField represents a variable that is being assigned a value. As such, it + # is always a child of an assignment type node. + # + # variable = value + # + # In the example above, the VarField node represents the +variable+ token. 
+ class VarField + # [nil | Const | CVar | GVar | Ident | IVar] the target of this node + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('var_field') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :var_field, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_var_field: ( + # (nil | Const | CVar | GVar | Ident | IVar) value + # ) -> VarField + def on_var_field(value) + location = + if value + value.location + else + # You can hit this pattern if you're assigning to a splat using pattern + # matching syntax in Ruby 2.7+ + Location.fixed(line: lineno, char: char_pos) + end + + VarField.new(value: value, location: location) + end + + # VarRef represents a variable reference. + # + # true + # + # This can be a plain local variable like the example above. It can also be a + # constant, a class variable, a global variable, an instance variable, a + # keyword (like +self+, +nil+, +true+, or +false+), or a numbered block + # variable. + class VarRef + # [Const | CVar | GVar | Ident | IVar | Kw] the value of this node + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('var_ref') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :var_ref, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_var_ref: ((Const | CVar | GVar | Ident | IVar | Kw) value) -> VarRef + def on_var_ref(value) + VarRef.new(value: value, location: value.location) + end + + # AccessCtrl represents a call to a method visibility control, i.e., +public+, + # +protected+, or +private+. 
+ # + # private + # + class AccessCtrl + # [Ident] the value of this expression + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('access_ctrl') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :access_ctrl, value: value, loc: location }.to_json(*opts) + end + end + + # VCall represent any plain named object with Ruby that could be either a + # local variable or a method call. + # + # variable + # + class VCall + # [Ident] the value of this expression + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('vcall') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :vcall, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_vcall: (Ident ident) -> AccessCtrl | VCall + def on_vcall(ident) + @controls ||= %w[private protected public].freeze + + if @controls.include?(ident.value) && ident.value == lines[lineno - 1].strip + # Access controls like private, protected, and public are reported as + # vcall nodes since they're technically method calls. We want to be able + # add new lines around them as necessary, so here we're going to + # explicitly track those as a different node type. + AccessCtrl.new(value: ident, location: ident.location) + else + VCall.new(value: ident, location: ident.location) + end + end + + # VoidStmt represents an empty lexical block of code. 
+ # + # ;; + # + class VoidStmt + # [Location] the location of this node + attr_reader :location + + def initialize(location:) + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') { q.text('void_stmt') } + end + + def to_json(*opts) + { type: :void_stmt, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_void_stmt: () -> VoidStmt + def on_void_stmt + VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos)) + end + + # When represents a +when+ clause in a +case+ chain. + # + # case value + # when predicate + # end + # + class When + # [untyped] the arguments to the when clause + attr_reader :arguments + + # [Statements] the expressions to be executed + attr_reader :statements + + # [nil | Else | When] the next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, statements:, consequent:, location:) + @arguments = arguments + @statements = statements + @consequent = consequent + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('when') + + q.breakable + q.pp(arguments) + + q.breakable + q.pp(statements) + + if consequent + q.breakable + q.pp(consequent) + end + end + end + + def to_json(*opts) + { + type: :when, + args: arguments, + stmts: statements, + cons: consequent, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_when: ( + # untyped arguments, + # Statements statements, + # (nil | Else | When) consequent + # ) -> When + def on_when(arguments, statements, consequent) + beginning = find_token(Kw, 'when') + ending = consequent || find_token(Kw, 'end') + + statements.bind(arguments.location.end_char, ending.location.start_char) + + When.new( + arguments: arguments, + statements: statements, + consequent: consequent, + location: beginning.location.to(ending.location) + ) + end + + # While represents a +while+ loop. 
+ # + # while predicate + # end + # + class While + # [untyped] the expression to be checked + attr_reader :predicate + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(predicate:, statements:, location:) + @predicate = predicate + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('while') + + q.breakable + q.pp(predicate) + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :while, + pred: predicate, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_while: (untyped predicate, Statements statements) -> While + def on_while(predicate, statements) + beginning = find_token(Kw, 'while') + ending = find_token(Kw, 'end') + + # Consume the do keyword if it exists so that it doesn't get confused for + # some other block + keyword = find_token(Kw, 'do', consume: false) + if keyword && keyword.location.start_char > predicate.location.end_char && + keyword.location.end_char < ending.location.start_char + tokens.delete(keyword) + end + + # Update the Statements location information + statements.bind(predicate.location.end_char, ending.location.start_char) + + While.new( + predicate: predicate, + statements: statements, + location: beginning.location.to(ending.location) + ) + end + + # WhileMod represents the modifier form of a +while+ loop. 
+ # + # expression while predicate + # + class WhileMod + # [untyped] the expression to be executed + attr_reader :statement + + # [untyped] the expression to be checked + attr_reader :predicate + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, predicate:, location:) + @statement = statement + @predicate = predicate + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('while_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(predicate) + end + end + + def to_json(*opts) + { + type: :while_mod, + stmt: statement, + pred: predicate, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_while_mod: (untyped predicate, untyped statement) -> WhileMod + def on_while_mod(predicate, statement) + find_token(Kw, 'while') + + WhileMod.new( + statement: statement, + predicate: predicate, + location: statement.location.to(predicate.location) + ) + end + + # Word represents an element within a special array literal that accepts + # interpolation. + # + # %W[a#{b}c xyz] + # + # In the example above, there would be two Word nodes within a parent Words + # node. + class Word + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # word + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('word') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :word, parts: parts, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_word_add: ( + # Word word, + # (StringEmbExpr | StringDVar | TStringContent) part + # ) -> Word + def on_word_add(word, part) + location = + word.parts.empty? ? 
part.location : word.location.to(part.location) + + Word.new(parts: word.parts << part, location: location) + end + + # :call-seq: + # on_word_new: () -> Word + def on_word_new + Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + end + + # Words represents a string literal array with interpolation. + # + # %W[one two three] + # + class Words + # [Array[ Word ]] the elements of this array + attr_reader :elements + + # [Location] the location of this node + attr_reader :location + + def initialize(elements:, location:) + @elements = elements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('words') + + q.breakable + q.group(2, '(', ')') { q.seplist(elements) { |element| q.pp(element) } } + end + end + + def to_json(*opts) + { type: :words, elems: elements, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_words_add: (Words words, Word word) -> Words + def on_words_add(words, word) + Words.new( + elements: words.elements << word, + location: words.location.to(word.location) + ) + end + + # WordsBeg represents the beginning of a string literal array with + # interpolation. + # + # %W[one two three] + # + # In the snippet above, a WordsBeg would be created with the value of "%W[". + # Note that these kinds of arrays can start with a lot of different delimiter + # types (e.g., %W| or %W<). 
+ class WordsBeg + # [String] the start of the word literal array + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_words_beg: (String value) -> WordsBeg + def on_words_beg(value) + node = + WordsBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # :call-seq: + # on_words_new: () -> Words + def on_words_new + words_beg = find_token(WordsBeg) + + Words.new(elements: [], location: words_beg.location) + end + + # def on_words_sep(value) + # value + # end + + # XString represents the contents of an XStringLiteral. + # + # `ls` + # + class XString + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # xstring + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + end + + # :call-seq: + # on_xstring_add: ( + # XString xstring, + # (StringEmbExpr | StringDVar | TStringContent) part + # ) -> XString + def on_xstring_add(xstring, part) + XString.new( + parts: xstring.parts << part, + location: xstring.location.to(part.location) + ) + end + + # :call-seq: + # on_xstring_new: () -> XString + def on_xstring_new + heredoc = @heredocs[-1] + + location = + if heredoc && heredoc.beginning.value.include?('`') + heredoc.location + else + find_token(Backtick).location + end + + XString.new(parts: [], location: location) + end + + # XStringLiteral represents a string that gets executed. 
+ # + # `ls` + # + class XStringLiteral + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # xstring + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('xstring_literal') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :xstring_literal, parts: parts, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_xstring_literal: (XString xstring) -> Heredoc | XStringLiteral + def on_xstring_literal(xstring) + heredoc = @heredocs[-1] + + if heredoc && heredoc.beginning.value.include?('`') + Heredoc.new( + beginning: heredoc.beginning, + ending: heredoc.ending, + parts: xstring.parts, + location: heredoc.location + ) + else + ending = find_token(TStringEnd) + + XStringLiteral.new( + parts: xstring.parts, + location: xstring.location.to(ending.location) + ) + end + end + + # Yield represents using the +yield+ keyword with arguments. + # + # yield value + # + class Yield + # [ArgsAddBlock | Paren] the arguments passed to the yield + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('yield') + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :yield, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_yield: ((ArgsAddBlock | Paren) arguments) -> Yield + def on_yield(arguments) + keyword = find_token(Kw, 'yield') + + Yield.new( + arguments: arguments, + location: keyword.location.to(arguments.location) + ) + end + + # Yield0 represents the bare +yield+ keyword with no arguments. 
+ # + # yield + # + class Yield0 + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('yield0') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :yield0, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_yield0: () -> Yield0 + def on_yield0 + keyword = find_token(Kw, 'yield') + + Yield0.new(value: keyword.value, location: keyword.location) + end + + # ZSuper represents the bare +super+ keyword with no arguments. + # + # super + # + class ZSuper + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of the node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('zsuper') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :zsuper, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_zsuper: () -> ZSuper + def on_zsuper + keyword = find_token(Kw, 'super') + + ZSuper.new(value: keyword.value, location: keyword.location) + end +end diff --git a/spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt b/spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt new file mode 100644 index 00000000000000..e62fd3fa6632ad --- /dev/null +++ b/spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt @@ -0,0 +1,64 @@ +module SyntaxErrorSearch + # Used for formatting invalid blocks + class DisplayInvalidBlocks + attr_reader :filename + + def initialize(block_array, io: $stderr, filename: nil) + @filename = filename + @io = io + @blocks = block_array + @lines = @blocks.map(&:lines).flatten + @digit_count = @lines.last.line_number.to_s.length + @code_lines = @blocks.first.code_lines + + @invalid_line_hash = 
@lines.each_with_object({}) {|line, h| h[line] = true} + end + + def call + @io.puts <<~EOM + + SyntaxSuggest: A syntax error was detected + + This code has an unmatched `end` this is caused by either + missing a syntax keyword (`def`, `do`, etc.) or inclusion + of an extra `end` line: + EOM + + @io.puts(<<~EOM) if filename + file: #{filename} + EOM + + @io.puts <<~EOM + #{code_with_filename} + EOM + end + + def filename + + def code_with_filename + string = String.new("") + string << "```\n" + string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename + string << code_with_lines + string << "```\n" + string + end + + def code_with_lines + @code_lines.map do |line| + next if line.hidden? + number = line.line_number.to_s.rjust(@digit_count) + if line.empty? + "#{number.to_s}#{line}" + else + string = String.new + string << "\e[1;3m" if @invalid_line_hash[line] # Bold, italics + string << "#{number.to_s} " + string << line.to_s + string << "\e[0m" + string + end + end.join + end + end +end diff --git a/spec/syntax_suggest/fixtures/webmock.rb.txt b/spec/syntax_suggest/fixtures/webmock.rb.txt new file mode 100644 index 00000000000000..16da0d2ac0701c --- /dev/null +++ b/spec/syntax_suggest/fixtures/webmock.rb.txt @@ -0,0 +1,35 @@ +describe "webmock tests" do + before(:each) do + WebMock.enable! + end + + after(:each) do + WebMock.disable! + end + + it "port" do + port = rand(1000...9999) + stub_request(:any, "localhost:#{port}") + + query = Cutlass::FunctionQuery.new( + port: port + ).call + + expect(WebMock).to have_requested(:post, "localhost:#{port}"). + with(body: "{}") + end + + it "body" do + body = { lol: "hi" } + port = 8080 + stub_request(:any, "localhost:#{port}") + + query = Cutlass::FunctionQuery.new( + port: port + body: body + ).call + + expect(WebMock).to have_requested(:post, "localhost:#{port}"). 
+ with(body: body.to_json) + end +end diff --git a/spec/syntax_suggest/integration/exe_cli_spec.rb b/spec/syntax_suggest/integration/exe_cli_spec.rb new file mode 100644 index 00000000000000..79e659a27aeda8 --- /dev/null +++ b/spec/syntax_suggest/integration/exe_cli_spec.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "exe" do + def exe_path + root_dir.join("exe").join("syntax_suggest") + end + + def exe(cmd) + out = run!("#{exe_path} #{cmd}", raise_on_nonzero_exit: false) + puts out if ENV["SYNTAX_SUGGEST_DEBUG"] + out + end + + it "prints the version" do + out = exe("-v") + expect(out.strip).to include(SyntaxSuggest::VERSION) + end + end +end diff --git a/spec/syntax_suggest/integration/ruby_command_line_spec.rb b/spec/syntax_suggest/integration/ruby_command_line_spec.rb new file mode 100644 index 00000000000000..7a1c5c654e65ff --- /dev/null +++ b/spec/syntax_suggest/integration/ruby_command_line_spec.rb @@ -0,0 +1,151 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "Requires with ruby cli" do + it "namespaces all monkeypatched methods" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~'EOM' + puts Kernel.private_methods + EOM + + syntax_suggest_methods_file = tmpdir.join("syntax_suggest_methods.txt") + api_only_methods_file = tmpdir.join("api_only_methods.txt") + kernel_methods_file = tmpdir.join("kernel_methods.txt") + + d_pid = Process.spawn("ruby -I#{lib_dir} -rsyntax_suggest #{script} 2>&1 > #{syntax_suggest_methods_file}") + k_pid = Process.spawn("ruby #{script} 2>&1 >> #{kernel_methods_file}") + r_pid = Process.spawn("ruby -I#{lib_dir} -rsyntax_suggest/api #{script} 2>&1 > #{api_only_methods_file}") + + Process.wait(k_pid) + Process.wait(d_pid) + Process.wait(r_pid) + + kernel_methods_array = kernel_methods_file.read.strip.lines.map(&:strip) + 
syntax_suggest_methods_array = syntax_suggest_methods_file.read.strip.lines.map(&:strip) + api_only_methods_array = api_only_methods_file.read.strip.lines.map(&:strip) + + # In ruby 3.1.0-preview1 the `timeout` file is already required + # we can remove it if it exists to normalize the output for + # all ruby versions + [syntax_suggest_methods_array, kernel_methods_array, api_only_methods_array].each do |array| + array.delete("timeout") + end + + methods = (syntax_suggest_methods_array - kernel_methods_array).sort + if methods.any? + expect(methods).to eq(["syntax_suggest_original_load", "syntax_suggest_original_require", "syntax_suggest_original_require_relative"]) + end + + methods = (api_only_methods_array - kernel_methods_array).sort + expect(methods).to eq([]) + end + end + + it "detects require error and adds a message with auto mode" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~EOM + describe "things" do + it "blerg" do + end + + it "flerg" + end + + it "zlerg" do + end + end + EOM + + require_rb = tmpdir.join("require.rb") + require_rb.write <<~EOM + load "#{script.expand_path}" + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{require_rb} 2>&1` + + expect($?.success?).to be_falsey + expect(out).to include('❯ 5 it "flerg"').once + end + end + + it "annotates a syntax error in Ruby 3.2+ when require is not used" do + pending("Support for SyntaxError#detailed_message monkeypatch needed https://gist.github.com/schneems/09f45cc23b9a8c46e9af6acbb6e6840d?permalink_comment_id=4172585#gistcomment-4172585") + + skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") + + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~EOM + describe "things" do + it "blerg" do + end + + it "flerg" + end + + it "zlerg" do + end + end + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{script} 2>&1` + + expect($?.success?).to be_falsey + expect(out).to 
include('❯ 5 it "flerg"').once + end + end + + it "does not load internals into memory if no syntax error" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~EOM + class Dog + end + + if defined?(SyntaxSuggest::DEFAULT_VALUE) + puts "SyntaxSuggest is loaded" + else + puts "SyntaxSuggest is NOT loaded" + end + EOM + + require_rb = tmpdir.join("require.rb") + require_rb.write <<~EOM + load "#{script.expand_path}" + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{require_rb} 2>&1` + + expect($?.success?).to be_truthy + expect(out).to include("SyntaxSuggest is NOT loaded").once + end + end + + it "ignores eval" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~'EOM' + $stderr = STDOUT + eval("def lol") + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{script} 2>&1` + + expect($?.success?).to be_falsey + expect(out).to include("(eval):1") + + expect(out).to_not include("SyntaxSuggest") + expect(out).to_not include("Could not find filename") + end + end + end +end diff --git a/spec/syntax_suggest/integration/syntax_suggest_spec.rb b/spec/syntax_suggest/integration/syntax_suggest_spec.rb new file mode 100644 index 00000000000000..a7287ff64e02a6 --- /dev/null +++ b/spec/syntax_suggest/integration/syntax_suggest_spec.rb @@ -0,0 +1,211 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "Integration tests that don't spawn a process (like using the cli)" do + it "does not timeout on massive files" do + next unless ENV["SYNTAX_SUGGEST_TIMEOUT"] + + file = fixtures_dir.join("syntax_tree.rb.txt") + lines = file.read.lines + lines.delete_at(768 - 1) + + io = StringIO.new + + benchmark = Benchmark.measure do + debug_perf do + SyntaxSuggest.call( + io: io, + source: lines.join, + filename: file + ) + end + debug_display(io.string) + debug_display(benchmark) + end + + expect(io.string).to include(<<~'EOM') 
+ 6 class SyntaxTree < Ripper + 170 def self.parse(source) + 174 end + ❯ 754 def on_args_add(arguments, argument) + ❯ 776 class ArgsAddBlock + ❯ 810 end + 9233 end + EOM + end + + it "re-checks all block code, not just what's visible issues/95" do + file = fixtures_dir.join("ruby_buildpack.rb.txt") + io = StringIO.new + + debug_perf do + benchmark = Benchmark.measure do + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file + ) + end + debug_display(io.string) + debug_display(benchmark) + end + + expect(io.string).to_not include("def ruby_install_binstub_path") + expect(io.string).to include(<<~'EOM') + ❯ 1067 def add_yarn_binary + ❯ 1068 return [] if yarn_preinstalled? + ❯ 1069 | + ❯ 1075 end + EOM + end + + it "returns good results on routes.rb" do + source = fixtures_dir.join("routes.rb.txt").read + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + debug_display(io.string) + + expect(io.string).to include(<<~'EOM') + 1 Rails.application.routes.draw do + ❯ 113 namespace :admin do + ❯ 116 match "/foobar(*path)", via: :all, to: redirect { |_params, req| + ❯ 120 } + 121 end + EOM + end + + it "handles multi-line-methods issues/64" do + source = fixtures_dir.join("webmock.rb.txt").read + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + debug_display(io.string) + + expect(io.string).to include(<<~'EOM') + 1 describe "webmock tests" do + 22 it "body" do + 27 query = Cutlass::FunctionQuery.new( + ❯ 28 port: port + ❯ 29 body: body + 30 ).call + 34 end + 35 end + EOM + end + + it "handles derailed output issues/50" do + source = fixtures_dir.join("derailed_require_tree.rb.txt").read + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + debug_display(io.string) + + expect(io.string).to include(<<~'EOM') + 5 module DerailedBenchmarks + 6 class RequireTree + 7 REQUIRED_BY = {} + 9 attr_reader :name + 10 attr_writer :cost + ❯ 13 def initialize(name) + ❯ 18 def self.reset! 
+ ❯ 25 end + 73 end + 74 end + EOM + end + + it "handles heredocs" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(85 - 1) + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: lines.join + ) + + out = io.string + debug_display(out) + + expect(out).to include(<<~EOM) + 16 class Rexe + ❯ 77 class Lookups + ❯ 78 def input_modes + ❯ 148 end + 551 end + EOM + end + + it "rexe" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(148 - 1) + source = lines.join + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + out = io.string + expect(out).to include(<<~EOM) + 16 class Rexe + 18 VERSION = '1.5.1' + ❯ 77 class Lookups + ❯ 140 def format_requires + ❯ 148 end + 551 end + EOM + end + + it "ambiguous end" do + source = <<~'EOM' + def call # 0 + print "lol" # 1 + end # one # 2 + end # two # 3 + EOM + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + out = io.string + expect(out).to include(<<~EOM) + ❯ 1 def call # 0 + ❯ 3 end # one # 2 + ❯ 4 end # two # 3 + EOM + end + + it "simple regression" do + source = <<~'EOM' + class Dog + def bark + puts "woof" + end + EOM + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + out = io.string + expect(out).to include(<<~EOM) + ❯ 1 class Dog + ❯ 2 def bark + ❯ 4 end + EOM + end + end +end diff --git a/spec/syntax_suggest/spec_helper.rb b/spec/syntax_suggest/spec_helper.rb new file mode 100644 index 00000000000000..33f3ef37083d9f --- /dev/null +++ b/spec/syntax_suggest/spec_helper.rb @@ -0,0 +1,90 @@ +# frozen_string_literal: true + +require "bundler/setup" +require "syntax_suggest/api" + +require "benchmark" +require "tempfile" + +RSpec.configure do |config| + # Enable flags like --only-failures and --next-failure + config.example_status_persistence_file_path = ".rspec_status" + + # Disable RSpec exposing methods globally on `Module` and `main` + config.disable_monkey_patching! 
+ + config.expect_with :rspec do |c| + c.syntax = :expect + end +end + +# Used for debugging modifications to +# display output +def debug_display(output) + return unless ENV["DEBUG_DISPLAY"] + puts + puts output + puts +end + +def spec_dir + Pathname(__dir__) +end + +def lib_dir + root_dir.join("lib") +end + +def root_dir + spec_dir.join("..") +end + +def fixtures_dir + spec_dir.join("fixtures") +end + +def code_line_array(source) + SyntaxSuggest::CleanDocument.new(source: source).call.lines +end + +autoload :RubyProf, "ruby-prof" + +def debug_perf + raise "No block given" unless block_given? + + if ENV["DEBUG_PERF"] + out = nil + result = RubyProf.profile do + out = yield + end + + dir = SyntaxSuggest.record_dir("tmp") + printer = RubyProf::MultiPrinter.new(result, [:flat, :graph, :graph_html, :tree, :call_tree, :stack, :dot]) + printer.print(path: dir, profile: "profile") + + out + else + yield + end +end + +def run!(cmd, raise_on_nonzero_exit: true) + out = `#{cmd} 2>&1` + raise "Command: #{cmd} failed: #{out}" if !$?.success? && raise_on_nonzero_exit + out +end + +# Allows us to write cleaner tests since <<~EOM block quotes +# strip off all leading indentation and we need it to be preserved +# sometimes. +class String + def indent(number) + lines.map do |line| + if line.chomp.empty? 
+ line + else + " " * number + line + end + end.join + end +end diff --git a/spec/syntax_suggest/unit/api_spec.rb b/spec/syntax_suggest/unit/api_spec.rb new file mode 100644 index 00000000000000..284a4cdeec47c8 --- /dev/null +++ b/spec/syntax_suggest/unit/api_spec.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" +require "ruby-prof" + +module SyntaxSuggest + RSpec.describe "Top level SyntaxSuggest api" do + it "has a `handle_error` interface" do + fake_error = Object.new + def fake_error.message + "#{__FILE__}:216: unterminated string meets end of file " + end + + def fake_error.is_a?(v) + true + end + + io = StringIO.new + SyntaxSuggest.handle_error( + fake_error, + re_raise: false, + io: io + ) + + expect(io.string.strip).to eq("Syntax OK") + end + + it "raises original error with warning if a non-syntax error is passed" do + error = NameError.new("blerg") + io = StringIO.new + expect { + SyntaxSuggest.handle_error( + error, + re_raise: false, + io: io + ) + }.to raise_error { |e| + expect(io.string).to include("Must pass a SyntaxError") + expect(e).to eq(error) + } + end + + it "raises original error with warning if file is not found" do + fake_error = SyntaxError.new + def fake_error.message + "#does/not/exist/lol/doesnotexist:216: unterminated string meets end of file " + end + + io = StringIO.new + expect { + SyntaxSuggest.handle_error( + fake_error, + re_raise: false, + io: io + ) + }.to raise_error { |e| + expect(io.string).to include("Could not find filename") + expect(e).to eq(fake_error) + } + end + + it "respects highlight API" do + skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") + + error = SyntaxError.new("#{fixtures_dir.join("this_project_extra_def.rb.txt")}:1 ") + + require "syntax_suggest/core_ext" + + expect(error.detailed_message(highlight: true)).to include(SyntaxSuggest::DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT) + expect(error.detailed_message(highlight: false)).to_not 
include(SyntaxSuggest::DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT) + end + + it "can be disabled via falsey kwarg" do + skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") + + error = SyntaxError.new("#{fixtures_dir.join("this_project_extra_def.rb.txt")}:1 ") + + require "syntax_suggest/core_ext" + + expect(error.detailed_message(syntax_suggest: true)).to_not eq(error.detailed_message(syntax_suggest: false)) + end + end +end diff --git a/spec/syntax_suggest/unit/around_block_scan_spec.rb b/spec/syntax_suggest/unit/around_block_scan_spec.rb new file mode 100644 index 00000000000000..6053c3947e4351 --- /dev/null +++ b/spec/syntax_suggest/unit/around_block_scan_spec.rb @@ -0,0 +1,165 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe AroundBlockScan do + it "continues scan from last location even if scan is false" do + source = <<~'EOM' + print 'omg' + print 'lol' + print 'haha' + EOM + code_lines = CodeLine.from_source(source) + block = CodeBlock.new(lines: code_lines[1]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .scan_neighbors + + expect(expand.code_block.to_s).to eq(source) + expand.scan_while { |line| false } + + expect(expand.code_block.to_s).to eq(source) + end + + it "scan_adjacent_indent works on first or last line" do + source_string = <<~EOM + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[4]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .scan_adjacent_indent + + expect(expand.code_block.to_s).to eq(<<~EOM) + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + end + EOM 
+ end + + it "expands indentation" do + source_string = <<~EOM + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[2]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .stop_after_kw + .scan_adjacent_indent + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(2)) + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + EOM + end + + it "can stop before hitting another end" do + source_string = <<~EOM + def lol + end + def foo + puts "lol" + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.stop_after_kw + expand.scan_while { true } + + expect(expand.code_block.to_s).to eq(<<~EOM) + def foo + puts "lol" + end + EOM + end + + it "captures multiple empty and hidden lines" do + source_string = <<~EOM + def foo + Foo.call + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.scan_while { true } + + expect(expand.before_index).to eq(0) + expect(expand.after_index).to eq(6) + expect(expand.code_block.to_s).to eq(source_string) + end + + it "only takes what you ask" do + source_string = <<~EOM + def foo + Foo.call + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.scan_while { |line| line.not_empty? 
} + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) + puts "lol" + EOM + end + + it "skips what you want" do + source_string = <<~EOM + def foo + Foo.call + + puts "haha" + # hide me + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + code_lines[4].mark_invisible + + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.skip(:empty?) + expand.skip(:hidden?) + expand.scan_neighbors + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) + + puts "haha" + + puts "lol" + + EOM + end + end +end diff --git a/spec/syntax_suggest/unit/block_expand_spec.rb b/spec/syntax_suggest/unit/block_expand_spec.rb new file mode 100644 index 00000000000000..ba0b0457a1d613 --- /dev/null +++ b/spec/syntax_suggest/unit/block_expand_spec.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe BlockExpand do + it "captures multiple empty and hidden lines" do + source_string = <<~EOM + def foo + Foo.call + + + puts "lol" + + # hidden + end + end + EOM + + code_lines = code_line_array(source_string) + + code_lines[6].mark_invisible + + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + + + puts "lol" + + EOM + end + + it "captures multiple empty lines" do + source_string = <<~EOM + def foo + Foo.call + + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + + + puts "lol" + + EOM + end + + it "expands neighbors then indentation" do + source_string = <<~EOM + def foo + Foo.call + puts "hey" + puts "lol" + puts "sup" + end + end + EOM + + code_lines = 
code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + puts "hey" + puts "lol" + puts "sup" + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + Foo.call + puts "hey" + puts "lol" + puts "sup" + end + EOM + end + + it "handles else code" do + source_string = <<~EOM + Foo.call + if blerg + puts "lol" + else + puts "haha" + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[2]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + if blerg + puts "lol" + else + puts "haha" + end + EOM + end + + it "expand until next boundry (indentation)" do + source_string = <<~EOM + describe "what" do + Foo.call + end + + describe "hi" + Bar.call do + Foo.call + end + end + + it "blerg" do + end + EOM + + code_lines = code_line_array(source_string) + + block = CodeBlock.new( + lines: code_lines[6] + ) + + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + Bar.call do + Foo.call + end + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + describe "hi" + Bar.call do + Foo.call + end + end + EOM + end + + it "expand until next boundry (empty lines)" do + source_string = <<~EOM + describe "what" do + end + + describe "hi" + end + + it "blerg" do + end + EOM + + code_lines = code_line_array(source_string) + expansion = BlockExpand.new(code_lines: code_lines) + + block = CodeBlock.new(lines: code_lines[3]) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + + describe "hi" + end + + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + describe "what" do + end + + describe "hi" + end + + it "blerg" do + end + EOM + 
end + end +end diff --git a/spec/syntax_suggest/unit/capture_code_context_spec.rb b/spec/syntax_suggest/unit/capture_code_context_spec.rb new file mode 100644 index 00000000000000..e1bc281c13edd3 --- /dev/null +++ b/spec/syntax_suggest/unit/capture_code_context_spec.rb @@ -0,0 +1,202 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CaptureCodeContext do + it "capture_before_after_kws" do + source = <<~'EOM' + def sit + end + + def bark + + def eat + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[0]) + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call + expect(lines.join).to eq(<<~'EOM') + def sit + end + def bark + def eat + end + EOM + end + + it "handles ambiguous end" do + source = <<~'EOM' + def call # 0 + print "lol" # 1 + end # one # 2 + end # two # 3 + EOM + + code_lines = CleanDocument.new(source: source).call.lines + code_lines[0..2].each(&:mark_invisible) + block = CodeBlock.new(lines: code_lines) + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call + + lines = lines.sort.map(&:original) + + expect(lines.join).to eq(<<~'EOM') + def call # 0 + end # one # 2 + end # two # 3 + EOM + end + + it "shows ends of captured block" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(148 - 1) + source = lines.join + + code_lines = CleanDocument.new(source: source).call.lines + + code_lines[0..75].each(&:mark_invisible) + code_lines[77..-1].each(&:mark_invisible) + expect(code_lines.join.strip).to eq("class Lookups") + + block = CodeBlock.new(lines: code_lines[76..149]) + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call + + lines = lines.sort.map(&:original) + expect(lines.join).to include(<<~'EOM'.indent(2)) + class Lookups + def format_requires + 
end + EOM + end + + it "shows ends of captured block" do + source = <<~'EOM' + class Dog + def bark + puts "woof" + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines) + code_lines[1..-1].each(&:mark_invisible) + + expect(block.to_s.strip).to eq("class Dog") + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call.sort.map(&:original) + expect(lines.join).to eq(<<~'EOM') + class Dog + def bark + end + EOM + end + + it "captures surrounding context on falling indent" do + source = <<~'EOM' + class Blerg + end + + class OH + + def hello + it "foo" do + end + end + + class Zerg + end + EOM + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[6]) + + expect(block.to_s.strip).to eq('it "foo" do') + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call.sort.map(&:original) + expect(lines.join).to eq(<<~'EOM') + class OH + def hello + it "foo" do + end + end + EOM + end + + it "captures surrounding context on same indent" do + source = <<~'EOM' + class Blerg + end + class OH + + def nope + end + + def lol + end + + end # here + + def haha + end + + def nope + end + end + + class Zerg + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[7..10]) + expect(block.to_s).to eq(<<~'EOM'.indent(2)) + def lol + end + + end # here + EOM + + code_context = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + + lines = code_context.call + out = DisplayCodeWithLineNumbers.new( + lines: lines + ).call + + expect(out).to eq(<<~'EOM'.indent(2)) + 3 class OH + 8 def lol + 9 end + 11 end # here + 18 end + EOM + end + end +end diff --git a/spec/syntax_suggest/unit/clean_document_spec.rb b/spec/syntax_suggest/unit/clean_document_spec.rb new file mode 100644 index 00000000000000..fa049ad8df5773 --- 
/dev/null +++ b/spec/syntax_suggest/unit/clean_document_spec.rb @@ -0,0 +1,259 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CleanDocument do + it "heredocs" do + source = fixtures_dir.join("this_project_extra_def.rb.txt").read + code_lines = CleanDocument.new(source: source).call.lines + + expect(code_lines[18 - 1].to_s).to eq(<<-'EOL') + @io.puts <<~EOM + + SyntaxSuggest: A syntax error was detected + + This code has an unmatched `end` this is caused by either + missing a syntax keyword (`def`, `do`, etc.) or inclusion + of an extra `end` line: + EOM + EOL + expect(code_lines[18].to_s).to eq("") + + expect(code_lines[27 - 1].to_s).to eq(<<-'EOL') + @io.puts(<<~EOM) if filename + file: #{filename} + EOM + EOL + expect(code_lines[27].to_s).to eq("") + + expect(code_lines[31 - 1].to_s).to eq(<<-'EOL') + @io.puts <<~EOM + #{code_with_filename} + EOM + EOL + expect(code_lines[31].to_s).to eq("") + end + + it "joins: multi line methods" do + source = <<~EOM + User + .where(name: 'schneems') + .first + EOM + + doc = CleanDocument.new(source: source).join_consecutive! 
+ + expect(doc.lines[0].to_s).to eq(source) + expect(doc.lines[1].to_s).to eq("") + expect(doc.lines[2].to_s).to eq("") + expect(doc.lines[3]).to eq(nil) + + lines = doc.lines + expect( + DisplayCodeWithLineNumbers.new( + lines: lines + ).call + ).to eq(<<~'EOM'.indent(2)) + 1 User + 2 .where(name: 'schneems') + 3 .first + EOM + + expect( + DisplayCodeWithLineNumbers.new( + lines: lines, + highlight_lines: lines[0] + ).call + ).to eq(<<~'EOM') + ❯ 1 User + ❯ 2 .where(name: 'schneems') + ❯ 3 .first + EOM + end + + it "helper method: take_while_including" do + source = <<~EOM + User + .where(name: 'schneems') + .first + EOM + + doc = CleanDocument.new(source: source) + + lines = doc.take_while_including { |line| !line.to_s.include?("where") } + expect(lines.count).to eq(2) + end + + it "comments: removes comments" do + source = <<~EOM + # lol + puts "what" + # yolo + EOM + + out = CleanDocument.new(source: source).lines.join + expect(out.to_s).to eq(<<~EOM) + + puts "what" + + EOM + end + + it "whitespace: removes whitespace" do + source = " \n" + <<~EOM + puts "what" + EOM + + out = CleanDocument.new(source: source).lines.join + expect(out.to_s).to eq(<<~EOM) + + puts "what" + EOM + + expect(source.lines.first.to_s).to_not eq("\n") + expect(out.lines.first.to_s).to eq("\n") + end + + it "trailing slash: does not join trailing do" do + # Some keywords and syntaxes trigger the "ignored line" + # lex output, we ignore them by filtering by BEG + # + # The `do` keyword is one of these: + # https://gist.github.com/schneems/6a7d7f988d3329fb3bd4b5be3e2efc0c + source = <<~EOM + foo do + puts "lol" + end + EOM + + doc = CleanDocument.new(source: source).join_consecutive! 
+ + expect(doc.lines[0].to_s).to eq(source.lines[0]) + expect(doc.lines[1].to_s).to eq(source.lines[1]) + expect(doc.lines[2].to_s).to eq(source.lines[2]) + end + + it "trailing slash: formats output" do + source = <<~'EOM' + context "timezones workaround" do + it "should receive a time in UTC format and return the time with the"\ + "office's UTC offset substracted from it" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + office = build(:office) + end + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + expect( + DisplayCodeWithLineNumbers.new( + lines: code_lines.select(&:visible?) + ).call + ).to eq(<<~'EOM'.indent(2)) + 1 context "timezones workaround" do + 2 it "should receive a time in UTC format and return the time with the"\ + 3 "office's UTC offset substracted from it" do + 4 travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + 5 office = build(:office) + 6 end + 7 end + 8 end + EOM + + expect( + DisplayCodeWithLineNumbers.new( + lines: code_lines.select(&:visible?), + highlight_lines: code_lines[1] + ).call + ).to eq(<<~'EOM') + 1 context "timezones workaround" do + ❯ 2 it "should receive a time in UTC format and return the time with the"\ + ❯ 3 "office's UTC offset substracted from it" do + 4 travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + 5 office = build(:office) + 6 end + 7 end + 8 end + EOM + end + + it "trailing slash: basic detection" do + source = <<~'EOM' + it "trailing s" \ + "lash" do + EOM + + code_lines = CleanDocument.new(source: source).call.lines + + expect(code_lines[0]).to_not be_hidden + expect(code_lines[1]).to be_hidden + + expect( + code_lines.join + ).to eq(code_lines.map(&:original).join) + end + + it "trailing slash: joins multiple lines" do + source = <<~'EOM' + it "should " \ + "keep " \ + "going " do + end + EOM + + doc = CleanDocument.new(source: source).join_trailing_slash! 
+ expect(doc.lines[0].to_s).to eq(source.lines[0..2].join) + expect(doc.lines[1].to_s).to eq("") + expect(doc.lines[2].to_s).to eq("") + expect(doc.lines[3].to_s).to eq(source.lines[3]) + + lines = doc.lines + expect( + DisplayCodeWithLineNumbers.new( + lines: lines + ).call + ).to eq(<<~'EOM'.indent(2)) + 1 it "should " \ + 2 "keep " \ + 3 "going " do + 4 end + EOM + + expect( + DisplayCodeWithLineNumbers.new( + lines: lines, + highlight_lines: lines[0] + ).call + ).to eq(<<~'EOM') + ❯ 1 it "should " \ + ❯ 2 "keep " \ + ❯ 3 "going " do + 4 end + EOM + end + + it "trailing slash: no false positives" do + source = <<~'EOM' + def formatters + @formatters ||= { + amazing_print: ->(obj) { obj.ai + "\n" }, + inspect: ->(obj) { obj.inspect + "\n" }, + json: ->(obj) { obj.to_json }, + marshal: ->(obj) { Marshal.dump(obj) }, + none: ->(_obj) { nil }, + pretty_json: ->(obj) { JSON.pretty_generate(obj) }, + pretty_print: ->(obj) { obj.pretty_inspect }, + puts: ->(obj) { require 'stringio'; sio = StringIO.new; sio.puts(obj); sio.string }, + to_s: ->(obj) { obj.to_s + "\n" }, + yaml: ->(obj) { obj.to_yaml }, + } + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + expect(code_lines.join).to eq(code_lines.join) + end + end +end diff --git a/spec/syntax_suggest/unit/cli_spec.rb b/spec/syntax_suggest/unit/cli_spec.rb new file mode 100644 index 00000000000000..fecf3e304c9361 --- /dev/null +++ b/spec/syntax_suggest/unit/cli_spec.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + class FakeExit + def initialize + @called = false + @value = nil + end + + def exit(value = nil) + @called = true + @value = value + end + + def called? 
+ @called + end + + attr_reader :value + end + + RSpec.describe Cli do + it "parses valid code" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + file = dir.join("script.rb") + file.write("puts 'lol'") + + io = StringIO.new + exit_obj = FakeExit.new + Cli.new( + io: io, + argv: [file.to_s], + exit_obj: exit_obj + ).call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(0) + expect(io.string.strip).to eq("Syntax OK") + end + end + + it "parses invalid code" do + file = fixtures_dir.join("this_project_extra_def.rb.txt") + + io = StringIO.new + exit_obj = FakeExit.new + Cli.new( + io: io, + argv: [file.to_s], + exit_obj: exit_obj + ).call + + out = io.string + debug_display(out) + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(1) + expect(out.strip).to include("❯ 36 def filename") + end + + it "parses valid code with flags" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + file = dir.join("script.rb") + file.write("puts 'lol'") + + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--terminal", file.to_s], + exit_obj: exit_obj + ) + cli.call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(0) + expect(cli.options[:terminal]).to be_truthy + expect(io.string.strip).to eq("Syntax OK") + end + end + + it "errors when no file given" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--terminal"], + exit_obj: exit_obj + ) + cli.call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(1) + expect(io.string.strip).to eq("No file given") + end + + it "errors when file does not exist" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["lol-i-d-o-not-ex-ist-yololo.txtblerglol"], + exit_obj: exit_obj + ) + cli.call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(1) + expect(io.string.strip).to include("file not found:") + end + + # We cannot execute the parser here 
+ # because it calls `exit` and it will exit + # our tests, however we can assert that the + # parser has the right value for version + it "-v version" do + io = StringIO.new + exit_obj = FakeExit.new + parser = Cli.new( + io: io, + argv: ["-v"], + exit_obj: exit_obj + ).parser + + expect(parser.version).to include(SyntaxSuggest::VERSION.to_s) + end + + it "SYNTAX_SUGGEST_RECORD_DIR" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: [], + env: {"SYNTAX_SUGGEST_RECORD_DIR" => "hahaha"}, + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:record_dir]).to eq("hahaha") + end + + it "--record-dir=" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--record=lol"], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:record_dir]).to eq("lol") + end + + it "terminal default to respecting TTY" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: [], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:terminal]).to eq(SyntaxSuggest::DEFAULT_VALUE) + end + + it "--terminal" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--terminal"], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:terminal]).to be_truthy + end + + it "--no-terminal" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--no-terminal"], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:terminal]).to be_falsey + end + + it "--help outputs help" do + io = StringIO.new + exit_obj = FakeExit.new + Cli.new( + io: io, + argv: ["--help"], + exit_obj: exit_obj + ).call + + expect(exit_obj.called?).to be_truthy + expect(io.string).to include("Usage: syntax_suggest [options]") + end + + it " outputs help" do + io = StringIO.new + 
exit_obj = FakeExit.new + Cli.new( + io: io, + argv: [], + exit_obj: exit_obj + ).call + + expect(exit_obj.called?).to be_truthy + expect(io.string).to include("Usage: syntax_suggest [options]") + end + end +end diff --git a/spec/syntax_suggest/unit/code_block_spec.rb b/spec/syntax_suggest/unit/code_block_spec.rb new file mode 100644 index 00000000000000..3ab2751b271597 --- /dev/null +++ b/spec/syntax_suggest/unit/code_block_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeBlock do + it "can detect if it's valid or not" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: code_lines[1]) + expect(block.valid?).to be_truthy + end + + it "can be sorted in indentation order" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block_0 = CodeBlock.new(lines: code_lines[0]) + block_1 = CodeBlock.new(lines: code_lines[1]) + block_2 = CodeBlock.new(lines: code_lines[2]) + + expect(block_0 <=> block_0.dup).to eq(0) + expect(block_1 <=> block_0).to eq(1) + expect(block_1 <=> block_2).to eq(-1) + + array = [block_2, block_1, block_0].sort + expect(array.last).to eq(block_2) + + block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, lex: [])) + array.prepend(block) + expect(array.max).to eq(block) + end + + it "knows it's current indentation level" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: code_lines[1]) + expect(block.current_indent).to eq(2) + + block = CodeBlock.new(lines: code_lines[0]) + expect(block.current_indent).to eq(0) + end + + it "knows it's current indentation level when mismatched indents" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: [code_lines[1], code_lines[2]]) + expect(block.current_indent).to eq(1) + end + + it "before lines and after 
lines" do + code_lines = code_line_array(<<~EOM) + def foo + bar; end + end + EOM + + block = CodeBlock.new(lines: code_lines[1]) + expect(block.valid?).to be_falsey + end + end +end diff --git a/spec/syntax_suggest/unit/code_frontier_spec.rb b/spec/syntax_suggest/unit/code_frontier_spec.rb new file mode 100644 index 00000000000000..c9aba7c8d80a42 --- /dev/null +++ b/spec/syntax_suggest/unit/code_frontier_spec.rb @@ -0,0 +1,135 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeFrontier do + it "detect_bad_blocks" do + code_lines = code_line_array(<<~EOM) + describe "lol" do + end + end + + it "lol" do + end + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + blocks = [] + blocks << CodeBlock.new(lines: code_lines[1]) + blocks << CodeBlock.new(lines: code_lines[5]) + blocks.each do |b| + frontier << b + end + + expect(frontier.detect_invalid_blocks.sort).to eq(blocks.sort) + end + + it "self.combination" do + expect( + CodeFrontier.combination([:a, :b, :c, :d]) + ).to eq( + [ + [:a], [:b], [:c], [:d], + [:a, :b], + [:a, :c], + [:a, :d], + [:b, :c], + [:b, :d], + [:c, :d], + [:a, :b, :c], + [:a, :b, :d], + [:a, :c, :d], + [:b, :c, :d], + [:a, :b, :c, :d] + ] + ) + end + + it "doesn't duplicate blocks" do + code_lines = code_line_array(<<~EOM) + def foo + puts "lol" + puts "lol" + puts "lol" + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: [code_lines[2]]) + expect(frontier.count).to eq(1) + + frontier << CodeBlock.new(lines: [code_lines[1], code_lines[2], code_lines[3]]) + # expect(frontier.count).to eq(1) + expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + puts "lol" + puts "lol" + EOM + + expect(frontier.pop).to be_nil + + code_lines = code_line_array(<<~EOM) + def foo + puts "lol" + puts "lol" + puts "lol" + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: 
[code_lines[2]]) + expect(frontier.count).to eq(1) + + frontier << CodeBlock.new(lines: [code_lines[3]]) + expect(frontier.count).to eq(2) + expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + EOM + end + + it "detects if multiple syntax errors are found" do + code_lines = code_line_array(<<~EOM) + def foo + end + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + + frontier << CodeBlock.new(lines: code_lines[1]) + block = frontier.pop + expect(block.to_s).to eq(<<~EOM.indent(2)) + end + EOM + frontier << block + + expect(frontier.holds_all_syntax_errors?).to be_truthy + end + + it "detects if it has not captured all syntax errors" do + code_lines = code_line_array(<<~EOM) + def foo + puts "lol" + end + + describe "lol" + end + + it "lol" + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: [code_lines[1]]) + block = frontier.pop + expect(block.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + EOM + frontier << block + + expect(frontier.holds_all_syntax_errors?).to be_falsey + end + end +end diff --git a/spec/syntax_suggest/unit/code_line_spec.rb b/spec/syntax_suggest/unit/code_line_spec.rb new file mode 100644 index 00000000000000..cc4fa48bc9efea --- /dev/null +++ b/spec/syntax_suggest/unit/code_line_spec.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeLine do + it "bug in keyword detection" do + lines = CodeLine.from_source(<<~'EOM') + def to_json(*opts) + { + type: :module, + }.to_json(*opts) + end + EOM + expect(lines.count(&:is_kw?)).to eq(1) + expect(lines.count(&:is_end?)).to eq(1) + end + + it "supports endless method definitions" do + skip("Unsupported ruby version") unless Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3") + + line = CodeLine.from_source(<<~'EOM').first + def square(x) = x * x + EOM + + expect(line.is_kw?).to be_falsey + expect(line.is_end?).to be_falsey + end + + it "retains 
original line value, after being marked invisible" do + line = CodeLine.from_source(<<~'EOM').first + puts "lol" + EOM + expect(line.line).to match('puts "lol"') + line.mark_invisible + expect(line.line).to eq("") + expect(line.original).to match('puts "lol"') + end + + it "knows which lines can be joined" do + code_lines = CodeLine.from_source(<<~'EOM') + user = User. + where(name: 'schneems'). + first + puts user.name + EOM + + # Indicates line 1 can join 2, 2 can join 3, but 3 won't join it's next line + expect(code_lines.map(&:ignore_newline_not_beg?)).to eq([true, true, false, false]) + end + it "trailing if" do + code_lines = CodeLine.from_source(<<~'EOM') + puts "lol" if foo + if foo + end + EOM + + expect(code_lines.map(&:is_kw?)).to eq([false, true, false]) + end + + it "trailing unless" do + code_lines = CodeLine.from_source(<<~'EOM') + puts "lol" unless foo + unless foo + end + EOM + + expect(code_lines.map(&:is_kw?)).to eq([false, true, false]) + end + + it "trailing slash" do + code_lines = CodeLine.from_source(<<~'EOM') + it "trailing s" \ + "lash" do + EOM + + expect(code_lines.map(&:trailing_slash?)).to eq([true, false]) + + code_lines = CodeLine.from_source(<<~'EOM') + amazing_print: ->(obj) { obj.ai + "\n" }, + EOM + expect(code_lines.map(&:trailing_slash?)).to eq([false]) + end + + it "knows it's got an end" do + line = CodeLine.from_source(" end").first + + expect(line.is_end?).to be_truthy + expect(line.is_kw?).to be_falsey + end + + it "knows it's got a keyword" do + line = CodeLine.from_source(" if").first + + expect(line.is_end?).to be_falsey + expect(line.is_kw?).to be_truthy + end + + it "ignores marked lines" do + code_lines = CodeLine.from_source(<<~EOM) + def foo + Array(value) |x| + end + end + EOM + + expect(SyntaxSuggest.valid?(code_lines)).to be_falsey + expect(code_lines.join).to eq(<<~EOM) + def foo + Array(value) |x| + end + end + EOM + + expect(code_lines[0].visible?).to be_truthy + expect(code_lines[3].visible?).to be_truthy + 
+ code_lines[0].mark_invisible + code_lines[3].mark_invisible + + expect(code_lines[0].visible?).to be_falsey + expect(code_lines[3].visible?).to be_falsey + + expect(code_lines.join).to eq(<<~EOM.indent(2)) + Array(value) |x| + end + EOM + expect(SyntaxSuggest.valid?(code_lines)).to be_falsey + end + + it "knows empty lines" do + code_lines = CodeLine.from_source(<<~EOM) + # Not empty + + # Not empty + EOM + + expect(code_lines.map(&:empty?)).to eq([false, true, false]) + expect(code_lines.map(&:not_empty?)).to eq([true, false, true]) + expect(code_lines.map { |l| SyntaxSuggest.valid?(l) }).to eq([true, true, true]) + end + + it "counts indentations" do + code_lines = CodeLine.from_source(<<~EOM) + def foo + Array(value) |x| + puts 'lol' + end + end + EOM + + expect(code_lines.map(&:indent)).to eq([0, 2, 4, 2, 0]) + end + + it "doesn't count empty lines as having an indentation" do + code_lines = CodeLine.from_source(<<~EOM) + + + EOM + + expect(code_lines.map(&:indent)).to eq([0, 0]) + end + end +end diff --git a/spec/syntax_suggest/unit/code_search_spec.rb b/spec/syntax_suggest/unit/code_search_spec.rb new file mode 100644 index 00000000000000..b62b2c0a3c9b03 --- /dev/null +++ b/spec/syntax_suggest/unit/code_search_spec.rb @@ -0,0 +1,505 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeSearch do + it "rexe regression" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(148 - 1) + source = lines.join + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join.strip).to eq(<<~'EOM'.strip) + class Lookups + EOM + end + + it "squished do regression" do + source = <<~'EOM' + def call + trydo + + @options = CommandLineParser.new.parse + + options.requires.each { |r| require!(r) } + load_global_config_if_exists + options.loads.each { |file| load(file) } + + @user_source_code = ARGV.join(' ') + @user_source_code = 'self' if @user_source_code == '' + + 
@callable = create_callable + + init_rexe_context + init_parser_and_formatters + + # This is where the user's source code will be executed; the action will in turn call `execute`. + lookup_action(options.input_mode).call unless options.noop + + output_log_entry + end # one + end # two + EOM + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + trydo + end # one + EOM + end + + it "regression test ambiguous end" do + source = <<~'EOM' + def call # 0 + print "lol" # 1 + end # one # 2 + end # two # 3 + EOM + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + end # two # 3 + EOM + end + + it "regression dog test" do + source = <<~'EOM' + class Dog + def bark + puts "woof" + end + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + class Dog + EOM + expect(search.invalid_blocks.first.lines.length).to eq(4) + end + + it "handles mismatched |" do + source = <<~EOM + class Blerg + Foo.call do |a + end # one + + puts lol + class Foo + end # two + end # three + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + Foo.call do |a + end # one + EOM + end + + it "handles mismatched }" do + source = <<~EOM + class Blerg + Foo.call do { + + puts lol + class Foo + end # two + end # three + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + Foo.call do { + EOM + end + + it "handles no spaces between blocks and trailing slash" do + source = <<~'EOM' + require "rails_helper" + RSpec.describe Foo, type: :model do + describe "#bar" do + context "context" do + it "foos the bar with a foo and then bazes the foo with a bar to"\ + "fooify the barred bar" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + foo = build(:foo) + end + end + end + end + describe "#baz?" 
do + context "baz has barred the foo" do + it "returns true" do # <== HERE + end + end + end + EOM + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join.strip).to eq('it "returns true" do # <== HERE') + end + + it "handles no spaces between blocks" do + source = <<~'EOM' + context "foo bar" do + it "bars the foo" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + end + end + end + context "test" do + it "should" do + end + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join.strip).to eq('it "should" do') + end + + it "records debugging steps to a directory" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + search = CodeSearch.new(<<~'EOM', record_dir: dir) + class OH + def hello + def hai + end + end + EOM + search.call + + expect(search.record_dir.entries.map(&:to_s)).to include("1-add-1-(3__4).txt") + expect(search.record_dir.join("1-add-1-(3__4).txt").read).to include(<<~EOM) + 1 class OH + 2 def hello + ❯ 3 def hai + ❯ 4 end + 5 end + EOM + end + end + + it "def with missing end" do + search = CodeSearch.new(<<~'EOM') + class OH + def hello + + def hai + puts "lol" + end + end + EOM + search.call + + expect(search.invalid_blocks.join.strip).to eq("def hello") + + search = CodeSearch.new(<<~'EOM') + class OH + def hello + + def hai + end + end + EOM + search.call + + expect(search.invalid_blocks.join.strip).to eq("def hello") + + search = CodeSearch.new(<<~'EOM') + class OH + def hello + def hai + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + def hello + EOM + end + + describe "real world cases" do + it "finds hanging def in this project" do + source_string = fixtures_dir.join("this_project_extra_def.rb.txt").read + search = CodeSearch.new(source_string) + search.call + + document = DisplayCodeWithLineNumbers.new( + lines: search.code_lines.select(&:visible?), + terminal: false, + highlight_lines: 
search.invalid_blocks.flat_map(&:lines) + ).call + + expect(document).to include(<<~'EOM') + ❯ 36 def filename + EOM + end + + it "Format Code blocks real world example" do + search = CodeSearch.new(<<~'EOM') + require 'rails_helper' + + RSpec.describe AclassNameHere, type: :worker do + describe "thing" do + context "when" do + let(:thing) { stuff } + let(:another_thing) { moarstuff } + subject { foo.new.perform(foo.id, true) } + + it "stuff" do + subject + + expect(foo.foo.foo).to eq(true) + end + end + end # line 16 accidental end, but valid block + + context "stuff" do + let(:thing) { create(:foo, foo: stuff) } + let(:another_thing) { create(:stuff) } + + subject { described_class.new.perform(foo.id, false) } + + it "more stuff" do + subject + + expect(foo.foo.foo).to eq(false) + end + end + end # mismatched due to 16 + end + EOM + search.call + + document = DisplayCodeWithLineNumbers.new( + lines: search.code_lines.select(&:visible?), + terminal: false, + highlight_lines: search.invalid_blocks.flat_map(&:lines) + ).call + + expect(document).to include(<<~'EOM') + 1 require 'rails_helper' + 2 + 3 RSpec.describe AclassNameHere, type: :worker do + ❯ 4 describe "thing" do + ❯ 16 end # line 16 accidental end, but valid block + ❯ 30 end # mismatched due to 16 + 31 end + EOM + end + end + + # For code that's not perfectly formatted, we ideally want to do our best + # These examples represent the results that exist today, but I would like to improve upon them + describe "needs improvement" do + describe "mis-matched-indentation" do + it "extra space before end" do + search = CodeSearch.new(<<~'EOM') + Foo.call + def foo + puts "lol" + puts "lol" + end # one + end # two + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end # two + EOM + end + + it "stacked ends 2" do + search = CodeSearch.new(<<~'EOM') + def cat + blerg + end + + Foo.call do + end # one + end # two + + def dog + end + EOM + search.call + + 
expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call do + end # one + end # two + + EOM + end + + it "stacked ends " do + search = CodeSearch.new(<<~'EOM') + Foo.call + def foo + puts "lol" + puts "lol" + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end + EOM + end + + it "missing space before end" do + search = CodeSearch.new(<<~'EOM') + Foo.call + + def foo + puts "lol" + puts "lol" + end + end + EOM + search.call + + # expand-1 and expand-2 seem to be broken? + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end + EOM + end + end + end + + it "returns syntax error in outer block without inner block" do + search = CodeSearch.new(<<~'EOM') + Foo.call + def foo + puts "lol" + puts "lol" + end # one + end # two + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end # two + EOM + end + + it "doesn't just return an empty `end`" do + search = CodeSearch.new(<<~'EOM') + Foo.call + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end + EOM + end + + it "finds multiple syntax errors" do + search = CodeSearch.new(<<~'EOM') + describe "hi" do + Foo.call + end + end + + it "blerg" do + Bar.call + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + Foo.call + end + Bar.call + end + EOM + end + + it "finds a typo def" do + search = CodeSearch.new(<<~'EOM') + defzfoo + puts "lol" + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + defzfoo + end + EOM + end + + it "finds a mis-matched def" do + search = CodeSearch.new(<<~'EOM') + def foo + def blerg + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + def blerg + EOM + end + + it "finds a naked end" do + search = CodeSearch.new(<<~'EOM') + def foo + end # one + end # two + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + end 
# one + EOM + end + + it "returns when no invalid blocks are found" do + search = CodeSearch.new(<<~'EOM') + def foo + puts 'lol' + end + EOM + search.call + + expect(search.invalid_blocks).to eq([]) + end + + it "expands frontier by eliminating valid lines" do + search = CodeSearch.new(<<~'EOM') + def foo + puts 'lol' + end + EOM + search.create_blocks_from_untracked_lines + + expect(search.code_lines.join).to eq(<<~'EOM') + def foo + end + EOM + end + end +end diff --git a/spec/syntax_suggest/unit/display_invalid_blocks_spec.rb b/spec/syntax_suggest/unit/display_invalid_blocks_spec.rb new file mode 100644 index 00000000000000..c696132782cb26 --- /dev/null +++ b/spec/syntax_suggest/unit/display_invalid_blocks_spec.rb @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe DisplayInvalidBlocks do + it "works with valid code" do + syntax_string = <<~EOM + class OH + def hello + end + def hai + end + end + EOM + + search = CodeSearch.new(syntax_string) + search.call + + io = StringIO.new + display = DisplayInvalidBlocks.new( + io: io, + blocks: search.invalid_blocks, + terminal: false, + code_lines: search.code_lines + ) + display.call + expect(io.string).to include("Syntax OK") + end + + it "selectively prints to terminal if input is a tty by default" do + source = <<~EOM + class OH + def hello + def hai + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + + io = StringIO.new + def io.isatty + true + end + + block = CodeBlock.new(lines: code_lines[1]) + display = DisplayInvalidBlocks.new( + io: io, + blocks: block, + code_lines: code_lines + ) + display.call + expect(io.string).to include([ + "❯ 2 ", + DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT, + " def hello" + ].join) + + io = StringIO.new + def io.isatty + false + end + + block = CodeBlock.new(lines: code_lines[1]) + display = DisplayInvalidBlocks.new( + io: io, + blocks: block, + code_lines: code_lines + ) + 
display.call + expect(io.string).to include("❯ 2 def hello") + end + + it "outputs to io when using `call`" do + source = <<~EOM + class OH + def hello + def hai + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + + io = StringIO.new + block = CodeBlock.new(lines: code_lines[1]) + display = DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: false, + code_lines: code_lines + ) + display.call + expect(io.string).to include("❯ 2 def hello") + end + + it " wraps code with github style codeblocks" do + source = <<~EOM + class OH + def hello + + def hai + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[1]) + io = StringIO.new + DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: false, + code_lines: code_lines + ).call + expect(io.string).to include(<<~EOM) + 1 class OH + ❯ 2 def hello + 4 def hai + 5 end + 6 end + EOM + end + + it "shows terminal characters" do + code_lines = code_line_array(<<~EOM) + class OH + def hello + def hai + end + end + EOM + + io = StringIO.new + block = CodeBlock.new(lines: code_lines[1]) + DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: false, + code_lines: code_lines + ).call + + expect(io.string).to include([ + " 1 class OH", + "❯ 2 def hello", + " 4 end", + " 5 end", + "" + ].join($/)) + + block = CodeBlock.new(lines: code_lines[1]) + io = StringIO.new + DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: true, + code_lines: code_lines + ).call + + expect(io.string).to include( + [ + " 1 class OH", + ["❯ 2 ", DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT, " def hello"].join, + " 4 end", + " 5 end", + "" + ].join($/ + DisplayCodeWithLineNumbers::TERMINAL_END) + ) + end + end +end diff --git a/spec/syntax_suggest/unit/explain_syntax_spec.rb b/spec/syntax_suggest/unit/explain_syntax_spec.rb new file mode 100644 index 00000000000000..394981dcf662cd --- /dev/null +++ 
b/spec/syntax_suggest/unit/explain_syntax_spec.rb @@ -0,0 +1,255 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "ExplainSyntax" do + it "handles shorthand syntaxes with non-bracket characters" do + source = <<~EOM + %Q* lol + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + expect(explain.errors.join).to include("unterminated string") + end + + it "handles %w[]" do + source = <<~EOM + node.is_a?(Op) && %w[| ||].include?(node.value) && + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + end + + it "doesn't falsely identify strings or symbols as critical chars" do + source = <<~EOM + a = ['(', '{', '[', '|'] + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + + source = <<~EOM + a = [:'(', :'{', :'[', :'|'] + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + end + + it "finds missing |" do + source = <<~EOM + Foo.call do | + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["|"]) + expect(explain.errors).to eq([explain.why("|")]) + end + + it "finds missing {" do + source = <<~EOM + class Cat + lol = { + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["}"]) + expect(explain.errors).to eq([explain.why("}")]) + end + + it "finds missing }" do + source = <<~EOM + def foo + lol = "foo" => :bar } + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["{"]) + expect(explain.errors).to eq([explain.why("{")]) + end + + it "finds missing [" do + source = <<~EOM + 
class Cat + lol = [ + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["]"]) + expect(explain.errors).to eq([explain.why("]")]) + end + + it "finds missing ]" do + source = <<~EOM + def foo + lol = ] + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["["]) + expect(explain.errors).to eq([explain.why("[")]) + end + + it "finds missing (" do + source = "def initialize; ); end" + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["("]) + expect(explain.errors).to eq([explain.why("(")]) + end + + it "finds missing )" do + source = "def initialize; (; end" + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([")"]) + expect(explain.errors).to eq([explain.why(")")]) + end + + it "finds missing keyword" do + source = <<~EOM + class Cat + end + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["keyword"]) + expect(explain.errors).to eq([explain.why("keyword")]) + end + + it "finds missing end" do + source = <<~EOM + class Cat + def meow + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["end"]) + expect(explain.errors).to eq([explain.why("end")]) + end + + it "falls back to ripper on unknown errors" do + source = <<~EOM + class Cat + def meow + 1 * + end + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + expect(explain.errors).to eq(RipperErrors.new(source).call.errors) + end + + it "handles an unexpected rescue" do + source = <<~EOM + def foo + if bar + "baz" + else + "foo" + rescue FooBar + nil + end + EOM + + explain = ExplainSyntax.new( 
+ code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["end"]) + end + + # String embeds are `"#{foo} <-- here` + # + # We need to count a `#{` as a `{` + # otherwise it will report that we are + # missing a curly when we are using valid + # string embed syntax + it "is not confused by valid string embed" do + source = <<~'EOM' + foo = "#{hello}" + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + expect(explain.missing).to eq([]) + end + + # Missing string embed beginnings are not a + # syntax error. i.e. `"foo}"` or `"{foo}` or "#foo}" + # would just be strings with extra characters. + # + # However missing the end curly will trigger + # an error: i.e. `"#{foo` + # + # String embed beginning is a `#{` rather than + # a `{`, make sure we handle that case and + # report the correct missing `}` diagnosis + it "finds missing string embed end" do + source = <<~'EOM' + "#{foo + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["}"]) + end + end +end diff --git a/spec/syntax_suggest/unit/lex_all_spec.rb b/spec/syntax_suggest/unit/lex_all_spec.rb new file mode 100644 index 00000000000000..0c0df7cfaa297e --- /dev/null +++ b/spec/syntax_suggest/unit/lex_all_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "EndBlockParse" do + it "finds blocks based on `end` keyword" do + source = <<~EOM + describe "cat" # 1 + Cat.call do # 2 + end # 3 + end # 4 + # 5 + it "dog" do # 6 + Dog.call do # 7 + end # 8 + end # 9 + EOM + + # raw_lex = Ripper.lex(source) + # expect(raw_lex.to_s).to_not include("dog") + + lex = LexAll.new(source: source) + expect(lex.map(&:token).to_s).to include("dog") + expect(lex.first.line).to eq(1) + expect(lex.last.line).to eq(9) + end + end +end diff --git a/spec/syntax_suggest/unit/pathname_from_message_spec.rb 
b/spec/syntax_suggest/unit/pathname_from_message_spec.rb new file mode 100644 index 00000000000000..76756efda9df2d --- /dev/null +++ b/spec/syntax_suggest/unit/pathname_from_message_spec.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "PathnameFromMessage" do + it "handles filenames with colons in them" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + + file = dir.join("scr:atch.rb").tap { |p| FileUtils.touch(p) } + + message = "#{file}:2:in `require_relative': /private/tmp/bad.rb:1: syntax error, unexpected `end' (SyntaxError)" + file = PathnameFromMessage.new(message).call.name + + expect(file).to be_truthy + end + end + + it "checks if the file exists" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + + file = dir.join("scratch.rb") + # No touch, file does not exist + expect(file.exist?).to be_falsey + + message = "#{file}:2:in `require_relative': /private/tmp/bad.rb:1: syntax error, unexpected `end' (SyntaxError)" + io = StringIO.new + file = PathnameFromMessage.new(message, io: io).call.name + + expect(io.string).to include(file.to_s) + expect(file).to be_falsey + end + end + + it "does not output error message on syntax error inside of an (eval)" do + message = "(eval):1: invalid multibyte char (UTF-8) (SyntaxError)\n" + io = StringIO.new + file = PathnameFromMessage.new(message, io: io).call.name + + expect(io.string).to eq("") + expect(file).to be_falsey + end + + it "does not output error message on syntax error inside of streamed code" do + # An example of streamed code is: $ echo "def foo" | ruby + message = "-:1: syntax error, unexpected end-of-input\n" + io = StringIO.new + file = PathnameFromMessage.new(message, io: io).call.name + + expect(io.string).to eq("") + expect(file).to be_falsey + end + end +end diff --git a/spec/syntax_suggest/unit/priority_queue_spec.rb b/spec/syntax_suggest/unit/priority_queue_spec.rb new file mode 100644 index 
00000000000000..17361833e509c4 --- /dev/null +++ b/spec/syntax_suggest/unit/priority_queue_spec.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + class CurrentIndex + attr_reader :current_indent + + def initialize(value) + @current_indent = value + end + + def <=>(other) + @current_indent <=> other.current_indent + end + + def inspect + @current_indent + end + end + + RSpec.describe CodeFrontier do + it "works" do + q = PriorityQueue.new + q << 1 + q << 2 + expect(q.elements).to eq([2, 1]) + + q << 3 + expect(q.elements).to eq([3, 1, 2]) + + expect(q.pop).to eq(3) + expect(q.pop).to eq(2) + expect(q.pop).to eq(1) + expect(q.pop).to eq(nil) + + array = [] + q = PriorityQueue.new + array.reverse_each do |v| + q << v + end + expect(q.elements).to eq(array) + + array = [100, 36, 17, 19, 25, 0, 3, 1, 7, 2] + array.reverse_each do |v| + q << v + end + + expect(q.pop).to eq(100) + expect(q.elements).to eq([36, 25, 19, 17, 0, 1, 7, 2, 3]) + + # expected [36, 25, 19, 17, 0, 1, 7, 2, 3] + expect(q.pop).to eq(36) + expect(q.pop).to eq(25) + expect(q.pop).to eq(19) + expect(q.pop).to eq(17) + expect(q.pop).to eq(7) + expect(q.pop).to eq(3) + expect(q.pop).to eq(2) + expect(q.pop).to eq(1) + expect(q.pop).to eq(0) + expect(q.pop).to eq(nil) + end + + it "priority queue" do + frontier = PriorityQueue.new + frontier << CurrentIndex.new(0) + frontier << CurrentIndex.new(1) + + expect(frontier.sorted.map(&:current_indent)).to eq([0, 1]) + + frontier << CurrentIndex.new(1) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 1, 1]) + + frontier << CurrentIndex.new(0) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 0, 1, 1]) + + frontier << CurrentIndex.new(10) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 0, 1, 1, 10]) + + frontier << CurrentIndex.new(2) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 0, 1, 1, 2, 10]) + + frontier = PriorityQueue.new + values = [18, 18, 0, 18, 0, 18, 
18, 18, 18, 16, 18, 8, 18, 8, 8, 8, 16, 6, 0, 0, 16, 16, 4, 14, 14, 12, 12, 12, 10, 12, 12, 12, 12, 8, 10, 10, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 6, 6, 6, 6, 6, 6, 8, 10, 8, 8, 10, 8, 10, 8, 10, 8, 6, 8, 8, 6, 8, 6, 6, 8, 0, 8, 0, 0, 8, 8, 0, 8, 0, 8, 8, 0, 8, 8, 8, 0, 8, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 8, 6, 6, 6, 6, 8, 6, 8, 6, 6, 4, 4, 6, 6, 4, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 6, 6, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 6, 6, 2] + + values.each do |v| + value = CurrentIndex.new(v) + frontier << value # CurrentIndex.new(v) + end + + expect(frontier.sorted.map(&:current_indent)).to eq(values.sort) + end + end +end From 9c2af0a171cb362ba3b1eb116b6fdb060cb62d2c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 10:42:44 +0900 Subject: [PATCH 254/546] added test-syntax-suggest and prepare tasks --- common.mk | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/common.mk b/common.mk index 1f33a9a3acc035..a7e379c672c24e 100644 --- a/common.mk +++ b/common.mk @@ -1415,6 +1415,25 @@ BUNDLED_GEMS = test-bundled-gems-run: $(PREPARE_BUNDLED_GEMS) $(gnumake_recursive)$(Q) $(XRUBY) $(tooldir)/test-bundled-gems.rb $(BUNDLED_GEMS) +test-syntax-suggest-precheck: $(TEST_RUNNABLE)-test-syntax-suggest-precheck +no-test-syntax-suggest-precheck: +yes-test-syntax-suggest-precheck: main + +no-test-syntax-suggest-prepare: no-test-syntax-suggest-precheck +yes-test-syntax-suggest-prepare: yes-test-syntax-suggest-precheck + $(ACTIONS_GROUP) + $(XRUBY) -C "$(srcdir)" bin/gem install --no-document \ + --install-dir .bundle --conservative "bundler" "rake" "rspec:~> 3" "ruby-prof" + 
$(ACTIONS_ENDGROUP) + +RSPECOPTS = +SYNTAX_SUGGEST_SPECS = +test-syntax-suggest: $(TEST_RUNNABLE)-test-syntax-suggest +yes-test-syntax-suggest: yes-test-syntax-suggest-prepare + $(XRUBY) -C $(srcdir) -Ispec/syntax_suggest .bundle/bin/rspec \ + --require spec_helper $(RSPECOPTS) spec/syntax_suggest/$(SYNTAX-SUGGEST_SPECS) +no-test-syntax-suggest: + test-bundler-precheck: $(TEST_RUNNABLE)-test-bundler-precheck no-test-bundler-precheck: yes-test-bundler-precheck: main From bd1b1eeb0e528da983e76216d459a1d61aa026cb Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 10:49:11 +0900 Subject: [PATCH 255/546] ruby-prof is now optional --- common.mk | 2 +- spec/syntax_suggest/unit/api_spec.rb | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/common.mk b/common.mk index a7e379c672c24e..77aebce312d93e 100644 --- a/common.mk +++ b/common.mk @@ -1423,7 +1423,7 @@ no-test-syntax-suggest-prepare: no-test-syntax-suggest-precheck yes-test-syntax-suggest-prepare: yes-test-syntax-suggest-precheck $(ACTIONS_GROUP) $(XRUBY) -C "$(srcdir)" bin/gem install --no-document \ - --install-dir .bundle --conservative "bundler" "rake" "rspec:~> 3" "ruby-prof" + --install-dir .bundle --conservative "bundler" "rake" "rspec:~> 3" #"ruby-prof" $(ACTIONS_ENDGROUP) RSPECOPTS = diff --git a/spec/syntax_suggest/unit/api_spec.rb b/spec/syntax_suggest/unit/api_spec.rb index 284a4cdeec47c8..21df86bb3e7c8f 100644 --- a/spec/syntax_suggest/unit/api_spec.rb +++ b/spec/syntax_suggest/unit/api_spec.rb @@ -1,7 +1,10 @@ # frozen_string_literal: true require_relative "../spec_helper" -require "ruby-prof" +begin + require "ruby-prof" +rescue LoadError +end module SyntaxSuggest RSpec.describe "Top level SyntaxSuggest api" do From 8dfc077f7016715f33b4818ad3e6770d557356a1 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 11:10:15 +0900 Subject: [PATCH 256/546] Added syntax_suggest cli and resolve failing exapmle with it --- libexec/syntax_suggest | 7 +++++++ 
spec/syntax_suggest/integration/exe_cli_spec.rb | 6 +++++- spec/syntax_suggest/spec_helper.rb | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100755 libexec/syntax_suggest diff --git a/libexec/syntax_suggest b/libexec/syntax_suggest new file mode 100755 index 00000000000000..e4a0b0b658e67f --- /dev/null +++ b/libexec/syntax_suggest @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby + +require_relative "../lib/syntax_suggest/api" + +SyntaxSuggest::Cli.new( + argv: ARGV +).call diff --git a/spec/syntax_suggest/integration/exe_cli_spec.rb b/spec/syntax_suggest/integration/exe_cli_spec.rb index 79e659a27aeda8..f0b49b4386f100 100644 --- a/spec/syntax_suggest/integration/exe_cli_spec.rb +++ b/spec/syntax_suggest/integration/exe_cli_spec.rb @@ -5,7 +5,11 @@ module SyntaxSuggest RSpec.describe "exe" do def exe_path - root_dir.join("exe").join("syntax_suggest") + if ruby_core? + root_dir.join("../libexec").join("syntax_suggest") + else + root_dir.join("exe").join("syntax_suggest") + end end def exe(cmd) diff --git a/spec/syntax_suggest/spec_helper.rb b/spec/syntax_suggest/spec_helper.rb index 33f3ef37083d9f..e78dee76bc1298 100644 --- a/spec/syntax_suggest/spec_helper.rb +++ b/spec/syntax_suggest/spec_helper.rb @@ -43,6 +43,10 @@ def fixtures_dir spec_dir.join("fixtures") end +def ruby_core? + !root_dir.join("syntax_suggest.gemspec").exist? 
+end + def code_line_array(source) SyntaxSuggest::CleanDocument.new(source: source).call.lines end From 5e4d1f9908fc09d0d9a451fd7e385af5ef0d7007 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 11:13:26 +0900 Subject: [PATCH 257/546] Skip examples to need installed ruby exe --- spec/syntax_suggest/integration/ruby_command_line_spec.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spec/syntax_suggest/integration/ruby_command_line_spec.rb b/spec/syntax_suggest/integration/ruby_command_line_spec.rb index 7a1c5c654e65ff..6ed1bf0bf728c0 100644 --- a/spec/syntax_suggest/integration/ruby_command_line_spec.rb +++ b/spec/syntax_suggest/integration/ruby_command_line_spec.rb @@ -46,6 +46,8 @@ module SyntaxSuggest end it "detects require error and adds a message with auto mode" do + skip if ruby_core? + Dir.mktmpdir do |dir| tmpdir = Pathname(dir) script = tmpdir.join("script.rb") @@ -77,6 +79,7 @@ module SyntaxSuggest it "annotates a syntax error in Ruby 3.2+ when require is not used" do pending("Support for SyntaxError#detailed_message monkeypatch needed https://gist.github.com/schneems/09f45cc23b9a8c46e9af6acbb6e6840d?permalink_comment_id=4172585#gistcomment-4172585") + skip if ruby_core? 
skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") Dir.mktmpdir do |dir| From 3a2bc849541a38fe0050bd6b522adf6fedbc29f5 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 11:15:01 +0900 Subject: [PATCH 258/546] Added test-syntax-suggest to CI --- .github/workflows/ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 372eb7cc2f9882..6ea8c06b930534 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -21,7 +21,7 @@ jobs: make: strategy: matrix: - test_task: ["check", "test-bundler-parallel", "test-bundled-gems"] + test_task: ["check", "test-syntax-suggest", "test-bundler-parallel", "test-bundled-gems"] os: - ubuntu-20.04 configure: ["", "cppflags=-DRUBY_DEBUG"] From 1cbee173bef4ce8dfb779e367b75a05f9777cea6 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 11:18:43 +0900 Subject: [PATCH 259/546] Sync examples and cli from syntax_suggest --- tool/sync_default_gems.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index 7a5e190c2d000e..58813dc656915e 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -373,6 +373,11 @@ def sync_default_gems(gem) when "open3" sync_lib gem, upstream rm_rf("lib/open3/jruby_windows.rb") + when "syntax_suggest" + sync_lib gem, upstream + rm_rf(%w[spec/syntax_suggest libexec/syntax_suggest]) + cp_r("#{upstream}/spec", "spec/syntax_suggest") + cp_r("#{upstream}/exe/syntax_suggest", "libexec/syntax_suggest") else sync_lib gem, upstream end From 098a3cfaa7afe40e023e589c92124bac18e207c3 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 26 Aug 2022 11:25:10 +0900 Subject: [PATCH 260/546] Fixed typo --- common.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.mk b/common.mk index 77aebce312d93e..3d3976d5d5233d 100644 --- a/common.mk +++ b/common.mk @@ -1431,7 +1431,7 
@@ SYNTAX_SUGGEST_SPECS = test-syntax-suggest: $(TEST_RUNNABLE)-test-syntax-suggest yes-test-syntax-suggest: yes-test-syntax-suggest-prepare $(XRUBY) -C $(srcdir) -Ispec/syntax_suggest .bundle/bin/rspec \ - --require spec_helper $(RSPECOPTS) spec/syntax_suggest/$(SYNTAX-SUGGEST_SPECS) + --require spec_helper $(RSPECOPTS) spec/syntax_suggest/$(SYNTAX_SUGGEST_SPECS) no-test-syntax-suggest: test-bundler-precheck: $(TEST_RUNNABLE)-test-bundler-precheck From d6f21b308bcff03e82f8b3dbf11a852ce111b3b3 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 23:00:18 -0700 Subject: [PATCH 261/546] Convert catch_except_t to stdbool catch_excep_t is a field that exists for MJIT. In the process of rewriting MJIT in Ruby, I added API to convert 1/0 of _Bool to true/false, and it seemed confusing and hard to maintain if you don't use _Bool for *_p fields. --- compile.c | 10 +++++----- iseq.c | 2 +- test/ruby/test_mjit.rb | 6 +++--- vm_core.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/compile.c b/compile.c index 484399abc6d7d6..e906bd1e10b86c 100644 --- a/compile.c +++ b/compile.c @@ -1357,18 +1357,18 @@ new_child_iseq_with_callback(rb_iseq_t *iseq, const struct rb_iseq_new_with_call static void set_catch_except_p(struct rb_iseq_constant_body *body) { - body->catch_except_p = TRUE; + body->catch_except_p = true; if (body->parent_iseq != NULL) { set_catch_except_p(ISEQ_BODY(body->parent_iseq)); } } -/* Set body->catch_except_p to TRUE if the ISeq may catch an exception. If it is FALSE, - JIT-ed code may be optimized. If we are extremely conservative, we should set TRUE +/* Set body->catch_except_p to true if the ISeq may catch an exception. If it is false, + JIT-ed code may be optimized. If we are extremely conservative, we should set true if catch table exists. But we want to optimize while loop, which always has catch table entries for break/next/redo. 
- So this function sets TRUE for limited ISeqs with break/next/redo catch table entries + So this function sets true for limited ISeqs with break/next/redo catch table entries whose child ISeq would really raise an exception. */ static void update_catch_except_flags(struct rb_iseq_constant_body *body) @@ -1399,7 +1399,7 @@ update_catch_except_flags(struct rb_iseq_constant_body *body) if (entry->type != CATCH_TYPE_BREAK && entry->type != CATCH_TYPE_NEXT && entry->type != CATCH_TYPE_REDO) { - body->catch_except_p = TRUE; + body->catch_except_p = true; break; } } diff --git a/iseq.c b/iseq.c index f17a2d49b61278..4a2c9a33ee68ee 100644 --- a/iseq.c +++ b/iseq.c @@ -2411,7 +2411,7 @@ rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent) rb_str_cat2(str, "== disasm: "); rb_str_append(str, iseq_inspect(iseq)); - rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "TRUE" : "FALSE"); + rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "true" : "false"); if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) { rb_str_modify_expand(str, header_minlen - l); memset(RSTRING_END(str), '=', header_minlen - l); diff --git a/test/ruby/test_mjit.rb b/test/ruby/test_mjit.rb index 3a1dcf7f09ad24..9cd93855bd9f3b 100644 --- a/test/ruby/test_mjit.rb +++ b/test/ruby/test_mjit.rb @@ -782,9 +782,9 @@ def a def test_catching_deep_exception assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '1', success_count: 4) begin; - def catch_true(paths, prefixes) # catch_except_p: TRUE - prefixes.each do |prefix| # catch_except_p: TRUE - paths.each do |path| # catch_except_p: FALSE + def catch_true(paths, prefixes) # catch_except_p: true + prefixes.each do |prefix| # catch_except_p: true + paths.each do |path| # catch_except_p: false return path end end diff --git a/vm_core.h b/vm_core.h index 45ec1111559b9b..eee25161f50140 100644 --- a/vm_core.h +++ b/vm_core.h @@ -474,7 +474,7 @@ struct rb_iseq_constant_body { iseq_bits_t single; } mark_bits; - char catch_except_p; /* If 
a frame of this ISeq may catch exception, set TRUE */ + bool catch_except_p; // If a frame of this ISeq may catch exception, set true. // If true, this ISeq is leaf *and* backtraces are not used, for example, // by rb_profile_frames. We verify only leafness on VM_CHECK_MODE though. // Note that GC allocations might use backtraces due to From fe4dd18db4ef0d0cc0192949538c07110516b69a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 27 Aug 2022 12:54:42 +0900 Subject: [PATCH 262/546] [DOC] Fix a typo [ci skip] --- string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string.c b/string.c index cd4a59385653c6..564812ae51fdb6 100644 --- a/string.c +++ b/string.c @@ -5437,7 +5437,7 @@ rb_str_aset(VALUE str, VALUE indx, VALUE val) * string[index] = new_string * string[start, length] = new_string * string[range] = new_string - * string[regexp, capture = 0) = new_string + * string[regexp, capture = 0] = new_string * string[substring] = new_string * * Replaces all, some, or none of the contents of +self+; returns +new_string+. From f97af5cdc3a7de0c6d0273c29299d1ddaa4a143c Mon Sep 17 00:00:00 2001 From: git Date: Sat, 27 Aug 2022 12:56:00 +0900 Subject: [PATCH 263/546] * 2022-08-27 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 1b29b880e0d02b..39c10e7788c8a4 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 26 +#define RUBY_RELEASE_DAY 27 #include "ruby/version.h" #include "ruby/internal/abi.h" From 13d2225c460cfc3daa679acb89433289527a844f Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 26 Aug 2022 23:32:01 -0700 Subject: [PATCH 264/546] Try rm -rf instead of FileUtils.rm_rf FileUtils.rm_rf started to randomly fail on http://ci.rvm.jp/results/trunk-mjit@phosphorus-docker since around https://github.com/ruby/fileutils/pull/99. 
--- test/rubygems/helper.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index cb0177adb264cb..b9bc72a1f3c72d 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -465,7 +465,12 @@ def teardown Dir.chdir @current_dir - FileUtils.rm_rf @tempdir + # FileUtils.rm_rf randomly fails on ci.rvm.jp trunk-mjit + if ENV['RUBY_DEBUG']&.include?('ci') + system('rm', '-rf', @tempdir.shellescape, exception: true) + else + FileUtils.rm_rf @tempdir + end ENV.replace(@orig_env) From 381d8e43ce33378345a834b3e554e977f157351a Mon Sep 17 00:00:00 2001 From: Aleksandr Varnin <10187586+cmrd-senya@users.noreply.github.com> Date: Wed, 24 Aug 2022 19:09:10 +0300 Subject: [PATCH 265/546] [rubygems/rubygems] Bundler: make to_lock consistent between Gem::Dependency and Bundler::Dependency https://github.com/rubygems/rubygems/commit/971d57cf5a --- lib/bundler/dependency.rb | 2 +- lib/bundler/lockfile_generator.rb | 2 +- spec/bundler/bundler/dependency_spec.rb | 37 +++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 spec/bundler/bundler/dependency_spec.rb diff --git a/lib/bundler/dependency.rb b/lib/bundler/dependency.rb index 7f94079e096f26..52c6fff19459f7 100644 --- a/lib/bundler/dependency.rb +++ b/lib/bundler/dependency.rb @@ -151,7 +151,7 @@ def current_platform? def to_lock out = super out << "!" if source - out << "\n" + out end def specific? 
diff --git a/lib/bundler/lockfile_generator.rb b/lib/bundler/lockfile_generator.rb index 0578a93fdc4513..23413dbdd6db21 100644 --- a/lib/bundler/lockfile_generator.rb +++ b/lib/bundler/lockfile_generator.rb @@ -60,7 +60,7 @@ def add_dependencies handled = [] definition.dependencies.sort_by(&:to_s).each do |dep| next if handled.include?(dep.name) - out << dep.to_lock + out << dep.to_lock << "\n" handled << dep.name end end diff --git a/spec/bundler/bundler/dependency_spec.rb b/spec/bundler/bundler/dependency_spec.rb new file mode 100644 index 00000000000000..f4701529968dd8 --- /dev/null +++ b/spec/bundler/bundler/dependency_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +RSpec.describe Bundler::Dependency do + let(:options) do + {} + end + let(:dependency) do + described_class.new( + "test_gem", + "1.0.0", + options + ) + end + + describe "to_lock" do + it "returns formatted string" do + expect(dependency.to_lock).to eq(" test_gem (= 1.0.0)") + end + + it "matches format of Gem::Dependency#to_lock" do + gem_dependency = Gem::Dependency.new("test_gem", "1.0.0") + expect(dependency.to_lock).to eq(gem_dependency.to_lock) + end + + context "when source is passed" do + let(:options) do + { + "source" => Bundler::Source::Git.new({}), + } + end + + it "returns formatted string with exclamation mark" do + expect(dependency.to_lock).to eq(" test_gem (= 1.0.0)!") + end + end + end +end From 4de09574e0e801fc65b378ffd18ecb959bc2f99f Mon Sep 17 00:00:00 2001 From: git Date: Sun, 28 Aug 2022 02:04:31 +0900 Subject: [PATCH 266/546] * 2022-08-28 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 39c10e7788c8a4..233b42447e4b74 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 27 +#define RUBY_RELEASE_DAY 28 #include "ruby/version.h" #include "ruby/internal/abi.h" From 458d49a04ab7ce8a61be11fbaf341ee252b8253a Mon 
Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 27 Aug 2022 10:21:23 -0700 Subject: [PATCH 267/546] Ignore rm -rf failure http://ci.rvm.jp/results/trunk-mjit@phosphorus-docker/4213386 It's failing with: rm: cannot remove '/tmp/ruby/v3/build/trunk-mjit/tmp/test_rubygems_20220827-13666-ii8lcp': Directory not empty rm: cannot remove '/tmp/ruby/v3/build/trunk-mjit/tmp/test_rubygems_20220827-13666-fy77y1': Directory not empty I'd like to make sure the following `ENV.replace` is called and see if there's any other issues. --- test/rubygems/helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index b9bc72a1f3c72d..23ea69ae0be3b1 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -467,7 +467,7 @@ def teardown # FileUtils.rm_rf randomly fails on ci.rvm.jp trunk-mjit if ENV['RUBY_DEBUG']&.include?('ci') - system('rm', '-rf', @tempdir.shellescape, exception: true) + system('rm', '-rf', @tempdir) else FileUtils.rm_rf @tempdir end From 95d2d7920c97d0502ebed4ba439177325ad05e57 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 27 Aug 2022 12:05:41 -0700 Subject: [PATCH 268/546] Try to prevent the failure of FileUtils.rm_rf --- test/rubygems/helper.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 23ea69ae0be3b1..4e92ae827ece38 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -465,11 +465,13 @@ def teardown Dir.chdir @current_dir - # FileUtils.rm_rf randomly fails on ci.rvm.jp trunk-mjit - if ENV['RUBY_DEBUG']&.include?('ci') - system('rm', '-rf', @tempdir) - else - FileUtils.rm_rf @tempdir + # Prevent a race condition on removing TMPDIR being written by MJIT + if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? + RubyVM::MJIT.pause + end + FileUtils.rm_rf @tempdir + if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? 
+ RubyVM::MJIT.resume end ENV.replace(@orig_env) From a797ded8e2637de91dd4dd5b54bed515f6cda08a Mon Sep 17 00:00:00 2001 From: spaette Date: Sat, 27 Aug 2022 16:09:55 -0500 Subject: [PATCH 269/546] typos --- include/ruby/assert.h | 2 +- include/ruby/internal/arithmetic.h | 3 ++- include/ruby/internal/attr/nodiscard.h | 2 +- include/ruby/internal/intern/select/posix.h | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/ruby/assert.h b/include/ruby/assert.h index c9f2c3fbef29e2..0c052363bcf79a 100644 --- a/include/ruby/assert.h +++ b/include/ruby/assert.h @@ -103,7 +103,7 @@ # /* keep NDEBUG undefined */ #elif (RBIMPL_NDEBUG == 0) && (RBIMPL_RUBY_DEBUG == 0) -# /* The (*1) situation in avobe diagram. */ +# /* The (*1) situation in above diagram. */ # define RUBY_DEBUG 0 # define RUBY_NDEBUG 1 # define NDEBUG diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h index 3f7840c3840afe..437a141d2a2d26 100644 --- a/include/ruby/internal/arithmetic.h +++ b/include/ruby/internal/arithmetic.h @@ -18,7 +18,8 @@ * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of * extension libraries. They could be written in C++98. - * @brief Conversion between C's arithmtic types and Ruby's numeric types. + * @brief Conversion between C's arithmetic types and Ruby's numeric types + * . */ #include "ruby/internal/arithmetic/char.h" #include "ruby/internal/arithmetic/double.h" diff --git a/include/ruby/internal/attr/nodiscard.h b/include/ruby/internal/attr/nodiscard.h index 087192a7a8b18a..c3ae1189424a96 100644 --- a/include/ruby/internal/attr/nodiscard.h +++ b/include/ruby/internal/attr/nodiscard.h @@ -26,7 +26,7 @@ /** * Wraps (or simulates) `[[nodiscard]]`. In C++ (at least since C++20) a - * nodiscard attribute can have a message why the result shall not be ignoed. + * nodiscard attribute can have a message why the result shall not be ignored. 
* However GCC attribute and SAL annotation cannot take them. */ #if RBIMPL_HAS_CPP_ATTRIBUTE(nodiscard) diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h index 5f828e66e2cf14..0a9b0b2e51e688 100644 --- a/include/ruby/internal/intern/select/posix.h +++ b/include/ruby/internal/intern/select/posix.h @@ -136,7 +136,7 @@ rb_fd_max(const rb_fdset_t *f) } /** @cond INTERNAL_MACRO */ -/* :FIXME: What are these? They don't exist for shibling implementations. */ +/* :FIXME: What are these? They don't exist for sibling implementations. */ #define rb_fd_init_copy(d, s) (*(d) = *(s)) #define rb_fd_term(f) ((void)(f)) /** @endcond */ From 83375f47e111a254e523ba462364390937eeef07 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 27 Aug 2022 14:48:10 -0700 Subject: [PATCH 270/546] Avoid leaving a period alone [ci skip] --- include/ruby/internal/arithmetic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h index 437a141d2a2d26..7ebb4a86f1ec6f 100644 --- a/include/ruby/internal/arithmetic.h +++ b/include/ruby/internal/arithmetic.h @@ -18,8 +18,8 @@ * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of * extension libraries. They could be written in C++98. - * @brief Conversion between C's arithmetic types and Ruby's numeric types - * . + * @brief Conversion between C's arithmetic types and Ruby's numeric + * types. 
*/ #include "ruby/internal/arithmetic/char.h" #include "ruby/internal/arithmetic/double.h" From 111b69e8a009f469356a723183b651626dbaa8c4 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sat, 27 Aug 2022 15:47:18 -0700 Subject: [PATCH 271/546] Pass wait: false to avoid a timeout --- test/rubygems/helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 4e92ae827ece38..37e113d3fcacde 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -467,7 +467,7 @@ def teardown # Prevent a race condition on removing TMPDIR being written by MJIT if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? - RubyVM::MJIT.pause + RubyVM::MJIT.pause(wait: false) end FileUtils.rm_rf @tempdir if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? From ace2eee544378eb03ea95b95c89434508325e8c8 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 28 Aug 2022 09:29:24 +0900 Subject: [PATCH 272/546] [Bug #18963] Separate string contents by here document terminator --- parse.y | 13 +++++++++++++ test/ripper/test_lexer.rb | 10 ++++++++++ 2 files changed, 23 insertions(+) diff --git a/parse.y b/parse.y index 405a83a8cf01bc..e6fee29595e26e 100644 --- a/parse.y +++ b/parse.y @@ -7196,6 +7196,10 @@ tokadd_string(struct parser_params *p, { int c; bool erred = false; +#ifdef RIPPER + const int heredoc_end = (p->heredoc_end ? 
p->heredoc_end + 1 : 0); + int top_of_line = FALSE; +#endif #define mixed_error(enc1, enc2) \ (void)(erred || (parser_mixed_error(p, enc1, enc2), erred = true)) @@ -7206,6 +7210,12 @@ tokadd_string(struct parser_params *p, if (p->heredoc_indent > 0) { parser_update_heredoc_indent(p, c); } +#ifdef RIPPER + if (top_of_line && heredoc_end == p->ruby_sourceline) { + pushback(p, c); + break; + } +#endif if (paren && c == paren) { ++*nest; @@ -7332,6 +7342,9 @@ tokadd_string(struct parser_params *p, } } tokadd(p, c); +#ifdef RIPPER + top_of_line = (c == '\n'); +#endif } terminate: if (*enc) *encp = *enc; diff --git a/test/ripper/test_lexer.rb b/test/ripper/test_lexer.rb index 4f3f4657efe4df..27e00070234a68 100644 --- a/test/ripper/test_lexer.rb +++ b/test/ripper/test_lexer.rb @@ -242,4 +242,14 @@ def test_lex_with_syntax_error_and_heredoc EOF assert_equal([[5, 0], :on_heredoc_end, "EOS\n", state(:EXPR_BEG)], Ripper.lex(s).last, bug) end + + def test_tokenize_with_here_document + bug = '[Bug #18963]' + code = %[ +< Date: Sun, 28 Aug 2022 13:54:14 +0900 Subject: [PATCH 273/546] [Win32] Use `exit` instead of `type` [ci skip] `exit` command ignores the rest arguments after an exit code, while `type` command tries to read all as files. --- win32/Makefile.sub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 1aa27a91195211..3de53d8372e0ec 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -4,7 +4,7 @@ SHELL = $(COMSPEC) ECHO1 = $(V:1=@:) RUNCMD = $(COMSPEC) /c MKFILES = Makefile verconf.mk -NULLCMD = type nul +NULLCMD = exit /b0. 
# exit ignores the rest NULL = nul CHDIR = cd PATH_SEPARATOR = ; From 1486ffe03913076889290e38d86a7bdaca4e6fbd Mon Sep 17 00:00:00 2001 From: Felix Yan Date: Sun, 28 Aug 2022 10:27:11 +0300 Subject: [PATCH 274/546] [DOC] Correct article of Ractor's introduction [ci skip] --- ractor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ractor.rb b/ractor.rb index 953d3ceddc197f..8e229d47005521 100644 --- a/ractor.rb +++ b/ractor.rb @@ -1,4 +1,4 @@ -# Ractor is a Actor-model abstraction for Ruby that provides thread-safe parallel execution. +# Ractor is an Actor-model abstraction for Ruby that provides thread-safe parallel execution. # # Ractor.new can make a new Ractor, and it will run in parallel. # From 8799c912052f8bb957a65edd103e0064cac94598 Mon Sep 17 00:00:00 2001 From: shields Date: Sun, 28 Aug 2022 22:40:02 +0900 Subject: [PATCH 275/546] [rubygems/rubygems] Add platform :windows as a shortcut for all Windows platforms https://github.com/rubygems/rubygems/commit/f3c49ad3f7 --- lib/bundler/current_ruby.rb | 19 ++++++++--- lib/bundler/dependency.rb | 1 + lib/bundler/rubygems_ext.rb | 1 + spec/bundler/commands/install_spec.rb | 4 +-- spec/bundler/commands/lock_spec.rb | 12 +++---- spec/bundler/install/gemfile/gemspec_spec.rb | 2 +- spec/bundler/install/gemfile/platform_spec.rb | 2 +- .../install/gemfile/specific_platform_spec.rb | 4 +-- .../install/gems/dependency_api_spec.rb | 2 +- spec/bundler/install/gems/resolving_spec.rb | 4 +-- spec/bundler/runtime/platform_spec.rb | 34 ++++++++++++++++++- spec/bundler/support/builders.rb | 10 +++++- spec/bundler/support/helpers.rb | 2 +- spec/bundler/support/platforms.rb | 20 ++++++++--- 14 files changed, 90 insertions(+), 27 deletions(-) diff --git a/lib/bundler/current_ruby.rb b/lib/bundler/current_ruby.rb index 36f26b7ab43e8e..f9987c4da8713f 100644 --- a/lib/bundler/current_ruby.rb +++ b/lib/bundler/current_ruby.rb @@ -36,17 +36,18 @@ class CurrentRuby rbx ruby truffleruby + windows x64_mingw ].freeze 
def ruby? return true if Bundler::GemHelpers.generic_local_platform == Gem::Platform::RUBY - !mswin? && (RUBY_ENGINE == "ruby" || RUBY_ENGINE == "rbx" || RUBY_ENGINE == "maglev" || RUBY_ENGINE == "truffleruby") + !windows? && (RUBY_ENGINE == "ruby" || RUBY_ENGINE == "rbx" || RUBY_ENGINE == "maglev" || RUBY_ENGINE == "truffleruby") end def mri? - !mswin? && RUBY_ENGINE == "ruby" + !windows? && RUBY_ENGINE == "ruby" end def rbx? @@ -65,16 +66,24 @@ def truffleruby? RUBY_ENGINE == "truffleruby" end - def mswin? + def windows? Gem.win_platform? end + def mswin? + # For backwards compatibility + windows? + + # TODO: This should correctly be: + # windows? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mswin32" && Bundler.local_platform.cpu == "x86" + end + def mswin64? - Gem.win_platform? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mswin64" && Bundler.local_platform.cpu == "x64" + windows? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mswin64" && Bundler.local_platform.cpu == "x64" end def mingw? - Gem.win_platform? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mingw32" && Bundler.local_platform.cpu != "x64" + windows? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mingw32" && Bundler.local_platform.cpu != "x64" end def x64_mingw? 
diff --git a/lib/bundler/dependency.rb b/lib/bundler/dependency.rb index 52c6fff19459f7..49ce23ec888c15 100644 --- a/lib/bundler/dependency.rb +++ b/lib/bundler/dependency.rb @@ -42,6 +42,7 @@ class Dependency < Gem::Dependency :jruby => Gem::Platform::JAVA, :jruby_18 => Gem::Platform::JAVA, :jruby_19 => Gem::Platform::JAVA, + :windows => Gem::Platform::WINDOWS, :mswin => Gem::Platform::MSWIN, :mswin_18 => Gem::Platform::MSWIN, :mswin_19 => Gem::Platform::MSWIN, diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index d976170f12fba8..056053a7832546 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -237,6 +237,7 @@ class Platform MINGW = Gem::Platform.new("x86-mingw32") X64_MINGW = [Gem::Platform.new("x64-mingw32"), Gem::Platform.new("x64-mingw-ucrt")].freeze + WINDOWS = [MSWIN, MSWIN64, MINGW, X64_MINGW].flatten.freeze if Gem::Platform.new("x86_64-linux-musl") === Gem::Platform.new("x86_64-linux") remove_method :=== diff --git a/spec/bundler/commands/install_spec.rb b/spec/bundler/commands/install_spec.rb index 7bf36ee0204422..56945346e15143 100644 --- a/spec/bundler/commands/install_spec.rb +++ b/spec/bundler/commands/install_spec.rb @@ -285,7 +285,7 @@ end it "installs gems for windows" do - simulate_platform mswin + simulate_platform x86_mswin32 install_gemfile <<-G source "#{file_uri_for(gem_repo1)}" @@ -293,7 +293,7 @@ G run "require 'platform_specific' ; puts PLATFORM_SPECIFIC" - expect(out).to eq("1.0.0 MSWIN") + expect(out).to eq("1.0 x86-mswin32") end end diff --git a/spec/bundler/commands/lock_spec.rb b/spec/bundler/commands/lock_spec.rb index b314169a9850a7..007e53f4e2e1d5 100644 --- a/spec/bundler/commands/lock_spec.rb +++ b/spec/bundler/commands/lock_spec.rb @@ -217,7 +217,7 @@ def read_lockfile(file = "Gemfile.lock") allow(Bundler::SharedHelpers).to receive(:find_gemfile).and_return(bundled_app_gemfile) lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to 
match_array([java, mingw, specific_local_platform].uniq) + expect(lockfile.platforms).to match_array([java, x86_mingw32, specific_local_platform].uniq) end it "supports adding new platforms with force_ruby_platform = true" do @@ -241,7 +241,7 @@ def read_lockfile(file = "Gemfile.lock") allow(Bundler::SharedHelpers).to receive(:find_gemfile).and_return(bundled_app_gemfile) lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to contain_exactly(rb, linux, java, mingw) + expect(lockfile.platforms).to contain_exactly(rb, linux, java, x86_mingw32) end it "supports adding the `ruby` platform" do @@ -262,12 +262,12 @@ def read_lockfile(file = "Gemfile.lock") allow(Bundler::SharedHelpers).to receive(:find_gemfile).and_return(bundled_app_gemfile) lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to match_array([java, mingw, specific_local_platform].uniq) + expect(lockfile.platforms).to match_array([java, x86_mingw32, specific_local_platform].uniq) bundle "lock --remove-platform java" lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to match_array([mingw, specific_local_platform].uniq) + expect(lockfile.platforms).to match_array([x86_mingw32, specific_local_platform].uniq) end it "errors when removing all platforms" do @@ -280,7 +280,7 @@ def read_lockfile(file = "Gemfile.lock") build_repo4 do build_gem "ffi", "1.9.14" build_gem "ffi", "1.9.14" do |s| - s.platform = mingw + s.platform = x86_mingw32 end build_gem "gssapi", "0.1" @@ -312,7 +312,7 @@ def read_lockfile(file = "Gemfile.lock") gem "gssapi" G - simulate_platform(mingw) { bundle :lock } + simulate_platform(x86_mingw32) { bundle :lock } expect(lockfile).to eq <<~G GEM diff --git a/spec/bundler/install/gemfile/gemspec_spec.rb b/spec/bundler/install/gemfile/gemspec_spec.rb index 941f1c6db9038f..7e2e7c345a8248 100644 --- a/spec/bundler/install/gemfile/gemspec_spec.rb +++ b/spec/bundler/install/gemfile/gemspec_spec.rb @@ 
-436,7 +436,7 @@ simulate_new_machine simulate_platform("jruby") { bundle "install" } - simulate_platform(x64_mingw) { bundle "install" } + simulate_platform(x64_mingw32) { bundle "install" } end context "on ruby" do diff --git a/spec/bundler/install/gemfile/platform_spec.rb b/spec/bundler/install/gemfile/platform_spec.rb index a357a922723d93..62e6bda4cd9449 100644 --- a/spec/bundler/install/gemfile/platform_spec.rb +++ b/spec/bundler/install/gemfile/platform_spec.rb @@ -501,7 +501,7 @@ RSpec.describe "when a gem has no architecture" do it "still installs correctly" do - simulate_platform mswin + simulate_platform x86_mswin32 build_repo2 do # The rcov gem is platform mswin32, but has no arch diff --git a/spec/bundler/install/gemfile/specific_platform_spec.rb b/spec/bundler/install/gemfile/specific_platform_spec.rb index 094186e63d7ea0..699672f357e087 100644 --- a/spec/bundler/install/gemfile/specific_platform_spec.rb +++ b/spec/bundler/install/gemfile/specific_platform_spec.rb @@ -227,9 +227,9 @@ it "adds the foreign platform" do setup_multiplatform_gem install_gemfile(google_protobuf) - bundle "lock --add-platform=#{x64_mingw}" + bundle "lock --add-platform=#{x64_mingw32}" - expect(the_bundle.locked_gems.platforms).to eq([x64_mingw, pl("x86_64-darwin-15")]) + expect(the_bundle.locked_gems.platforms).to eq([x64_mingw32, pl("x86_64-darwin-15")]) expect(the_bundle.locked_gems.specs.map(&:full_name)).to eq(%w[ google-protobuf-3.0.0.alpha.5.0.5.1-universal-darwin google-protobuf-3.0.0.alpha.5.0.5.1-x64-mingw32 diff --git a/spec/bundler/install/gems/dependency_api_spec.rb b/spec/bundler/install/gems/dependency_api_spec.rb index a3c5bc32aa6414..9a83e5ffad8b39 100644 --- a/spec/bundler/install/gems/dependency_api_spec.rb +++ b/spec/bundler/install/gems/dependency_api_spec.rb @@ -119,7 +119,7 @@ end it "falls back when the API errors out" do - simulate_platform mswin + simulate_platform x86_mswin32 build_repo2 do # The rcov gem is platform mswin32, but has no arch diff 
--git a/spec/bundler/install/gems/resolving_spec.rb b/spec/bundler/install/gems/resolving_spec.rb index 9405f146b9afe1..9f4da23162671f 100644 --- a/spec/bundler/install/gems/resolving_spec.rb +++ b/spec/bundler/install/gems/resolving_spec.rb @@ -423,13 +423,13 @@ s.required_ruby_version = "> 9000" end build_gem "rack", "1.2" do |s| - s.platform = mingw + s.platform = x86_mingw32 s.required_ruby_version = "> 9000" end build_gem "rack", "1.2" end - simulate_platform mingw do + simulate_platform x86_mingw32 do install_gemfile <<-G, :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } ruby "#{Gem.ruby_version}" source "http://localgemserver.test/" diff --git a/spec/bundler/runtime/platform_spec.rb b/spec/bundler/runtime/platform_spec.rb index a7161c9cfea263..84c8dfcab3ec63 100644 --- a/spec/bundler/runtime/platform_spec.rb +++ b/spec/bundler/runtime/platform_spec.rb @@ -386,7 +386,7 @@ s.add_dependency "platform_specific" end end - simulate_windows x64_mingw do + simulate_windows x64_mingw32 do lockfile <<-L GEM remote: #{file_uri_for(gem_repo2)}/ @@ -412,4 +412,36 @@ expect(the_bundle).to include_gem "platform_specific 1.0 x64-mingw32" end end + + %w[x86-mswin32 x64-mswin64 x86-mingw32 x64-mingw32 x64-mingw-ucrt].each do |arch| + it "allows specifying platform windows on #{arch} arch" do + platform = send(arch.tr("-", "_")) + + simulate_windows platform do + lockfile <<-L + GEM + remote: #{file_uri_for(gem_repo1)}/ + specs: + platform_specific (1.0-#{platform}) + requires_platform_specific (1.0) + platform_specific + + PLATFORMS + #{platform} + + DEPENDENCIES + requires_platform_specific + L + + install_gemfile <<-G + source "#{file_uri_for(gem_repo1)}" + gem "platform_specific", :platforms => [:windows] + G + + bundle "install" + + expect(the_bundle).to include_gems "platform_specific 1.0 #{platform}" + end + end + end end diff --git a/spec/bundler/support/builders.rb b/spec/bundler/support/builders.rb index 
a4d4c9f085366b..2af11e9874bb22 100644 --- a/spec/bundler/support/builders.rb +++ b/spec/bundler/support/builders.rb @@ -110,19 +110,27 @@ def build_repo1 build_gem "platform_specific" do |s| s.platform = "x86-mswin32" - s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0.0 MSWIN'" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x86-mswin32'" + end + + build_gem "platform_specific" do |s| + s.platform = "x64-mswin64" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x64-mswin64'" end build_gem "platform_specific" do |s| s.platform = "x86-mingw32" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x86-mingw32'" end build_gem "platform_specific" do |s| s.platform = "x64-mingw32" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x64-mingw32'" end build_gem "platform_specific" do |s| s.platform = "x64-mingw-ucrt" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x64-mingw-ucrt'" end build_gem "platform_specific" do |s| diff --git a/spec/bundler/support/helpers.rb b/spec/bundler/support/helpers.rb index af6e33885348fe..f4ee93ccc0a639 100644 --- a/spec/bundler/support/helpers.rb +++ b/spec/bundler/support/helpers.rb @@ -445,7 +445,7 @@ def simulate_platform(platform) ENV["BUNDLER_SPEC_PLATFORM"] = old if block_given? 
end - def simulate_windows(platform = mswin) + def simulate_windows(platform = x86_mswin32) old = ENV["BUNDLER_SPEC_WINDOWS"] ENV["BUNDLER_SPEC_WINDOWS"] = "true" simulate_platform platform do diff --git a/spec/bundler/support/platforms.rb b/spec/bundler/support/platforms.rb index 1ad7778403f350..d3aefe004ac3d8 100644 --- a/spec/bundler/support/platforms.rb +++ b/spec/bundler/support/platforms.rb @@ -24,20 +24,32 @@ def linux Gem::Platform.new(["x86", "linux", nil]) end - def mswin + def x86_mswin32 Gem::Platform.new(["x86", "mswin32", nil]) end - def mingw + def x64_mswin64 + Gem::Platform.new(["x64", "mswin64", nil]) + end + + def x86_mingw32 Gem::Platform.new(["x86", "mingw32", nil]) end - def x64_mingw + def x64_mingw32 Gem::Platform.new(["x64", "mingw32", nil]) end + def x64_mingw_ucrt + Gem::Platform.new(["x64", "mingw", "ucrt"]) + end + + def windows_platforms + [x86_mswin32, x64_mswin64, x86_mingw32, x64_mingw32, x64_mingw_ucrt] + end + def all_platforms - [rb, java, linux, mswin, mingw, x64_mingw] + [rb, java, linux, windows_platforms].flatten end def local From 5fcce23ae2a9064e9dd6db6afe9a83f6f3ffcb30 Mon Sep 17 00:00:00 2001 From: git Date: Mon, 29 Aug 2022 00:33:32 +0900 Subject: [PATCH 276/546] * 2022-08-29 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 233b42447e4b74..812897df21a4dd 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 28 +#define RUBY_RELEASE_DAY 29 #include "ruby/version.h" #include "ruby/internal/abi.h" From aecc3b12528e1b02d24bcd5df746e93aa04ba211 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Sun, 28 Aug 2022 16:49:51 -0500 Subject: [PATCH 277/546] [DOC] Enhanced RDoc for Time (#6294) --- time.c | 37 ++++++++++++++++++--------------- timev.rb | 62 ++++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 68 insertions(+), 31 deletions(-) diff --git 
a/time.c b/time.c index 505ab4835c111a..56ff35b34e2dbc 100644 --- a/time.c +++ b/time.c @@ -3487,18 +3487,18 @@ time_s_mktime(int argc, VALUE *argv, VALUE klass) * call-seq: * to_i -> integer * - * Returns the number of seconds since the Epoch - * for the time represented in +self+: + * Returns the value of +self+ as integer + * {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds]; + * subseconds are truncated (not rounded): * - * Time.utc(1970, 1, 1).to_i # => 0 - * t = Time.now.to_i # => 1595263289 - * - * Subseconds are omitted: - * - * t = Time.now # => 2022-07-12 09:13:48.5075976 -0500 - * t.to_i # => 1657635228 + * Time.utc(1970, 1, 1, 0, 0, 0).to_i # => 0 + * Time.utc(1970, 1, 1, 0, 0, 0, 999999).to_i # => 0 + * Time.utc(1950, 1, 1, 0, 0, 0).to_i # => -631152000 + * Time.utc(1990, 1, 1, 0, 0, 0).to_i # => 631152000 * * Time#tv_sec is an alias for Time#to_i. + * + * Related: Time#to_f Time#to_r. */ static VALUE @@ -3514,16 +3514,20 @@ time_to_i(VALUE time) * call-seq: * to_f -> float * - * Returns the value of +self+ as a Float number of seconds - * since the Epoch. + * Returns the value of +self+ as a Float number + * {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds]; + * subseconds are included. + * * The stored value of +self+ is a * {Rational}[rdoc-ref:Rational@#method-i-to_f], * which means that the returned value may be approximate: * - * t = Time.now # => 2022-07-07 11:23:18.0784889 -0500 - * t.to_f # => 1657210998.0784888 - * t.to_i # => 1657210998 + * Time.utc(1970, 1, 1, 0, 0, 0).to_f # => 0.0 + * Time.utc(1970, 1, 1, 0, 0, 0, 999999).to_f # => 0.999999 + * Time.utc(1950, 1, 1, 0, 0, 0).to_f # => -631152000.0 + * Time.utc(1990, 1, 1, 0, 0, 0).to_f # => 631152000.0 * + * Related: Time#to_i, Time#to_r. 
*/ static VALUE @@ -3539,11 +3543,12 @@ time_to_f(VALUE time) * call-seq: * to_r -> rational * - * Returns the value of +self+ as a Rational number of seconds - * since the Epoch, which is exact: + * Returns the value of +self+ as a Rational exact number of + * {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds]; * * Time.now.to_r # => (16571402750320203/10000000) * + * Related: Time#to_f, Time#to_i. */ static VALUE diff --git a/timev.rb b/timev.rb index ad97d63b5514ac..48aabdc79892af 100644 --- a/timev.rb +++ b/timev.rb @@ -1,19 +1,49 @@ -# Time is an abstraction of dates and times. Time is stored internally as -# the number of seconds with subsecond since the _Epoch_, -# 1970-01-01 00:00:00 UTC. +# A \Time object represents a date and time: # -# The Time class treats GMT -# (Greenwich Mean Time) and UTC (Coordinated Universal Time) as equivalent. -# GMT is the older way of referring to these baseline times but persists in -# the names of calls on POSIX systems. +# Time.new(2000, 1, 1, 0, 0, 0) # => 2000-01-01 00:00:00 -0600 # -# Note: A \Time object uses the resolution available on your system clock. +# Although its value can be expressed as a single numeric +# (see {Epoch Seconds}[rdoc-ref:Time@Epoch+Seconds] below), +# it can be convenient to deal with the value by parts: # -# All times may have subsecond. Be aware of this fact when comparing times -# with each other -- times that are apparently equal when displayed may be -# different when compared. -# (Since Ruby 2.7.0, Time#inspect shows subsecond but -# Time#to_s still doesn't show subsecond.) 
+# t = Time.new(-2000, 1, 1, 0, 0, 0.0) +# # => -2000-01-01 00:00:00 -0600 +# t.year # => -2000 +# t.month # => 1 +# t.mday # => 1 +# t.hour # => 0 +# t.min # => 0 +# t.sec # => 0 +# t.subsec # => 0 +# +# t = Time.new(2000, 12, 31, 23, 59, 59.5) +# # => 2000-12-31 23:59:59.5 -0600 +# t.year # => 2000 +# t.month # => 12 +# t.mday # => 31 +# t.hour # => 23 +# t.min # => 59 +# t.sec # => 59 +# t.subsec # => (1/2) +# +# == Epoch Seconds +# +Epoch seconds is the exact number of seconds +(including fractional subseconds) since the Unix Epoch, January 1, 1970. +# +# You can retrieve that value exactly using method Time.to_r: +# +# Time.at(0).to_r # => (0/1) +# Time.at(0.999999).to_r # => (9007190247541737/9007199254740992) +# +# Other retrieval methods such as Time#to_i and Time#to_f +# may return a value that rounds or truncates subseconds. +# +# == \Time Resolution +# +# A \Time object derived from the system clock +# (for example, by method Time.now) +# has the resolution supported by the system. # # == Examples # @@ -229,7 +259,9 @@ def self.now(in: nil) # # - A \Time object, whose value is the basis for the returned time; # also influenced by optional keyword argument +in:+ (see below). - # - A numeric number of seconds (since the epoch) for the returned time. + # - A numeric number of + # {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds] + # for the returned time. 
# # Examples: # @@ -259,7 +291,7 @@ def self.now(in: nil) # Time.at(secs, 1000000, :microsecond) # => 2001-01-01 00:00:00 -0600 # Time.at(secs, -1000000, :microsecond) # => 2000-12-31 23:59:58 -0600 # - # - +:nsec+ or +:nanosecond+: +subsec+ in nanoseconds: + # - +:nanosecond+ or +:nsec+: +subsec+ in nanoseconds: # # Time.at(secs, 0, :nanosecond) # => 2000-12-31 23:59:59 -0600 # Time.at(secs, 500000000, :nanosecond) # => 2000-12-31 23:59:59.5 -0600 From 055fc7b14dc375089b454cc7eaebeb78f9f3efaa Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Sun, 28 Aug 2022 14:57:36 -0700 Subject: [PATCH 278/546] Fix comment in timev.rb --- timev.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/timev.rb b/timev.rb index 48aabdc79892af..891142e0efc8f3 100644 --- a/timev.rb +++ b/timev.rb @@ -28,8 +28,8 @@ # # == Epoch Seconds # -Epoch seconds is the exact number of seconds -(including fractional subseconds) since the Unix Epoch, January 1, 1970. +# Epoch seconds is the exact number of seconds +# (including fractional subseconds) since the Unix Epoch, January 1, 1970. 
# # You can retrieve that value exactly using method Time.to_r: # From 41a275c89ac8064f18d995251b9fb3349a24273b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 29 Aug 2022 09:52:09 +0900 Subject: [PATCH 279/546] Support main branch for syntax_suggest --- tool/sync_default_gems.rb | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index 58813dc656915e..451e325e5ce80b 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -547,6 +547,12 @@ def sync_lib(repo, upstream = nil) def update_default_gems(gem, release: false) author, repository = REPOSITORIES[gem.to_sym].split('/') + default_branch = case gem + when 'syntax_suggest' + "main" + else + "master" + end puts "Update #{author}/#{repository}" @@ -572,8 +578,8 @@ def update_default_gems(gem, release: false) last_release = `git tag`.chomp.split.delete_if{|v| v =~ /pre|beta/ }.last `git checkout #{last_release}` else - `git checkout master` - `git rebase origin/master` + `git checkout #{default_branch}` + `git rebase origin/#{default_branch}` end end end From 7bdb999d0f28c7bb9d7a35ca775e405674527e5f Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Mon, 29 Aug 2022 12:50:47 +0900 Subject: [PATCH 280/546] Chect what remains in TMPDIR I guess it has a shared library file created by MJIT, but I want to make sure the fact. --- test/rubygems/helper.rb | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 37e113d3fcacde..29d3924ebab129 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -466,13 +466,18 @@ def teardown Dir.chdir @current_dir # Prevent a race condition on removing TMPDIR being written by MJIT - if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? - RubyVM::MJIT.pause(wait: false) - end - FileUtils.rm_rf @tempdir - if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? 
- RubyVM::MJIT.resume - end + #if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? + # RubyVM::MJIT.pause(wait: false) + #end + begin + FileUtils.rm_rf @tempdir + ensure + # mame: Temporal code for debugging. Let me confirm what remains in the directory + pp Dir.glob(File.join(@tempdir, "**", "{.*,*}")) if $! + end + #if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? + # RubyVM::MJIT.resume + #end ENV.replace(@orig_env) From 78748a5de2146d00696698340d745bba3ec66496 Mon Sep 17 00:00:00 2001 From: "S.H" Date: Mon, 29 Aug 2022 14:42:19 +0900 Subject: [PATCH 281/546] Introduce `usage_analysis_clear` --- vm.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/vm.c b/vm.c index 8cab8b9b57b534..15b6fa3a572bde 100644 --- a/vm.c +++ b/vm.c @@ -4296,51 +4296,48 @@ usage_analysis_register_running(VALUE self) return RBOOL(ruby_vm_collect_usage_func_register != 0); } +static VALUE +usage_analysis_clear(VALUE self, ID usage_hash) +{ + VALUE uh; + uh = rb_const_get(self, usage_hash); + rb_hash_clear(uh); + + return Qtrue; +} + + /* :nodoc: */ static VALUE usage_analysis_insn_clear(VALUE self) { - ID usage_hash; - ID bigram_hash; - VALUE uh; - VALUE bh; - - CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); - CONST_ID(bigram_hash, "USAGE_ANALYSIS_INSN_BIGRAM"); - uh = rb_const_get(rb_cRubyVM, usage_hash); - bh = rb_const_get(rb_cRubyVM, bigram_hash); - rb_hash_clear(uh); - rb_hash_clear(bh); + ID usage_hash; + ID bigram_hash; - return Qtrue; + CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); + CONST_ID(bigram_hash, "USAGE_ANALYSIS_INSN_BIGRAM"); + usage_analysis_clear(rb_cRubyVM, usage_hash); + return usage_analysis_clear(rb_cRubyVM, bigram_hash); } /* :nodoc: */ static VALUE usage_analysis_operand_clear(VALUE self) { - ID usage_hash; - VALUE uh; - - CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); - uh = rb_const_get(rb_cRubyVM, usage_hash); - rb_hash_clear(uh); + ID usage_hash; - return Qtrue; + 
CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); + return usage_analysis_clear(self, usage_hash); } /* :nodoc: */ static VALUE usage_analysis_register_clear(VALUE self) { - ID usage_hash; - VALUE uh; + ID usage_hash; - CONST_ID(usage_hash, "USAGE_ANALYSIS_REGS"); - uh = rb_const_get(rb_cRubyVM, usage_hash); - rb_hash_clear(uh); - - return Qtrue; + CONST_ID(usage_hash, "USAGE_ANALYSIS_REGS"); + return usage_analysis_clear(self, usage_hash); } #else From 4bc782ed87d05d251d1656ca08e176f8bb56a448 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Mon, 29 Aug 2022 20:23:51 +0900 Subject: [PATCH 282/546] Revert "Chect what remains in TMPDIR" This reverts commit 7bdb999d0f28c7bb9d7a35ca775e405674527e5f. I think I confirmed the mechanism. GCC (invoked by MJIT) creates a temporary file in TMPDIR, which prevents rm_rf from removing the directory. --- test/rubygems/helper.rb | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 29d3924ebab129..37e113d3fcacde 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -466,18 +466,13 @@ def teardown Dir.chdir @current_dir # Prevent a race condition on removing TMPDIR being written by MJIT - #if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? - # RubyVM::MJIT.pause(wait: false) - #end - begin - FileUtils.rm_rf @tempdir - ensure - # mame: Temporal code for debugging. Let me confirm what remains in the directory - pp Dir.glob(File.join(@tempdir, "**", "{.*,*}")) if $! - end - #if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? - # RubyVM::MJIT.resume - #end + if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? + RubyVM::MJIT.pause(wait: false) + end + FileUtils.rm_rf @tempdir + if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? 
+ RubyVM::MJIT.resume + end ENV.replace(@orig_env) From a319d3cfdc1afef8497321fee7f690052b16739c Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 29 Aug 2022 15:32:48 +0200 Subject: [PATCH 283/546] Run specs with CHECK_LEAKS=true * To ensure these issues are noticed early. --- .github/workflows/compilers.yml | 2 ++ .github/workflows/spec_guards.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/compilers.yml b/.github/workflows/compilers.yml index 1ccc5dbbc822a8..340dd20f575ff2 100644 --- a/.github/workflows/compilers.yml +++ b/.github/workflows/compilers.yml @@ -257,6 +257,8 @@ jobs: - run: make test-all TESTS='-- ruby -ext-' if: ${{ matrix.entry.check }} - run: make test-spec + env: + CHECK_LEAKS: true if: ${{ matrix.entry.check }} - run: make test-annocheck if: ${{ matrix.entry.check && endsWith(matrix.entry.name, 'annocheck') }} diff --git a/.github/workflows/spec_guards.yml b/.github/workflows/spec_guards.yml index d09f1a24d3b1de..7bffe25bb26610 100644 --- a/.github/workflows/spec_guards.yml +++ b/.github/workflows/spec_guards.yml @@ -40,6 +40,8 @@ jobs: - run: gem install webrick - run: ruby ../mspec/bin/mspec working-directory: spec/ruby + env: + CHECK_LEAKS: true - uses: k0kubun/action-slack@v2.0.0 with: payload: | From 4ee1a687768338a1928014fc6042c320a1a1af3e Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 29 Aug 2022 15:36:29 +0200 Subject: [PATCH 284/546] Update to ruby/spec@d01709f --- spec/ruby/CONTRIBUTING.md | 5 +- spec/ruby/README.md | 5 +- spec/ruby/core/dir/fixtures/common.rb | 1 + spec/ruby/core/dir/glob_spec.rb | 2 + spec/ruby/core/dir/shared/glob.rb | 5 ++ spec/ruby/core/io/fixtures/classes.rb | 12 +++ spec/ruby/core/io/pipe_spec.rb | 11 +++ spec/ruby/core/kernel/shared/sprintf.rb | 65 ++++++++++++++-- .../core/kernel/shared/sprintf_encoding.rb | 33 ++++++++- spec/ruby/core/regexp/compile_spec.rb | 4 + spec/ruby/core/regexp/new_spec.rb | 14 +--- spec/ruby/core/regexp/shared/new.rb | 36 +++++++++ 
spec/ruby/core/time/shared/local.rb | 6 +- spec/ruby/language/block_spec.rb | 74 +++++++++++++++++++ spec/ruby/language/keyword_arguments_spec.rb | 18 +++++ spec/ruby/language/method_spec.rb | 7 ++ spec/ruby/language/proc_spec.rb | 7 ++ spec/ruby/library/bigdecimal/to_r_spec.rb | 12 +++ spec/ruby/library/stringio/open_spec.rb | 12 ++- spec/ruby/library/stringio/printf_spec.rb | 27 ++++++- .../library/stringio/read_nonblock_spec.rb | 11 +++ .../library/stringio/write_nonblock_spec.rb | 6 ++ .../ruby/library/zlib/deflate/deflate_spec.rb | 5 ++ .../ruby/library/zlib/inflate/inflate_spec.rb | 7 ++ spec/ruby/optional/capi/encoding_spec.rb | 42 +++++++++++ spec/ruby/optional/capi/ext/encoding_spec.c | 9 +++ spec/ruby/optional/capi/ext/string_spec.c | 7 ++ spec/ruby/optional/capi/ext/util_spec.c | 21 +++--- spec/ruby/optional/capi/string_spec.rb | 32 ++++++++ spec/ruby/optional/capi/util_spec.rb | 5 +- spec/ruby/security/cve_2019_8325_spec.rb | 15 ++-- spec/ruby/shared/sizedqueue/new.rb | 9 ++- 32 files changed, 473 insertions(+), 52 deletions(-) diff --git a/spec/ruby/CONTRIBUTING.md b/spec/ruby/CONTRIBUTING.md index 20258e5c36adbc..adfc2fb0ca4c87 100644 --- a/spec/ruby/CONTRIBUTING.md +++ b/spec/ruby/CONTRIBUTING.md @@ -175,9 +175,10 @@ end #### Guard for bug -In case there is a bug in MRI but the expected behavior is obvious. +In case there is a bug in MRI and the fix will be backported to previous versions. +If it is not backported or not likely, use `ruby_version_is` instead. First, file a bug at https://bugs.ruby-lang.org/. -It is better to use a `ruby_version_is` guard if there was a release with the fix. +The problem is `ruby_bug` would make non-MRI implementations fail this spec while MRI itself does not pass it, so it should only be used if the bug is/will be fixed and backported. 
```ruby ruby_bug '#13669', ''...'3.2' do diff --git a/spec/ruby/README.md b/spec/ruby/README.md index 55b248a6c31d39..24b4719fdda515 100644 --- a/spec/ruby/README.md +++ b/spec/ruby/README.md @@ -144,10 +144,9 @@ The file `/etc/services` is required for socket specs (package `netbase` on Debi ### Socket specs from rubysl-socket -Most specs under `library/socket` were imported from [the rubysl-socket project](https://github.com/rubysl/rubysl-socket). +Most specs under `library/socket` were imported from the rubysl-socket project (which is no longer on GitHub). The 3 copyright holders of rubysl-socket, Yorick Peterse, Chuck Remes and -Brian Shirai, [agreed to relicense those specs](https://github.com/rubysl/rubysl-socket/issues/15) -under the MIT license in ruby/spec. +Brian Shirai, agreed to relicense those specs under the MIT license in ruby/spec. ### History and RubySpec diff --git a/spec/ruby/core/dir/fixtures/common.rb b/spec/ruby/core/dir/fixtures/common.rb index a8d6e69c4469d6..087f46b331e249 100644 --- a/spec/ruby/core/dir/fixtures/common.rb +++ b/spec/ruby/core/dir/fixtures/common.rb @@ -82,6 +82,7 @@ def self.mock_dir_files special/test{1}/file[1] special/{}/special + special/test\ +()[]{}/hello_world.erb ] platform_is_not :windows do diff --git a/spec/ruby/core/dir/glob_spec.rb b/spec/ruby/core/dir/glob_spec.rb index 43dac73eee805b..06b52b90fbcced 100644 --- a/spec/ruby/core/dir/glob_spec.rb +++ b/spec/ruby/core/dir/glob_spec.rb @@ -79,6 +79,7 @@ nested/ nested/.dotsubir/ special/ + special/test\ +()[]{}/ special/test{1}/ special/{}/ subdir_one/ @@ -130,6 +131,7 @@ ./nested/ ./nested/.dotsubir/ ./special/ + ./special/test\ +()[]{}/ ./special/test{1}/ ./special/{}/ ./subdir_one/ diff --git a/spec/ruby/core/dir/shared/glob.rb b/spec/ruby/core/dir/shared/glob.rb index 60d4a8c97a6592..33b2828c276fec 100644 --- a/spec/ruby/core/dir/shared/glob.rb +++ b/spec/ruby/core/dir/shared/glob.rb @@ -111,6 +111,10 @@ it "matches files with backslashes in their name" do 
Dir.glob('special/\\\\{a,b}').should == ['special/\a'] end + + it "matches directory with special characters in their name in complex patterns" do + Dir.glob("special/test +()\\[\\]\\{\\}/hello_world{.{en},}{.{html},}{+{phone},}{.{erb},}").should == ['special/test +()[]{}/hello_world.erb'] + end end it "matches regexp special ^" do @@ -225,6 +229,7 @@ dir/ nested/ special/ + special/test\ +()[]{}/ special/test{1}/ special/{}/ subdir_one/ diff --git a/spec/ruby/core/io/fixtures/classes.rb b/spec/ruby/core/io/fixtures/classes.rb index 067ab59d930145..204a2a101b0033 100644 --- a/spec/ruby/core/io/fixtures/classes.rb +++ b/spec/ruby/core/io/fixtures/classes.rb @@ -7,6 +7,18 @@ module IOSpecs class SubIO < IO end + class SubIOWithRedefinedNew < IO + def self.new(...) + ScratchPad << :redefined_new_called + super + end + + def initialize(...) + ScratchPad << :call_original_initialize + super + end + end + def self.collector Proc.new { |x| ScratchPad << x } end diff --git a/spec/ruby/core/io/pipe_spec.rb b/spec/ruby/core/io/pipe_spec.rb index 2f2cf06f4d32c1..aee0d9003f4e39 100644 --- a/spec/ruby/core/io/pipe_spec.rb +++ b/spec/ruby/core/io/pipe_spec.rb @@ -25,6 +25,17 @@ @r.should be_an_instance_of(IOSpecs::SubIO) @w.should be_an_instance_of(IOSpecs::SubIO) end + + it "does not use IO.new method to create pipes and allows its overriding" do + ScratchPad.record [] + + # so redefined .new is not called, but original #initialize is + @r, @w = IOSpecs::SubIOWithRedefinedNew.pipe + ScratchPad.recorded.should == [:call_original_initialize, :call_original_initialize] # called 2 times - for each pipe (r and w) + + @r.should be_an_instance_of(IOSpecs::SubIOWithRedefinedNew) + @w.should be_an_instance_of(IOSpecs::SubIOWithRedefinedNew) + end end describe "IO.pipe" do diff --git a/spec/ruby/core/kernel/shared/sprintf.rb b/spec/ruby/core/kernel/shared/sprintf.rb index 59f5ab003620a9..2db50bd686878a 100644 --- a/spec/ruby/core/kernel/shared/sprintf.rb +++ 
b/spec/ruby/core/kernel/shared/sprintf.rb @@ -293,13 +293,13 @@ def obj.to_i; 10; end it "raises ArgumentError if argument is a string of several characters" do -> { @method.call("%c", "abc") - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, /%c requires a character/) end it "raises ArgumentError if argument is an empty string" do -> { @method.call("%c", "") - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, /%c requires a character/) end end @@ -313,9 +313,56 @@ def obj.to_i; 10; end end end - it "supports Unicode characters" do - @method.call("%c", 1286).should == "Ԇ" - @method.call("%c", "ش").should == "ش" + it "raises TypeError if argument is not String or Integer and cannot be converted to them" do + -> { + @method.call("%c", []) + }.should raise_error(TypeError, /no implicit conversion of Array into Integer/) + end + + it "raises TypeError if argument is nil" do + -> { + @method.call("%c", nil) + }.should raise_error(TypeError, /no implicit conversion from nil to integer/) + end + + it "tries to convert argument to String with to_str" do + obj = BasicObject.new + def obj.to_str + "a" + end + + @method.call("%c", obj).should == "a" + end + + it "tries to convert argument to Integer with to_int" do + obj = BasicObject.new + def obj.to_int + 90 + end + + @method.call("%c", obj).should == "Z" + end + + it "raises TypeError if converting to String with to_str returns non-String" do + obj = BasicObject.new + def obj.to_str + :foo + end + + -> { + @method.call("%c", obj) + }.should raise_error(TypeError, /can't convert BasicObject to String/) + end + + it "raises TypeError if converting to Integer with to_int returns non-Integer" do + obj = BasicObject.new + def obj.to_str + :foo + end + + -> { + @method.call("%c", obj) + }.should raise_error(TypeError, /can't convert BasicObject to String/) end end @@ -374,11 +421,11 @@ def obj.to_str @method.call("%4.6s", "abcdefg").should == "abcdef" end - it "formats nli with 
width" do + it "formats nil with width" do @method.call("%6s", nil).should == " " end - it "formats nli with precision" do + it "formats nil with precision" do @method.call("%.6s", nil).should == "" end @@ -939,4 +986,8 @@ def obj.to_str; end } end end + + it "does not raise error when passed more arguments than needed" do + sprintf("%s %d %c", "string", 2, "c", []).should == "string 2 c" + end end diff --git a/spec/ruby/core/kernel/shared/sprintf_encoding.rb b/spec/ruby/core/kernel/shared/sprintf_encoding.rb index 5ca66b9083bea3..9cedb8b662466b 100644 --- a/spec/ruby/core/kernel/shared/sprintf_encoding.rb +++ b/spec/ruby/core/kernel/shared/sprintf_encoding.rb @@ -1,3 +1,5 @@ +# Keep encoding-related specs in a separate shared example to be able to skip them in IO/File/StringIO specs. +# It's difficult to check result's encoding in the test after writing to a file/io buffer. describe :kernel_sprintf_encoding, shared: true do it "can produce a string with valid encoding" do string = @method.call("good day %{valid}", valid: "e") @@ -25,7 +27,7 @@ result.encoding.should equal(Encoding::UTF_8) end - it "raises Encoding::CompatibilityError if both encodings are ASCII compatible and there ano not ASCII characters" do + it "raises Encoding::CompatibilityError if both encodings are ASCII compatible and there are not ASCII characters" do string = "Ä %s".encode('windows-1252') argument = "Ђ".encode('windows-1251') @@ -33,4 +35,33 @@ @method.call(string, argument) }.should raise_error(Encoding::CompatibilityError) end + + describe "%c" do + it "supports Unicode characters" do + result = @method.call("%c", 1286) + result.should == "Ԇ" + result.bytes.should == [212, 134] + + result = @method.call("%c", "ش") + result.should == "ش" + result.bytes.should == [216, 180] + end + + it "raises error when a codepoint isn't representable in an encoding of a format string" do + format = "%c".encode("ASCII") + + -> { + @method.call(format, 1286) + }.should raise_error(RangeError, /out of 
char range/) + end + + it "uses the encoding of the format string to interpret codepoints" do + format = "%c".force_encoding("euc-jp") + result = @method.call(format, 9415601) + + result.encoding.should == Encoding::EUC_JP + result.should == "é".encode(Encoding::EUC_JP) + result.bytes.should == [143, 171, 177] + end + end end diff --git a/spec/ruby/core/regexp/compile_spec.rb b/spec/ruby/core/regexp/compile_spec.rb index 329cb4f753ce92..c41399cfbb3539 100644 --- a/spec/ruby/core/regexp/compile_spec.rb +++ b/spec/ruby/core/regexp/compile_spec.rb @@ -13,3 +13,7 @@ describe "Regexp.compile given a Regexp" do it_behaves_like :regexp_new_regexp, :compile end + +describe "Regexp.new given a non-String/Regexp" do + it_behaves_like :regexp_new_non_string_or_regexp, :compile +end diff --git a/spec/ruby/core/regexp/new_spec.rb b/spec/ruby/core/regexp/new_spec.rb index ce662b7a4f6b0b..65f612df55311f 100644 --- a/spec/ruby/core/regexp/new_spec.rb +++ b/spec/ruby/core/regexp/new_spec.rb @@ -11,17 +11,9 @@ describe "Regexp.new given a Regexp" do it_behaves_like :regexp_new_regexp, :new - it_behaves_like :regexp_new_string_binary, :compile + it_behaves_like :regexp_new_string_binary, :new end -describe "Regexp.new given an Integer" do - it "raises a TypeError" do - -> { Regexp.new(1) }.should raise_error(TypeError) - end -end - -describe "Regexp.new given a Float" do - it "raises a TypeError" do - -> { Regexp.new(1.0) }.should raise_error(TypeError) - end +describe "Regexp.new given a non-String/Regexp" do + it_behaves_like :regexp_new_non_string_or_regexp, :new end diff --git a/spec/ruby/core/regexp/shared/new.rb b/spec/ruby/core/regexp/shared/new.rb index a6d9c4811244d7..10c2d3d3909b96 100644 --- a/spec/ruby/core/regexp/shared/new.rb +++ b/spec/ruby/core/regexp/shared/new.rb @@ -24,6 +24,32 @@ class RegexpSpecsSubclassTwo < Regexp; end end end +describe :regexp_new_non_string_or_regexp, shared: true do + it "calls #to_str method for non-String/Regexp argument" do + obj = 
Object.new + def obj.to_str() "a" end + + Regexp.send(@method, obj).should == /a/ + end + + it "raises TypeError if there is no #to_str method for non-String/Regexp argument" do + obj = Object.new + -> { Regexp.send(@method, obj) }.should raise_error(TypeError, "no implicit conversion of Object into String") + + -> { Regexp.send(@method, 1) }.should raise_error(TypeError, "no implicit conversion of Integer into String") + -> { Regexp.send(@method, 1.0) }.should raise_error(TypeError, "no implicit conversion of Float into String") + -> { Regexp.send(@method, :symbol) }.should raise_error(TypeError, "no implicit conversion of Symbol into String") + -> { Regexp.send(@method, []) }.should raise_error(TypeError, "no implicit conversion of Array into String") + end + + it "raises TypeError if #to_str returns non-String value" do + obj = Object.new + def obj.to_str() [] end + + -> { Regexp.send(@method, obj) }.should raise_error(TypeError, /can't convert Object to String/) + end +end + describe :regexp_new_string, shared: true do it "uses the String argument as an unescaped literal to construct a Regexp object" do Regexp.send(@method, "^hi{2,3}fo.o$").should == /^hi{2,3}fo.o$/ @@ -97,6 +123,16 @@ class RegexpSpecsSubclassTwo < Regexp; end (r.options & Regexp::EXTENDED).should_not == 0 end + it "does not try to convert the second argument to Integer with #to_int method call" do + ScratchPad.clear + obj = Object.new + def obj.to_int() ScratchPad.record(:called) end + + Regexp.send(@method, "Hi", obj) + + ScratchPad.recorded.should == nil + end + ruby_version_is ""..."3.2" do it "treats any non-Integer, non-nil, non-false second argument as IGNORECASE" do r = Regexp.send(@method, 'Hi', Object.new) diff --git a/spec/ruby/core/time/shared/local.rb b/spec/ruby/core/time/shared/local.rb index 997b7186f1193c..2dba23dbd7ea23 100644 --- a/spec/ruby/core/time/shared/local.rb +++ b/spec/ruby/core/time/shared/local.rb @@ -9,10 +9,10 @@ =begin platform_is_not :windows do describe 
"timezone changes" do - it "correctly adjusts the timezone change to 'CEST' on 'Europe/Amsterdam'" do + it "correctly adjusts the timezone change to 'CET' on 'Europe/Amsterdam'" do with_timezone("Europe/Amsterdam") do - Time.send(@method, 1940, 5, 16).to_a.should == - [0, 40, 1, 16, 5, 1940, 4, 137, true, "CEST"] + Time.send(@method, 1970, 5, 16).to_a.should == + [0, 0, 0, 16, 5, 1970, 6, 136, false, "CET"] end end end diff --git a/spec/ruby/language/block_spec.rb b/spec/ruby/language/block_spec.rb index 42652152a1bdae..d918c12beb2ec9 100644 --- a/spec/ruby/language/block_spec.rb +++ b/spec/ruby/language/block_spec.rb @@ -983,3 +983,77 @@ def a; 1; end end end end + +describe "Anonymous block forwarding" do + ruby_version_is "3.1" do + it "forwards blocks to other functions that formally declare anonymous blocks" do + eval <<-EOF + def b(&); c(&) end + def c(&); yield :non_null end + EOF + + b { |c| c }.should == :non_null + end + + it "requires the anonymous block parameter to be declared if directly passing a block" do + -> { eval "def a; b(&); end; def b; end" }.should raise_error(SyntaxError) + end + + it "works when it's the only declared parameter" do + eval <<-EOF + def inner; yield end + def block_only(&); inner(&) end + EOF + + block_only { 1 }.should == 1 + end + + it "works alongside positional parameters" do + eval <<-EOF + def inner; yield end + def pos(arg1, &); inner(&) end + EOF + + pos(:a) { 1 }.should == 1 + end + + it "works alongside positional arguments and splatted keyword arguments" do + eval <<-EOF + def inner; yield end + def pos_kwrest(arg1, **kw, &); inner(&) end + EOF + + pos_kwrest(:a, arg: 3) { 1 }.should == 1 + end + + it "works alongside positional arguments and disallowed keyword arguments" do + eval <<-EOF + def inner; yield end + def no_kw(arg1, **nil, &); inner(&) end + EOF + + no_kw(:a) { 1 }.should == 1 + end + end + + ruby_version_is "3.2" do + it "works alongside explicit keyword arguments" do + eval <<-EOF + def inner; yield 
end + def rest_kw(*a, kwarg: 1, &); inner(&) end + def kw(kwarg: 1, &); inner(&) end + def pos_kw_kwrest(arg1, kwarg: 1, **kw, &); inner(&) end + def pos_rkw(arg1, kwarg1:, &); inner(&) end + def all(arg1, arg2, *rest, post1, post2, kw1: 1, kw2: 2, okw1:, okw2:, &); inner(&) end + def all_kwrest(arg1, arg2, *rest, post1, post2, kw1: 1, kw2: 2, okw1:, okw2:, **kw, &); inner(&) end + EOF + + rest_kw { 1 }.should == 1 + kw { 1 }.should == 1 + pos_kw_kwrest(:a) { 1 }.should == 1 + pos_rkw(:a, kwarg1: 3) { 1 }.should == 1 + all(:a, :b, :c, :d, :e, okw1: 'x', okw2: 'y') { 1 }.should == 1 + all_kwrest(:a, :b, :c, :d, :e, okw1: 'x', okw2: 'y') { 1 }.should == 1 + end + end +end diff --git a/spec/ruby/language/keyword_arguments_spec.rb b/spec/ruby/language/keyword_arguments_spec.rb index 0c72f59d383984..8771c5806c0c36 100644 --- a/spec/ruby/language/keyword_arguments_spec.rb +++ b/spec/ruby/language/keyword_arguments_spec.rb @@ -58,6 +58,24 @@ def m(*a, kw:) m(kw: 1).should == [] -> { m(kw: 1, kw2: 2) }.should raise_error(ArgumentError, 'unknown keyword: :kw2') -> { m(kw: 1, true => false) }.should raise_error(ArgumentError, 'unknown keyword: true') + -> { m(kw: 1, a: 1, b: 2, c: 3) }.should raise_error(ArgumentError, 'unknown keywords: :a, :b, :c') + end + + it "raises ArgumentError exception when required keyword argument is not passed" do + def m(a:, b:, c:) + [a, b, c] + end + + -> { m(a: 1, b: 2) }.should raise_error(ArgumentError, /missing keyword: :c/) + -> { m() }.should raise_error(ArgumentError, /missing keywords: :a, :b, :c/) + end + + it "raises ArgumentError for missing keyword arguments even if there are extra ones" do + def m(a:) + a + end + + -> { m(b: 1) }.should raise_error(ArgumentError, /missing keyword: :a/) end it "handle * and ** at the same call site" do diff --git a/spec/ruby/language/method_spec.rb b/spec/ruby/language/method_spec.rb index d464e79403a382..acca074974743e 100644 --- a/spec/ruby/language/method_spec.rb +++ 
b/spec/ruby/language/method_spec.rb @@ -571,6 +571,13 @@ def m(a:) a end end end + evaluate <<-ruby do + def m(a:, **kw) [a, kw] end + ruby + + -> { m(b: 1) }.should raise_error(ArgumentError) + end + evaluate <<-ruby do def m(a: 1) a end ruby diff --git a/spec/ruby/language/proc_spec.rb b/spec/ruby/language/proc_spec.rb index 8360967ec8b38a..f8a29962b03ece 100644 --- a/spec/ruby/language/proc_spec.rb +++ b/spec/ruby/language/proc_spec.rb @@ -237,4 +237,11 @@ end end end + + describe "taking |required keyword arguments, **kw| arguments" do + it "raises ArgumentError for missing required argument" do + p = proc { |a:, **kw| [a, kw] } + -> { p.call() }.should raise_error(ArgumentError) + end + end end diff --git a/spec/ruby/library/bigdecimal/to_r_spec.rb b/spec/ruby/library/bigdecimal/to_r_spec.rb index 91d2b33993e6b1..c350beff08c765 100644 --- a/spec/ruby/library/bigdecimal/to_r_spec.rb +++ b/spec/ruby/library/bigdecimal/to_r_spec.rb @@ -13,4 +13,16 @@ r.denominator.should eql(1000000000000000000000000) end + it "returns a Rational from a BigDecimal with an exponent" do + r = BigDecimal("1E2").to_r + r.numerator.should eql(100) + r.denominator.should eql(1) + end + + it "returns a Rational from a negative BigDecimal with an exponent" do + r = BigDecimal("-1E2").to_r + r.numerator.should eql(-100) + r.denominator.should eql(1) + end + end diff --git a/spec/ruby/library/stringio/open_spec.rb b/spec/ruby/library/stringio/open_spec.rb index acab6e9056299d..3068e19435903a 100644 --- a/spec/ruby/library/stringio/open_spec.rb +++ b/spec/ruby/library/stringio/open_spec.rb @@ -167,10 +167,14 @@ io.should equal(ret) end - it "sets the mode to read-write" do + it "sets the mode to read-write (r+)" do io = StringIO.open("example") io.closed_read?.should be_false io.closed_write?.should be_false + + io = StringIO.new("example") + io.printf("%d", 123) + io.string.should == "123mple" end it "tries to convert the passed Object to a String using #to_str" do @@ -195,10 +199,14 @@ 
io.should equal(ret) end - it "sets the mode to read-write" do + it "sets the mode to read-write (r+)" do io = StringIO.open io.closed_read?.should be_false io.closed_write?.should be_false + + io = StringIO.new("example") + io.printf("%d", 123) + io.string.should == "123mple" end it "uses an empty String as the StringIO backend" do diff --git a/spec/ruby/library/stringio/printf_spec.rb b/spec/ruby/library/stringio/printf_spec.rb index 9dd1a3b4104772..f3f669a1855266 100644 --- a/spec/ruby/library/stringio/printf_spec.rb +++ b/spec/ruby/library/stringio/printf_spec.rb @@ -4,7 +4,7 @@ describe "StringIO#printf" do before :each do - @io = StringIO.new('example') + @io = StringIO.new() end it "returns nil" do @@ -12,9 +12,9 @@ end it "pads self with \\000 when the current position is after the end" do - @io.pos = 10 + @io.pos = 3 @io.printf("%d", 123) - @io.string.should == "example\000\000\000123" + @io.string.should == "\000\000\000123" end it "performs format conversion" do @@ -39,6 +39,27 @@ end end +describe "StringIO#printf when in read-write mode" do + before :each do + @io = StringIO.new("example", "r+") + end + + it "starts from the beginning" do + @io.printf("%s", "abcdefghijk") + @io.string.should == "abcdefghijk" + end + + it "does not truncate existing string" do + @io.printf("%s", "abc") + @io.string.should == "abcmple" + end + + it "correctly updates self's position" do + @io.printf("%s", "abc") + @io.pos.should eql(3) + end +end + describe "StringIO#printf when in append mode" do before :each do @io = StringIO.new("example", "a") diff --git a/spec/ruby/library/stringio/read_nonblock_spec.rb b/spec/ruby/library/stringio/read_nonblock_spec.rb index 2a8f926bd06475..d4ec56d9aadafa 100644 --- a/spec/ruby/library/stringio/read_nonblock_spec.rb +++ b/spec/ruby/library/stringio/read_nonblock_spec.rb @@ -5,10 +5,21 @@ describe "StringIO#read_nonblock when passed length, buffer" do it_behaves_like :stringio_read, :read_nonblock + + it "accepts :exception option" 
do + io = StringIO.new("example") + io.read_nonblock(3, buffer = "", exception: true) + buffer.should == "exa" + end end describe "StringIO#read_nonblock when passed length" do it_behaves_like :stringio_read_length, :read_nonblock + + it "accepts :exception option" do + io = StringIO.new("example") + io.read_nonblock(3, exception: true).should == "exa" + end end describe "StringIO#read_nonblock when passed nil" do diff --git a/spec/ruby/library/stringio/write_nonblock_spec.rb b/spec/ruby/library/stringio/write_nonblock_spec.rb index 4f4c5039fe248c..a457b976679ee6 100644 --- a/spec/ruby/library/stringio/write_nonblock_spec.rb +++ b/spec/ruby/library/stringio/write_nonblock_spec.rb @@ -8,6 +8,12 @@ describe "StringIO#write_nonblock when passed [String]" do it_behaves_like :stringio_write_string, :write_nonblock + + it "accepts :exception option" do + io = StringIO.new("12345", "a") + io.write_nonblock("67890", exception: true) + io.string.should == "1234567890" + end end describe "StringIO#write_nonblock when self is not writable" do diff --git a/spec/ruby/library/zlib/deflate/deflate_spec.rb b/spec/ruby/library/zlib/deflate/deflate_spec.rb index 828880f8d8daf0..50a563ef6f4bb8 100644 --- a/spec/ruby/library/zlib/deflate/deflate_spec.rb +++ b/spec/ruby/library/zlib/deflate/deflate_spec.rb @@ -58,6 +58,11 @@ Array.new(31, 0) + [24, 128, 0, 0, 1]).pack('C*') end + + it "has a binary encoding" do + @deflator.deflate("").encoding.should == Encoding::BINARY + @deflator.finish.encoding.should == Encoding::BINARY + end end describe "Zlib::Deflate#deflate" do diff --git a/spec/ruby/library/zlib/inflate/inflate_spec.rb b/spec/ruby/library/zlib/inflate/inflate_spec.rb index cc33bd4c325aaf..79b72bf91c821f 100644 --- a/spec/ruby/library/zlib/inflate/inflate_spec.rb +++ b/spec/ruby/library/zlib/inflate/inflate_spec.rb @@ -39,6 +39,13 @@ @inflator.finish.should == 'uncompressed_data' end + it "has a binary encoding" do + data = [120, 156, 99, 96, 128, 1, 0, 0, 10, 0, 1].pack('C*') 
+ unzipped = @inflator.inflate data + @inflator.finish.encoding.should == Encoding::BINARY + unzipped.encoding.should == Encoding::BINARY + end + end describe "Zlib::Inflate.inflate" do diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index ae557b03d76a0a..aa632b963b8086 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -63,6 +63,48 @@ end end + describe "rb_enc_strlen" do + before :each do + @str = 'こにちわ' # Each codepoint in this string is 3 bytes in UTF-8 + end + + it "returns the correct string length for the encoding" do + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_8).should == 4 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::BINARY).should == 12 + end + + it "returns the string length based on a fixed-width encoding's character length, even if the encoding is incompatible" do + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_16BE).should == 6 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_16LE).should == 6 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_32BE).should == 3 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_32LE).should == 3 + end + + it "does not consider strings to be NUL-terminated" do + s = "abc\0def" + @s.rb_enc_strlen(s, s.bytesize, Encoding::US_ASCII).should == 7 + @s.rb_enc_strlen(s, s.bytesize, Encoding::UTF_8).should == 7 + end + + describe "handles broken strings" do + it "combines valid character and invalid character counts in UTF-8" do + # The result is 3 because `rb_enc_strlen` counts the first valid character and then adds + # the byte count for the invalid character that follows for 1 + 2. 
+ @s.rb_enc_strlen(@str, 5, Encoding::UTF_8).should == 3 + end + + it "combines valid character and invalid character counts in UTF-16" do + @s.rb_enc_strlen(@str, 5, Encoding::UTF_16BE).should == 3 + end + + it "rounds up for fixed-width encodings" do + @s.rb_enc_strlen(@str, 7, Encoding::UTF_32BE).should == 2 + @s.rb_enc_strlen(@str, 7, Encoding::UTF_32LE).should == 2 + @s.rb_enc_strlen(@str, 5, Encoding::BINARY).should == 5 + end + end + end + describe "rb_enc_find" do it "returns the encoding of an Encoding" do @s.rb_enc_find("UTF-8").should == "UTF-8" diff --git a/spec/ruby/optional/capi/ext/encoding_spec.c b/spec/ruby/optional/capi/ext/encoding_spec.c index c49f6cde7e6e00..865fc484be2f19 100644 --- a/spec/ruby/optional/capi/ext/encoding_spec.c +++ b/spec/ruby/optional/capi/ext/encoding_spec.c @@ -301,6 +301,14 @@ static VALUE encoding_spec_rb_enc_codelen(VALUE self, VALUE code, VALUE encoding return INT2FIX(rb_enc_codelen(c, enc)); } +static VALUE encoding_spec_rb_enc_strlen(VALUE self, VALUE str, VALUE length, VALUE encoding) { + int l = FIX2INT(length); + char *p = RSTRING_PTR(str); + char *e = p + l; + + return LONG2FIX(rb_enc_strlen(p, e, rb_to_encoding(encoding))); +} + void Init_encoding_spec(void) { VALUE cls; native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*)); @@ -335,6 +343,7 @@ void Init_encoding_spec(void) { rb_define_method(cls, "rb_enc_compatible", encoding_spec_rb_enc_compatible, 2); rb_define_method(cls, "rb_enc_copy", encoding_spec_rb_enc_copy, 2); rb_define_method(cls, "rb_enc_codelen", encoding_spec_rb_enc_codelen, 2); + rb_define_method(cls, "rb_enc_strlen", encoding_spec_rb_enc_strlen, 3); rb_define_method(cls, "rb_enc_find", encoding_spec_rb_enc_find, 1); rb_define_method(cls, "rb_enc_find_index", encoding_spec_rb_enc_find_index, 1); rb_define_method(cls, "rb_enc_isalnum", encoding_spec_rb_enc_isalnum, 2); diff --git a/spec/ruby/optional/capi/ext/string_spec.c b/spec/ruby/optional/capi/ext/string_spec.c index 
b9a4a55853d419..9cbb50484df1d9 100644 --- a/spec/ruby/optional/capi/ext/string_spec.c +++ b/spec/ruby/optional/capi/ext/string_spec.c @@ -437,6 +437,12 @@ VALUE string_spec_RSTRING_PTR_read(VALUE self, VALUE str, VALUE path) { return capacities; } +VALUE string_spec_RSTRING_PTR_null_terminate(VALUE self, VALUE str, VALUE min_length) { + char* ptr = RSTRING_PTR(str); + char* end = ptr + RSTRING_LEN(str); + return rb_str_new(end, FIX2LONG(min_length)); +} + VALUE string_spec_StringValue(VALUE self, VALUE str) { return StringValue(str); } @@ -662,6 +668,7 @@ void Init_string_spec(void) { rb_define_method(cls, "RSTRING_PTR_after_funcall", string_spec_RSTRING_PTR_after_funcall, 2); rb_define_method(cls, "RSTRING_PTR_after_yield", string_spec_RSTRING_PTR_after_yield, 1); rb_define_method(cls, "RSTRING_PTR_read", string_spec_RSTRING_PTR_read, 2); + rb_define_method(cls, "RSTRING_PTR_null_terminate", string_spec_RSTRING_PTR_null_terminate, 2); rb_define_method(cls, "StringValue", string_spec_StringValue, 1); rb_define_method(cls, "SafeStringValue", string_spec_SafeStringValue, 1); rb_define_method(cls, "rb_str_hash", string_spec_rb_str_hash, 1); diff --git a/spec/ruby/optional/capi/ext/util_spec.c b/spec/ruby/optional/capi/ext/util_spec.c index a7269353c2901b..95ba71ea9dc11f 100644 --- a/spec/ruby/optional/capi/ext/util_spec.c +++ b/spec/ruby/optional/capi/ext/util_spec.c @@ -7,15 +7,18 @@ extern "C" { #endif VALUE util_spec_rb_scan_args(VALUE self, VALUE argv, VALUE fmt, VALUE expected, VALUE acc) { - int i, result, argc = (int)RARRAY_LEN(argv); - VALUE args[6], failed, a1, a2, a3, a4, a5, a6; - - failed = rb_intern("failed"); - a1 = a2 = a3 = a4 = a5 = a6 = failed; - - for(i = 0; i < argc; i++) { - args[i] = rb_ary_entry(argv, i); - } + int result, argc; + VALUE a1, a2, a3, a4, a5, a6; + + argc = (int) RARRAY_LEN(argv); + VALUE* args = RARRAY_PTR(argv); + /* the line above can be replaced with this for Ruby implementations which do not support RARRAY_PTR() yet + VALUE 
args[6]; + for(int i = 0; i < argc; i++) { + args[i] = rb_ary_entry(argv, i); + } */ + + a1 = a2 = a3 = a4 = a5 = a6 = INT2FIX(-1); #ifdef RB_SCAN_ARGS_KEYWORDS if (*RSTRING_PTR(fmt) == 'k') { diff --git a/spec/ruby/optional/capi/string_spec.rb b/spec/ruby/optional/capi/string_spec.rb index 7ad4d10ee4b1ec..0558fc9f7ddd41 100644 --- a/spec/ruby/optional/capi/string_spec.rb +++ b/spec/ruby/optional/capi/string_spec.rb @@ -97,6 +97,32 @@ def inspect end end + describe "rb_str_set_len on a UTF-16 String" do + before :each do + @str = "abcdefghij".force_encoding(Encoding::UTF_16BE) + # Make sure to unshare the string + @s.rb_str_modify(@str) + end + + it "inserts two NULL bytes at the length" do + @s.rb_str_set_len(@str, 4).b.should == "abcd".b + @s.rb_str_set_len(@str, 8).b.should == "abcd\x00\x00gh".b + end + end + + describe "rb_str_set_len on a UTF-32 String" do + before :each do + @str = "abcdefghijkl".force_encoding(Encoding::UTF_32BE) + # Make sure to unshare the string + @s.rb_str_modify(@str) + end + + it "inserts four NULL bytes at the length" do + @s.rb_str_set_len(@str, 4).b.should == "abcd".b + @s.rb_str_set_len(@str, 12).b.should == "abcd\x00\x00\x00\x00ijkl".b + end + end + describe "rb_str_buf_new" do it "returns the equivalent of an empty string" do buf = @s.rb_str_buf_new(10, nil) @@ -592,6 +618,12 @@ def inspect capacities[0].should < capacities[1] str.should == "fixture file contents to test read() with RSTRING_PTR" end + + it "terminates the string with at least (encoding min length) \\0 bytes" do + @s.RSTRING_PTR_null_terminate("abc", 1).should == "\x00" + @s.RSTRING_PTR_null_terminate("abc".encode("UTF-16BE"), 2).should == "\x00\x00" + @s.RSTRING_PTR_null_terminate("abc".encode("UTF-32BE"), 4).should == "\x00\x00\x00\x00" + end end describe "RSTRING_LEN" do diff --git a/spec/ruby/optional/capi/util_spec.rb b/spec/ruby/optional/capi/util_spec.rb index 64b08940875336..38f6f47b1a7e72 100644 --- a/spec/ruby/optional/capi/util_spec.rb +++ 
b/spec/ruby/optional/capi/util_spec.rb @@ -15,8 +15,9 @@ end it "assigns the required arguments scanned" do - @o.rb_scan_args([1, 2], "2", 2, @acc).should == 2 - ScratchPad.recorded.should == [1, 2] + obj = Object.new + @o.rb_scan_args([obj, 2], "2", 2, @acc).should == 2 + ScratchPad.recorded.should == [obj, 2] end it "raises an ArgumentError if there are insufficient arguments" do diff --git a/spec/ruby/security/cve_2019_8325_spec.rb b/spec/ruby/security/cve_2019_8325_spec.rb index 7c5e216568e4f1..bbddb3a6cec304 100644 --- a/spec/ruby/security/cve_2019_8325_spec.rb +++ b/spec/ruby/security/cve_2019_8325_spec.rb @@ -5,16 +5,17 @@ describe "CVE-2019-8325 is resisted by" do describe "sanitising error message components" do - silent_ui = Module.new do - attr_accessor :ui - def self.extended(obj) - obj.ui = Gem::SilentUI.new - end + before :each do + @ui = Gem::SilentUI.new + end + + after :each do + @ui.close end it "for the 'while executing' message" do manager = Gem::CommandManager.new - manager.extend(silent_ui) + manager.ui = @ui def manager.process_args(args, build_args) raise StandardError, "\e]2;nyan\a" end @@ -34,7 +35,7 @@ def manager.terminate_interaction(n) it "for the 'loading command' message" do manager = Gem::CommandManager.new - manager.extend(silent_ui) + manager.ui = @ui def manager.require(x) raise 'foo' end diff --git a/spec/ruby/shared/sizedqueue/new.rb b/spec/ruby/shared/sizedqueue/new.rb index 713785fb50da44..2573194efb21ae 100644 --- a/spec/ruby/shared/sizedqueue/new.rb +++ b/spec/ruby/shared/sizedqueue/new.rb @@ -1,7 +1,12 @@ describe :sizedqueue_new, shared: true do - it "raises a TypeError when the given argument is not Numeric" do - -> { @object.call("foo") }.should raise_error(TypeError) + it "raises a TypeError when the given argument doesn't respond to #to_int" do + -> { @object.call("12") }.should raise_error(TypeError) -> { @object.call(Object.new) }.should raise_error(TypeError) + + @object.call(12.9).max.should == 12 + object = 
Object.new + object.define_singleton_method(:to_int) { 42 } + @object.call(object).max.should == 42 end it "raises an argument error when no argument is given" do From 2ffaa377c212279e4a8bf1da8ac65a00dcdadd53 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 11 May 2022 15:17:44 -0400 Subject: [PATCH 285/546] WIP backend IR sketch --- yjit/src/asm/x86_64/mod.rs | 24 +- yjit/src/ir.rs | 479 +++++++++++++++++++++++++++++++++++++ yjit/src/lib.rs | 1 + 3 files changed, 492 insertions(+), 12 deletions(-) create mode 100644 yjit/src/ir.rs diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 6eb7efaa0a0759..b4ef2e4bf9789a 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -9,20 +9,20 @@ mod tests; pub struct X86Imm { // Size in bits - num_bits: u8, + pub num_bits: u8, // The value of the immediate - value: i64 + pub value: i64 } #[derive(Clone, Copy, Debug)] pub struct X86UImm { // Size in bits - num_bits: u8, + pub num_bits: u8, // The value of the immediate - value: u64 + pub value: u64 } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -38,32 +38,32 @@ pub enum RegType pub struct X86Reg { // Size in bits - num_bits: u8, + pub num_bits: u8, // Register type - reg_type: RegType, + pub reg_type: RegType, // Register index number - reg_no: u8, + pub reg_no: u8, } #[derive(Clone, Copy, Debug)] pub struct X86Mem { // Size in bits - num_bits: u8, + pub num_bits: u8, /// Base register number - base_reg_no: u8, + pub base_reg_no: u8, /// Index register number - idx_reg_no: Option, + pub idx_reg_no: Option, /// SIB scale exponent value (power of two, two bits) - scale_exp: u8, + pub scale_exp: u8, /// Constant displacement from the base, not scaled - disp: i32, + pub disp: i32, } #[derive(Clone, Copy, Debug)] diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs new file mode 100644 index 00000000000000..a20a9824934a3a --- /dev/null +++ b/yjit/src/ir.rs @@ -0,0 +1,479 @@ +#![allow(dead_code)] +#![allow(unused_variables)] 
+#![allow(unused_imports)] + +use std::convert::From; +use crate::cruby::{VALUE}; +use crate::virtualmem::{CodePtr}; +use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType}; +use crate::core::{Context, Type, TempMapping}; + + + + +/* +// Minimally, we might want to specify how many operands and branch targets an insn has +// Branch targets are not interchangeable with other operand types. We distinguish +// between branch and regular instructions. +// +// TODO: should mark instructions that produce no output operand +// +make_ops! { + (Comment, 1, 0), + ... + + // Call is variadic, might need to be special-cased +} +*/ + + + + + + + + + +/// Instruction opcodes +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Op +{ + // Add a comment into the IR at the point that this instruction is added. It + // won't have any impact on the actual compiled code, but it will impact + // the output of ir_print_insns. Accepts as its only operand an EIR_IMM + // operand (typically generated by ir_str_ptr). + Comment, + + // Add a label into the IR at the point that this instruction is added. It + // will eventually be translated into an offset when generating code such + // that EIR_LABEL_IDX operands know where to jump to. Accepts as its only + // operand an EIR_LABEL_NAME operand (typically generated by ir_label_opnd). + Label, + + // Add two operands together, and return the result as a new operand. This + // operand can then be used as the operand on another instruction. It + // accepts two operands, which can be of any type. + // + // Under the hood when allocating registers, the IR will determine the most + // efficient way to get these values into memory. For example, if both + // operands are immediates, then it will load the first one into a register + // first with a mov instruction and then add them together. If one of them + // is a register, however, it will just perform a single add instruction.
+ Add, + + // This is the same as the OP_ADD instruction, except for subtraction. + Sub, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary AND operation. + And, + + // Perform the NOT operation on an individual operand, and return the result + // as a new operand. This operand can then be used as the operand on another + // instruction. + Not, + + // + // Low-level instructions + // + + // A low-level mov instruction. It accepts two operands. + Mov, + + // Bitwise AND test instruction + Test, + + // Jump if not zero + Jnz, + + /* + // The following are conditional jump instructions. They all accept as their + // first operand an EIR_LABEL_NAME, which is used as the target of the jump. + // + // The OP_JUMP_EQ instruction accepts two additional operands, to be + // compared for equality. If they're equal, then the generated code jumps to + // the target label. If they're not, then it continues on to the next + // instruction. + JumpEq, + + // The OP_JUMP_NE instruction is very similar to the OP_JUMP_EQ instruction, + // except it compares for inequality instead. + JumpNe, + + // Checks the overflow flag and conditionally jumps to the target if it is + // currently set. + JumpOvf, + + // A low-level call instruction for calling a function by a pointer. It + // accepts one operand of type EIR_IMM that should be a pointer to the + // function. Usually this is done by first casting the function to a void*, + // as in: ir_const_ptr((void *)&my_function)). + Call, + + // Calls a function by a pointer and returns an operand that contains the + // result of the function. Accepts as its operands a pointer to a function + // of type EIR_IMM (usually generated from ir_const_ptr) and a variable + // number of arguments to the function being called. 
+ // + // This is the higher-level instruction that should be used when you want to + // call a function with arguments, as opposed to OP_CALL which is + // lower-level and just calls a function without moving arguments into + // registers for you. + CCall, + + // Returns from the function being generated immediately. This is different + // from OP_RETVAL in that it does nothing with the return value register + // (whatever is in there is what will get returned). Accepts no operands. + Ret, + + // First, moves a value into the return value register. Then, returns from + // the generated function. Accepts as its only operand the value that should + // be returned from the generated function. + RetVal, + + // A low-level cmp instruction. It accepts two operands. The first it + // expects to be a register. The second can be anything. Most of the time + // this instruction shouldn't be used by the developer since other + // instructions break down to this one. + Cmp, + + // A conditional move instruction that should be preceeded at some point by + // an OP_CMP instruction that would have set the requisite comparison flags. + // Accepts 2 operands, both of which are expected to be of the EIR_REG type. + // + // If the comparison indicates the left compared value is greater than or + // equal to the right compared value, then the conditional move is executed, + // otherwise we just continue on to the next instruction. + // + // This is considered a low-level instruction, and the OP_SELECT_* variants + // should be preferred if possible. + CMovGE, + + // The same as OP_CMOV_GE, except the comparison is greater than. + CMovGT, + + // The same as OP_CMOV_GE, except the comparison is less than or equal. + CMovLE, + + // The same as OP_CMOV_GE, except the comparison is less than. + CMovLT, + + // Selects between two different values based on a comparison of two other + // values. Accepts 4 operands. The first two are the basis of the + // comparison. 
The second two are the "then" case and the "else" case. You + // can effectively think of this instruction as a ternary operation, where + // the first two values are being compared. + // + // OP_SELECT_GE performs the described ternary using a greater than or equal + // comparison, that is if the first operand is greater than or equal to the + // second operand. + SelectGE, + + // The same as OP_SELECT_GE, except the comparison is greater than. + SelectGT, + + // The same as OP_SELECT_GE, except the comparison is less than or equal. + SelectLE, + + // The same as OP_SELECT_GE, except the comparison is less than. + SelectLT, + + // For later: + // These encode Ruby true/false semantics + // Can be used to enable op fusion of Ruby compare + branch. + // OP_JUMP_TRUE, // (opnd, target) + // OP_JUMP_FALSE, // (opnd, target) + + // For later: + // OP_GUARD_HEAP, // (opnd, target) + // OP_GUARD_IMM, // (opnd, target) + // OP_GUARD_FIXNUM, // (opnd, target) + + // For later: + // OP_COUNTER_INC, (counter_name) + + // For later: + // OP_LEA, + // OP_TEST, + */ +} + + + + + + + + + + +// Register value used by IR operands +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Reg +{ + // Register number/index + reg_no: u8, + + // Size in bits + num_bits: u8, + + // Special register flag EC/CFP/SP/SELF + special: bool, +} + +// Memory location +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Mem +{ + // Base register + base: Reg, + + // Offset relative to the base pointer + disp: i32, + + // Size in bits + num_bits: u8, +} + +/// Operand to an IR instruction +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Opnd +{ + None, // For insns with no output + + Stack(u16), // Value on the temp stack (idx) + Local(u16), // Local variable (idx, do we need depth too?) 
+ Value(VALUE), // Immediate Ruby value, may be GC'd, movable + InsnOut(usize), // Output of a preceding instruction in this block + String(String), // String constant, used for comments + + // Low-level operands, for lowering + Imm(i64), // Raw signed immediate + UImm(u64), // Raw unsigned immediate + Mem(Mem), // Memory location (num_bits, base_ptr, const_offset) + Reg(Reg), // Machine register (num_bits, idx) +} + +// Special register constants +pub const EC : Opnd = Opnd::Reg(Reg { reg_no: 0, num_bits: 64, special: true }); +pub const CFP : Opnd = Opnd::Reg(Reg { reg_no: 1, num_bits: 64, special: true }); +pub const SP : Opnd = Opnd::Reg(Reg { reg_no: 2, num_bits: 64, special: true }); +pub const SELF : Opnd = Opnd::Reg(Reg { reg_no: 3, num_bits: 64, special: true }); + +impl Opnd +{ + // Convenience constructor for memory operands + pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { + match base { + Opnd::Reg(base_reg) => { + assert!(base_reg.num_bits == 64 && !base_reg.special); + Opnd::Mem(Mem { + num_bits: num_bits, + base: base_reg, + disp: disp, + }) + }, + _ => unreachable!() + } + } +} + +/// Method to convert from an X86Opnd to an IR Opnd +impl From for Opnd { + fn from(opnd: X86Opnd) -> Self { + match opnd { + X86Opnd::None => Opnd::None, + X86Opnd::UImm(X86UImm{ value, .. }) => Opnd::UImm(value), + X86Opnd::Imm(X86Imm{ value, .. 
}) => Opnd::Imm(value), + + // General-purpose register + X86Opnd::Reg(X86Reg{ num_bits, reg_no, reg_type: RegType::GP }) => { + Opnd::Reg(Reg { + reg_no, + num_bits, + special: false, + }) + } + + // Memory operand with displacement + X86Opnd::Mem(X86Mem{ num_bits, base_reg_no, disp, idx_reg_no: None, scale_exp: 0 }) => { + let base_reg = Reg { num_bits: 64, reg_no: base_reg_no, special: false }; + + Opnd::Mem(Mem { + base: base_reg, + disp, + num_bits + }) + } + + _ => panic!("unsupported x86 operand type") + } + } +} + + + + + + +/// Branch target (something that we can jump to) +/// for branch instructions +#[derive(Clone, PartialEq, Eq, Debug)] +enum BranchTarget +{ + CodePtr(CodePtr), // Pointer to a piece of code (e.g. side-exit) + LabelName(String), // A label without an index in the output + LabelIdx(u32), // A label that has been indexed +} + +/// YJIT IR instruction +pub struct Insn +{ + // Opcode for the instruction + op: Op, + + // List of input operands/values + opnds: Vec, + + // List of branch targets (branch instructions only) + targets: Vec, + + // Position in the generated machine code + // Useful for comments and for patching jumps + pos: Option, +} + +/// Object into which we assemble instructions to be +/// optimized and lowered +struct Assembler +{ + insns: Vec +} + +impl Assembler +{ + fn new() -> Assembler { + Assembler { + insns: Vec::default() + } + } + + fn push_insn(&mut self, op: Op, opnds: Vec, targets: Vec) -> Opnd + { + let insn_idx = self.insns.len(); + + let insn = Insn { + op: op, + opnds: opnds, + targets: targets, + pos: None + }; + self.insns.push(insn); + + // Return an operand for the output of this instruction + Opnd::InsnOut(insn_idx) + } + + // TODO: + //fn label(&self, name: &str) -> BranchTarget + //{ + //} + + // Optimize and compile the stored instructions + fn compile() + { + // Peephole optimizations + // Register allocation + // Generic lowering pass + // Platform-specific lowering + } +} + +impl Assembler +{ + fn 
add(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd + { + self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]) + } + + fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd + { + self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); + Opnd::None + } +} + +// NOTE: these methods are temporary and will likely move +// to context.rs later +// They are just wrappers to convert from X86Opnd into the IR Opnd type +impl Context +{ + pub fn ir_stack_pop(&mut self, n: usize) -> Opnd { + self.stack_pop(n).into() + } + + pub fn ir_stack_push(&mut self, val_type: Type) -> Opnd { + self.stack_push(val_type).into() + } + + pub fn ir_stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd { + self.stack_push_mapping((mapping, temp_type)).into() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::core::*; + use InsnOpnd::*; + + // Test that this function type checks + fn gen_dup( + ctx: &mut Context, + asm: &mut Assembler, + ) { + let dup_val = ctx.ir_stack_pop(0); + let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); + + let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); + asm.mov(loc0, dup_val); + } + + + + + + #[test] + fn test_add() { + let mut asm = Assembler::new(); + let out = asm.add(SP, Opnd::UImm(1)); + asm.add(out, Opnd::UImm(2)); + } +} + + + + + +// TODO: we need a test instruction +// Can we combine this with a branch? 
+// +/* +fn guard_object_is_heap( + cb: &mut CodeBlock, + object_opnd: X86Opnd, + _ctx: &mut Context, + side_exit: CodePtr, +) { + add_comment(cb, "guard object is heap"); + + // Test that the object is not an immediate + test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); + jnz_ptr(cb, side_exit); + + // Test that the object is not false or nil + cmp(cb, object_opnd, uimm_opnd(Qnil.into())); + jbe_ptr(cb, side_exit); +} +*/ \ No newline at end of file diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 6772f551a87b99..019189e8e89276 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -4,6 +4,7 @@ #![allow(clippy::identity_op)] // Sometimes we do it for style mod asm; +mod ir; mod codegen; mod core; mod cruby; From 909d214708d87e1dab618a04b4780dd926c721ca Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 11 May 2022 16:45:15 -0400 Subject: [PATCH 286/546] Progress on IR sketch --- yjit/src/ir.rs | 80 +++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index a20a9824934a3a..8c35f7f61590c0 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -319,7 +319,7 @@ impl From for Opnd { /// Branch target (something that we can jump to) /// for branch instructions #[derive(Clone, PartialEq, Eq, Debug)] -enum BranchTarget +enum Target { CodePtr(CodePtr), // Pointer to a piece of code (e.g. 
side-exit) LabelName(String), // A label without an index in the output @@ -336,7 +336,7 @@ pub struct Insn opnds: Vec, // List of branch targets (branch instructions only) - targets: Vec, + targets: Vec, // Position in the generated machine code // Useful for comments and for patching jumps @@ -358,7 +358,7 @@ impl Assembler } } - fn push_insn(&mut self, op: Op, opnds: Vec, targets: Vec) -> Opnd + fn push_insn(&mut self, op: Op, opnds: Vec, targets: Vec) -> Opnd { let insn_idx = self.insns.len(); @@ -375,7 +375,7 @@ impl Assembler } // TODO: - //fn label(&self, name: &str) -> BranchTarget + //fn label(&self, name: &str) -> Target //{ //} @@ -391,18 +391,41 @@ impl Assembler impl Assembler { + // Add a comment, no output operand + fn comment(&mut self, text: &str) + { + self.push_insn(Op::Add, vec![ Opnd::String(text.to_owned()) ], vec![]); + } + fn add(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]) } - fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd + // Low-level, no output operand + fn test(&mut self, opnd0: Opnd, opnd1: Opnd) + { + self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); + } + + // Jump if not zero + fn jnz(&mut self, target: Target) + { + self.push_insn(Op::Jnz, vec![], vec![target]); + } + + // Low-level, no output operand + fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) { self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); - Opnd::None } } + + + + + // NOTE: these methods are temporary and will likely move // to context.rs later // They are just wrappers to convert from X86Opnd into the IR Opnd type @@ -424,7 +447,7 @@ impl Context #[cfg(test)] mod tests { use super::*; - + use crate::cruby::*; use crate::core::*; use InsnOpnd::*; @@ -443,6 +466,23 @@ mod tests { + // TODO + fn guard_object_is_heap( + asm: &mut Assembler, + object_opnd: Opnd, + ctx: &mut Context, + side_exit: CodePtr, + ) { + asm.comment("guard object is heap"); + + // Test that the object is not an immediate + 
asm.test(object_opnd, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(Target::CodePtr(side_exit)); + + // Test that the object is not false or nil + //cmp(cb, object_opnd, uimm_opnd(Qnil.into())); + //jbe_ptr(cb, side_exit); + } #[test] fn test_add() { @@ -451,29 +491,3 @@ mod tests { asm.add(out, Opnd::UImm(2)); } } - - - - - -// TODO: we need a test instruction -// Can we combine this with a branch? -// -/* -fn guard_object_is_heap( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - _ctx: &mut Context, - side_exit: CodePtr, -) { - add_comment(cb, "guard object is heap"); - - // Test that the object is not an immediate - test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); - - // Test that the object is not false or nil - cmp(cb, object_opnd, uimm_opnd(Qnil.into())); - jbe_ptr(cb, side_exit); -} -*/ \ No newline at end of file From 96e5f9def0121a7ee4f1557b25dade7bdb558df8 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 12 May 2022 13:55:49 -0400 Subject: [PATCH 287/546] Add macro to define ops --- yjit/src/ir.rs | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index 8c35f7f61590c0..d3cc76d2d1a05f 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -254,12 +254,6 @@ pub enum Opnd Reg(Reg), // Machine register (num_bits, idx) } -// Special register constants -pub const EC : Opnd = Opnd::Reg(Reg { reg_no: 0, num_bits: 64, special: true }); -pub const CFP : Opnd = Opnd::Reg(Reg { reg_no: 1, num_bits: 64, special: true }); -pub const SP : Opnd = Opnd::Reg(Reg { reg_no: 2, num_bits: 64, special: true }); -pub const SELF : Opnd = Opnd::Reg(Reg { reg_no: 3, num_bits: 64, special: true }); - impl Opnd { // Convenience constructor for memory operands @@ -278,6 +272,12 @@ impl Opnd } } +// Special register constants +pub const EC : Opnd = Opnd::Reg(Reg { reg_no: 0, num_bits: 64, special: true }); +pub const CFP : Opnd = 
Opnd::Reg(Reg { reg_no: 1, num_bits: 64, special: true }); +pub const SP : Opnd = Opnd::Reg(Reg { reg_no: 2, num_bits: 64, special: true }); +pub const SELF : Opnd = Opnd::Reg(Reg { reg_no: 3, num_bits: 64, special: true }); + /// Method to convert from an X86Opnd to an IR Opnd impl From for Opnd { fn from(opnd: X86Opnd) -> Self { @@ -335,6 +335,7 @@ pub struct Insn // List of input operands/values opnds: Vec, + // Kevin asks: do we really need multiple branch targets? // List of branch targets (branch instructions only) targets: Vec, @@ -397,10 +398,12 @@ impl Assembler self.push_insn(Op::Add, vec![ Opnd::String(text.to_owned()) ], vec![]); } + /* fn add(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]) } + */ // Low-level, no output operand fn test(&mut self, opnd0: Opnd, opnd1: Opnd) @@ -408,23 +411,34 @@ impl Assembler self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); } - // Jump if not zero - fn jnz(&mut self, target: Target) - { - self.push_insn(Op::Jnz, vec![], vec![target]); - } - // Low-level, no output operand fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) { self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); } -} - - + // Jump if not zero + fn jnz(&mut self, target: Target) + { + self.push_insn(Op::Jnz, vec![], vec![target]); + } +} +macro_rules! 
def_push_insn_2_opnd { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd + { + self.push_insn($opcode, vec![opnd0, opnd1], vec![]) + } + } + }; +} +def_push_insn_2_opnd!(add, Op::Add); +def_push_insn_2_opnd!(sub, Op::Sub); +def_push_insn_2_opnd!(and, Op::And); // NOTE: these methods are temporary and will likely move // to context.rs later @@ -463,10 +477,6 @@ mod tests { asm.mov(loc0, dup_val); } - - - - // TODO fn guard_object_is_heap( asm: &mut Assembler, object_opnd: Opnd, From 92e9d1e66186d41a01f6116d1993fbfd66fdf1a6 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 12 May 2022 14:05:48 -0400 Subject: [PATCH 288/546] Switch IR to use Option --- yjit/src/ir.rs | 55 +++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index d3cc76d2d1a05f..13f81f0af7f9c9 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -44,10 +44,7 @@ pub enum Op // operand (typically generated by ir_str_ptr). Comment, - // Add a label into the IR at the point that this instruction is added. It - // will eventually be translated into an offset when generating code such - // that EIR_LABEL_IDX operands know where to jump to. Accepts as its only - // operand an EIR_LABEL_NAME operand (typically generated by ir_label_opnd). + // Add a label into the IR at the point that this instruction is added. Label, // Add two operands together, and return the result as a new operand. This @@ -323,7 +320,7 @@ enum Target { CodePtr(CodePtr), // Pointer to a piece of code (e.g. side-exit) LabelName(String), // A label without an index in the output - LabelIdx(u32), // A label that has been indexed + LabelIdx(usize), // A label that has been indexed } /// YJIT IR instruction @@ -335,9 +332,8 @@ pub struct Insn // List of input operands/values opnds: Vec, - // Kevin asks: do we really need multiple branch targets? 
// List of branch targets (branch instructions only) - targets: Vec, + target: Option, // Position in the generated machine code // Useful for comments and for patching jumps @@ -359,14 +355,14 @@ impl Assembler } } - fn push_insn(&mut self, op: Op, opnds: Vec, targets: Vec) -> Opnd + fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd { let insn_idx = self.insns.len(); let insn = Insn { op: op, opnds: opnds, - targets: targets, + target: target, pos: None }; self.insns.push(insn); @@ -375,10 +371,26 @@ impl Assembler Opnd::InsnOut(insn_idx) } - // TODO: - //fn label(&self, name: &str) -> Target - //{ - //} + // Add a label at the current position + fn label(&mut self, name: &str) -> Target + { + let insn_idx = self.insns.len(); + + let insn = Insn { + op: Op::Label, + opnds: vec![], + target: None, + pos: None + }; + self.insns.push(insn); + + Target::LabelIdx(insn_idx) + } + + fn alloc_regs(&mut self) + { + // ??? + } // Optimize and compile the stored instructions fn compile() @@ -395,32 +407,25 @@ impl Assembler // Add a comment, no output operand fn comment(&mut self, text: &str) { - self.push_insn(Op::Add, vec![ Opnd::String(text.to_owned()) ], vec![]); - } - - /* - fn add(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd - { - self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]) + self.push_insn(Op::Add, vec![ Opnd::String(text.to_owned()) ], None); } - */ // Low-level, no output operand fn test(&mut self, opnd0: Opnd, opnd1: Opnd) { - self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); + self.push_insn(Op::Add, vec![opnd0, opnd1], None); } // Low-level, no output operand fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) { - self.push_insn(Op::Add, vec![opnd0, opnd1], vec![]); + self.push_insn(Op::Add, vec![opnd0, opnd1], None); } // Jump if not zero fn jnz(&mut self, target: Target) { - self.push_insn(Op::Jnz, vec![], vec![target]); + self.push_insn(Op::Jnz, vec![], Some(target)); } } @@ -430,7 +435,7 @@ macro_rules! 
def_push_insn_2_opnd { { fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { - self.push_insn($opcode, vec![opnd0, opnd1], vec![]) + self.push_insn($opcode, vec![opnd0, opnd1], None) } } }; From 884cbaabd9c15cdc85809cf713c1be755ea70cf7 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 12 May 2022 14:16:09 -0400 Subject: [PATCH 289/546] Change push insn macros --- yjit/src/ir.rs | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index 13f81f0af7f9c9..f206d5c392daca 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -399,6 +399,9 @@ impl Assembler // Register allocation // Generic lowering pass // Platform-specific lowering + + // Question: should this method return machine code? + // How do we go from lowered/optimized insn to an array of bytes? } } @@ -410,18 +413,6 @@ impl Assembler self.push_insn(Op::Add, vec![ Opnd::String(text.to_owned()) ], None); } - // Low-level, no output operand - fn test(&mut self, opnd0: Opnd, opnd1: Opnd) - { - self.push_insn(Op::Add, vec![opnd0, opnd1], None); - } - - // Low-level, no output operand - fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) - { - self.push_insn(Op::Add, vec![opnd0, opnd1], None); - } - // Jump if not zero fn jnz(&mut self, target: Target) { @@ -429,7 +420,7 @@ impl Assembler } } -macro_rules! def_push_insn_2_opnd { +macro_rules! def_push_2_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler { @@ -441,9 +432,23 @@ macro_rules! def_push_insn_2_opnd { }; } -def_push_insn_2_opnd!(add, Op::Add); -def_push_insn_2_opnd!(sub, Op::Sub); -def_push_insn_2_opnd!(and, Op::And); +macro_rules! 
def_push_2_opnd_no_out { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) + { + self.push_insn($opcode, vec![opnd0, opnd1], None); + } + } + }; +} + +def_push_2_opnd!(add, Op::Add); +def_push_2_opnd!(sub, Op::Sub); +def_push_2_opnd!(and, Op::And); +def_push_2_opnd_no_out!(test, Op::Test); +def_push_2_opnd_no_out!(mov, Op::Mov); // NOTE: these methods are temporary and will likely move // to context.rs later From 5021f26b4ba270b2fc36a6fce7b4d54bb65b7062 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 12 May 2022 14:31:17 -0400 Subject: [PATCH 290/546] Complete sketch for guard_object_is_heap --- yjit/src/ir.rs | 61 +++++++++++++------------------------------------- 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index f206d5c392daca..fde942212d9b29 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -8,32 +8,6 @@ use crate::virtualmem::{CodePtr}; use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType}; use crate::core::{Context, Type, TempMapping}; - - - -/* -// Minimally, we might want to specify how many operands and branch targets an insn has -// Branch targets are not interchangeable with other operand types. We distinguish -// between branch and regular instructions. -// -// TODO: should mark instructions that produce no output operand -// -make_ops! { - (Comment, 1, 0), - ... - - // Call is variadic, might need to be special-cased -} -*/ - - - - - - - - - /// Instruction opcodes #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Op @@ -80,8 +54,12 @@ pub enum Op // Bitwise AND test instruction Test, - // Jump if not zero + // Compare two operands + Cmp, + + // Low-level conditional jump instructions Jnz, + Jbe, /* // The following are conditional jump instructions. They all accept as their @@ -128,12 +106,6 @@ pub enum Op // be returned from the generated function. RetVal, - // A low-level cmp instruction. 
It accepts two operands. The first it - // expects to be a register. The second can be anything. Most of the time - // this instruction shouldn't be used by the developer since other - // instructions break down to this one. - Cmp, - // A conditional move instruction that should be preceeded at some point by // an OP_CMP instruction that would have set the requisite comparison flags. // Accepts 2 operands, both of which are expected to be of the EIR_REG type. @@ -195,15 +167,6 @@ pub enum Op */ } - - - - - - - - - // Register value used by IR operands #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct Reg @@ -418,6 +381,11 @@ impl Assembler { self.push_insn(Op::Jnz, vec![], Some(target)); } + + fn jbe(&mut self, target: Target) + { + self.push_insn(Op::Jbe, vec![], Some(target)); + } } macro_rules! def_push_2_opnd { @@ -447,8 +415,9 @@ macro_rules! def_push_2_opnd_no_out { def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); -def_push_2_opnd_no_out!(test, Op::Test); def_push_2_opnd_no_out!(mov, Op::Mov); +def_push_2_opnd_no_out!(cmp, Op::Cmp); +def_push_2_opnd_no_out!(test, Op::Test); // NOTE: these methods are temporary and will likely move // to context.rs later @@ -496,12 +465,12 @@ mod tests { asm.comment("guard object is heap"); // Test that the object is not an immediate - asm.test(object_opnd, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.test(object_opnd.clone(), Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); asm.jnz(Target::CodePtr(side_exit)); // Test that the object is not false or nil - //cmp(cb, object_opnd, uimm_opnd(Qnil.into())); - //jbe_ptr(cb, side_exit); + asm.cmp(object_opnd.clone(), Opnd::UImm(Qnil.into())); + asm.jbe(Target::CodePtr(side_exit)); } #[test] From 7753b6b8b6a011d048a6b3aaf912d1dad7995b7b Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 13 May 2022 15:58:36 -0400 Subject: [PATCH 291/546] Removed String opnd so that we can derive Copy for Opnd --- yjit/src/ir.rs | 28 
++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index fde942212d9b29..c632368b7a94e7 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -196,7 +196,7 @@ pub struct Mem } /// Operand to an IR instruction -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Opnd { None, // For insns with no output @@ -205,7 +205,6 @@ pub enum Opnd Local(u16), // Local variable (idx, do we need depth too?) Value(VALUE), // Immediate Ruby value, may be GC'd, movable InsnOut(usize), // Output of a preceding instruction in this block - String(String), // String constant, used for comments // Low-level operands, for lowering Imm(i64), // Raw signed immediate @@ -287,11 +286,15 @@ enum Target } /// YJIT IR instruction +#[derive(Clone, Debug)] pub struct Insn { // Opcode for the instruction op: Op, + // Optional string for comments and labels + text: Option, + // List of input operands/values opnds: Vec, @@ -324,6 +327,7 @@ impl Assembler let insn = Insn { op: op, + text: None, opnds: opnds, target: target, pos: None @@ -334,6 +338,19 @@ impl Assembler Opnd::InsnOut(insn_idx) } + // Add a comment at the current position + fn comment(&mut self, text: &str) + { + let insn = Insn { + op: Op::Comment, + text: Some(text.to_owned()), + opnds: vec![], + target: None, + pos: None + }; + self.insns.push(insn); + } + // Add a label at the current position fn label(&mut self, name: &str) -> Target { @@ -341,6 +358,7 @@ impl Assembler let insn = Insn { op: Op::Label, + text: Some(name.to_owned()), opnds: vec![], target: None, pos: None @@ -370,12 +388,6 @@ impl Assembler impl Assembler { - // Add a comment, no output operand - fn comment(&mut self, text: &str) - { - self.push_insn(Op::Add, vec![ Opnd::String(text.to_owned()) ], None); - } - // Jump if not zero fn jnz(&mut self, target: Target) { From 2b7d4f277d120229fca4cc9665b44ef1e5cbf7e7 Mon Sep 17 00:00:00 2001 From: Kevin Newton 
Date: Fri, 13 May 2022 14:55:01 -0400 Subject: [PATCH 292/546] IR register allocation PR: https://github.com/Shopify/ruby/pull/289 --- yjit/src/ir.rs | 134 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 4 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index c632368b7a94e7..79dcc0200bb0fa 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -298,6 +298,9 @@ pub struct Insn // List of input operands/values opnds: Vec, + // Output operand for this instruction + out: Opnd, + // List of branch targets (branch instructions only) target: Option, @@ -310,29 +313,45 @@ pub struct Insn /// optimized and lowered struct Assembler { - insns: Vec + insns: Vec, + + /// Parallel vec with insns + /// Index of the last insn using the output of this insn + live_ranges: Vec } impl Assembler { fn new() -> Assembler { Assembler { - insns: Vec::default() + insns: Vec::default(), + live_ranges: Vec::default() } } fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd { + // If we find any InsnOut from previous instructions, we're going to + // update the live range of the previous instruction to point to this + // one. 
let insn_idx = self.insns.len(); + for opnd in &opnds { + if let Opnd::InsnOut(idx) = opnd { + self.live_ranges[*idx] = insn_idx; + } + } let insn = Insn { op: op, text: None, opnds: opnds, + out: Opnd::None, target: target, pos: None }; + self.insns.push(insn); + self.live_ranges.push(insn_idx); // Return an operand for the output of this instruction Opnd::InsnOut(insn_idx) @@ -345,6 +364,7 @@ impl Assembler op: Op::Comment, text: Some(text.to_owned()), opnds: vec![], + out: Opnd::None, target: None, pos: None }; @@ -360,6 +380,7 @@ impl Assembler op: Op::Label, text: Some(name.to_owned()), opnds: vec![], + out: Opnd::None, target: None, pos: None }; @@ -368,14 +389,83 @@ impl Assembler Target::LabelIdx(insn_idx) } - fn alloc_regs(&mut self) + /// Sets the out field on the various instructions that require allocated + /// registers because their output is used as the operand on a subsequent + /// instruction. This is our implementation of the linear scan algorithm. + fn alloc_regs(&mut self, regs: Vec) { - // ??? + // First, create the pool of registers. + let mut pool: u32 = 0; + + // Mutate the pool bitmap to indicate that the register at that index + // has been allocated and is live. + fn alloc_reg(pool: &mut u32, regs: &Vec) -> Reg { + for index in 0..regs.len() { + if (*pool & (1 << index)) == 0 { + *pool |= 1 << index; + return regs[index]; + } + } + + unreachable!("Register spill not supported"); + } + + // Mutate the pool bitmap to indicate that the given register is being + // returned as it is no longer used by the instruction that previously + // held it. + fn dealloc_reg(pool: &mut u32, regs: &Vec, reg: &Reg) { + let reg_index = regs.iter().position(|elem| elem == reg).unwrap(); + *pool &= !(1 << reg_index); + } + + // Next, create the next list of instructions. + let mut next_insns: Vec = Vec::default(); + + // Finally, walk the existing instructions and allocate. 
+ for (index, mut insn) in self.insns.drain(..).enumerate() { + if self.live_ranges[index] != index { + // This instruction is used by another instruction, so we need + // to allocate a register for it. + insn.out = Opnd::Reg(alloc_reg(&mut pool, ®s)); + } + + // Check if this is the last instruction that uses an operand that + // spans more than one instruction. In that case, return the + // allocated register to the pool. + for opnd in &insn.opnds { + if let Opnd::InsnOut(idx) = opnd { + // Since we have an InsnOut, we know it spans more that one + // instruction. + let start_index = *idx; + assert!(start_index < index); + + // We're going to check if this is the last instruction that + // uses this operand. If it is, we can return the allocated + // register to the pool. + if self.live_ranges[start_index] == index { + if let Opnd::Reg(reg) = next_insns[start_index].out { + dealloc_reg(&mut pool, ®s, ®); + } else { + unreachable!(); + } + } + } + } + + // Push the instruction onto the next list of instructions now that + // we have checked everything we needed to check. + next_insns.push(insn); + } + + assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); + self.insns = next_insns; } // Optimize and compile the stored instructions fn compile() { + // TODO: splitting pass, split_insns() + // Peephole optimizations // Register allocation // Generic lowering pass @@ -491,4 +581,40 @@ mod tests { let out = asm.add(SP, Opnd::UImm(1)); asm.add(out, Opnd::UImm(2)); } + + #[test] + fn test_alloc_regs() { + let mut asm = Assembler::new(); + + // Get the first output that we're going to reuse later. + let out1 = asm.add(EC, Opnd::UImm(1)); + + // Pad some instructions in to make sure it can handle that. + asm.add(EC, Opnd::UImm(2)); + + // Get the second output we're going to reuse. + let out2 = asm.add(EC, Opnd::UImm(3)); + + // Pad another instruction. + asm.add(EC, Opnd::UImm(4)); + + // Reuse both the previously captured outputs. 
+ asm.add(out1, out2); + + // Now get a third output to make sure that the pool has registers to + // allocate now that the previous ones have been returned. + let out3 = asm.add(EC, Opnd::UImm(5)); + asm.add(out3, Opnd::UImm(6)); + + // Here we're going to allocate the registers. + let reg1 = Reg { reg_no: 0, num_bits: 64, special: false }; + let reg2 = Reg { reg_no: 1, num_bits: 64, special: false }; + asm.alloc_regs(vec![reg1, reg2]); + + // Now we're going to verify that the out field has been appropriately + // updated for each of the instructions that needs it. + assert_eq!(asm.insns[0].out, Opnd::Reg(reg1)); + assert_eq!(asm.insns[2].out, Opnd::Reg(reg2)); + assert_eq!(asm.insns[5].out, Opnd::Reg(reg1)); + } } From a3d8e20ceaa934b56383c368f8c3838384f71a73 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 16 May 2022 14:48:28 -0400 Subject: [PATCH 293/546] Split insns (https://github.com/Shopify/ruby/pull/290) * Split instructions if necessary * Add a reusable transform_insns function * Split out comments labels from transform_insns * Refactor alloc_regs to use transform_insns --- yjit/src/ir.rs | 141 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 116 insertions(+), 25 deletions(-) diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs index 79dcc0200bb0fa..9a4fc559de80dd 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/ir.rs @@ -48,6 +48,9 @@ pub enum Op // Low-level instructions // + // A low-level instruction that loads a value into a register. + Load, + // A low-level mov instruction. It accepts two operands. Mov, @@ -389,10 +392,83 @@ impl Assembler Target::LabelIdx(insn_idx) } + /// Transform input instructions, consumes the input assembler + fn transform_insns(mut self, mut map_insn: F) -> Assembler + where F: FnMut(&mut Assembler, usize, Op, Vec, Option) + { + let mut asm = Assembler::new(); + + // indices maps from the old instruction index to the new instruction + // index. 
+ let mut indices: Vec = Vec::default(); + + // Map an operand to the next set of instructions by correcting previous + // InsnOut indices. + fn map_opnd(opnd: Opnd, indices: &mut Vec) -> Opnd { + if let Opnd::InsnOut(index) = opnd { + Opnd::InsnOut(indices[index]) + } else { + opnd + } + } + + for (index, insn) in self.insns.drain(..).enumerate() { + let opnds: Vec = insn.opnds.into_iter().map(|opnd| map_opnd(opnd, &mut indices)).collect(); + + // For each instruction, either handle it here or allow the map_insn + // callback to handle it. + match insn.op { + Op::Comment => { + asm.comment(insn.text.unwrap().as_str()); + }, + Op::Label => { + asm.label(insn.text.unwrap().as_str()); + }, + _ => { + map_insn(&mut asm, index, insn.op, opnds, insn.target); + } + }; + + // Here we're assuming that if we've pushed multiple instructions, + // the output that we're using is still the final instruction that + // was pushed. + indices.push(asm.insns.len() - 1); + } + + asm + } + + /// Transforms the instructions by splitting instructions that cannot be + /// represented in the final architecture into multiple instructions that + /// can. + fn split_insns(self) -> Assembler + { + self.transform_insns(|asm, _, op, opnds, target| { + match op { + // Check for Add, Sub, or Mov instructions with two memory + // operands. + Op::Add | Op::Sub | Op::Mov => { + match opnds.as_slice() { + [Opnd::Mem(_), Opnd::Mem(_)] => { + let output = asm.push_insn(Op::Load, vec![opnds[0]], None); + asm.push_insn(op, vec![output, opnds[1]], None); + }, + _ => { + asm.push_insn(op, opnds, target); + } + } + }, + _ => { + asm.push_insn(op, opnds, target); + } + }; + }) + } + /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. 
- fn alloc_regs(&mut self, regs: Vec) + fn alloc_regs(mut self, regs: Vec) -> Assembler { // First, create the pool of registers. let mut pool: u32 = 0; @@ -418,21 +494,12 @@ impl Assembler *pool &= !(1 << reg_index); } - // Next, create the next list of instructions. - let mut next_insns: Vec = Vec::default(); - - // Finally, walk the existing instructions and allocate. - for (index, mut insn) in self.insns.drain(..).enumerate() { - if self.live_ranges[index] != index { - // This instruction is used by another instruction, so we need - // to allocate a register for it. - insn.out = Opnd::Reg(alloc_reg(&mut pool, ®s)); - } - + let live_ranges: Vec = std::mem::take(&mut self.live_ranges); + let result = self.transform_insns(|asm, index, op, opnds, target| { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. - for opnd in &insn.opnds { + for opnd in &opnds { if let Opnd::InsnOut(idx) = opnd { // Since we have an InsnOut, we know it spans more that one // instruction. @@ -442,8 +509,8 @@ impl Assembler // We're going to check if this is the last instruction that // uses this operand. If it is, we can return the allocated // register to the pool. - if self.live_ranges[start_index] == index { - if let Opnd::Reg(reg) = next_insns[start_index].out { + if live_ranges[start_index] == index { + if let Opnd::Reg(reg) = asm.insns[start_index].out { dealloc_reg(&mut pool, ®s, ®); } else { unreachable!(); @@ -452,18 +519,25 @@ impl Assembler } } - // Push the instruction onto the next list of instructions now that - // we have checked everything we needed to check. - next_insns.push(insn); - } + asm.push_insn(op, opnds, target); + + if live_ranges[index] != index { + // This instruction is used by another instruction, so we need + // to allocate a register for it. 
+ let length = asm.insns.len(); + asm.insns[length - 1].out = Opnd::Reg(alloc_reg(&mut pool, ®s)); + } + }); assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); - self.insns = next_insns; + result } // Optimize and compile the stored instructions - fn compile() + fn compile(self, regs: Vec) -> Assembler { + self.split_insns().alloc_regs(regs) + // TODO: splitting pass, split_insns() // Peephole optimizations @@ -582,6 +656,23 @@ mod tests { asm.add(out, Opnd::UImm(2)); } + #[test] + fn test_split_insns() { + let mut asm = Assembler::new(); + + let reg1 = Reg { reg_no: 0, num_bits: 64, special: false }; + let reg2 = Reg { reg_no: 1, num_bits: 64, special: false }; + + asm.add( + Opnd::mem(64, Opnd::Reg(reg1), 0), + Opnd::mem(64, Opnd::Reg(reg2), 0) + ); + + let result = asm.split_insns(); + assert_eq!(result.insns.len(), 2); + assert_eq!(result.insns[0].op, Op::Load); + } + #[test] fn test_alloc_regs() { let mut asm = Assembler::new(); @@ -609,12 +700,12 @@ mod tests { // Here we're going to allocate the registers. let reg1 = Reg { reg_no: 0, num_bits: 64, special: false }; let reg2 = Reg { reg_no: 1, num_bits: 64, special: false }; - asm.alloc_regs(vec![reg1, reg2]); + let result = asm.alloc_regs(vec![reg1, reg2]); // Now we're going to verify that the out field has been appropriately // updated for each of the instructions that needs it. 
- assert_eq!(asm.insns[0].out, Opnd::Reg(reg1)); - assert_eq!(asm.insns[2].out, Opnd::Reg(reg2)); - assert_eq!(asm.insns[5].out, Opnd::Reg(reg1)); + assert_eq!(result.insns[0].out, Opnd::Reg(reg1)); + assert_eq!(result.insns[2].out, Opnd::Reg(reg2)); + assert_eq!(result.insns[5].out, Opnd::Reg(reg1)); } } From 18dc379aca69fd9dc72debae3fd504399799e86f Mon Sep 17 00:00:00 2001 From: git Date: Tue, 30 Aug 2022 00:45:14 +0900 Subject: [PATCH 294/546] * 2022-08-30 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 812897df21a4dd..2f2b02a57fe9ab 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 29 +#define RUBY_RELEASE_DAY 30 #include "ruby/version.h" #include "ruby/internal/abi.h" From e9cc17dcc9a365d59330b8c37baeafed5d75a519 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 17 May 2022 17:31:36 -0400 Subject: [PATCH 295/546] Start work on platform-specific codegen --- yjit/src/asm/x86_64/mod.rs | 51 ++++++++---- yjit/src/{ => backend}/ir.rs | 138 ++++++++++++++++----------------- yjit/src/backend/mod.rs | 3 + yjit/src/backend/x86_64/mod.rs | 55 +++++++++++++ yjit/src/codegen.rs | 1 - yjit/src/lib.rs | 2 +- 6 files changed, 161 insertions(+), 89 deletions(-) rename yjit/src/{ => backend}/ir.rs (89%) create mode 100644 yjit/src/backend/mod.rs create mode 100644 yjit/src/backend/x86_64/mod.rs diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index b4ef2e4bf9789a..0a930ecf60ccb2 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -34,7 +34,7 @@ pub enum RegType IP, } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct X86Reg { // Size in bits @@ -157,22 +157,39 @@ const RBP_REG_NO: u8 = 5; const R12_REG_NO: u8 = 12; const R13_REG_NO: u8 = 13; -pub const RAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: 
RegType::GP, reg_no: RAX_REG_NO }); -pub const RCX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 }); -pub const RDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 }); -pub const RBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 }); -pub const RSP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO }); -pub const RBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO }); -pub const RSI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 }); -pub const RDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 }); -pub const R8: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 }); -pub const R9: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 }); -pub const R10: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 }); -pub const R11: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 }); -pub const R12: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO }); -pub const R13: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO }); -pub const R14: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 }); -pub const R15: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 }); +pub const RAX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO }; +pub const RCX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 }; +pub const RDX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 }; +pub const RBX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 }; +pub const RSP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, 
reg_no: RSP_REG_NO }; +pub const RBP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO }; +pub const RSI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 }; +pub const RDI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 }; +pub const R8_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 }; +pub const R9_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 }; +pub const R10_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 }; +pub const R11_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 }; +pub const R12_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO }; +pub const R13_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO }; +pub const R14_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 }; +pub const R15_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 }; + +pub const RAX: X86Opnd = X86Opnd::Reg(RAX_REG); +pub const RCX: X86Opnd = X86Opnd::Reg(RCX_REG); +pub const RDX: X86Opnd = X86Opnd::Reg(RDX_REG); +pub const RBX: X86Opnd = X86Opnd::Reg(RBX_REG); +pub const RSP: X86Opnd = X86Opnd::Reg(RSP_REG); +pub const RBP: X86Opnd = X86Opnd::Reg(RBP_REG); +pub const RSI: X86Opnd = X86Opnd::Reg(RSI_REG); +pub const RDI: X86Opnd = X86Opnd::Reg(RDI_REG); +pub const R8: X86Opnd = X86Opnd::Reg(R8_REG); +pub const R9: X86Opnd = X86Opnd::Reg(R9_REG); +pub const R10: X86Opnd = X86Opnd::Reg(R10_REG); +pub const R11: X86Opnd = X86Opnd::Reg(R11_REG); +pub const R12: X86Opnd = X86Opnd::Reg(R12_REG); +pub const R13: X86Opnd = X86Opnd::Reg(R13_REG); +pub const R14: X86Opnd = X86Opnd::Reg(R14_REG); +pub const R15: X86Opnd = X86Opnd::Reg(R15_REG); // 32-bit GP registers pub const EAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 0 }); diff --git a/yjit/src/ir.rs b/yjit/src/backend/ir.rs similarity index 89% 
rename from yjit/src/ir.rs rename to yjit/src/backend/ir.rs index 9a4fc559de80dd..9cff4aeac999b2 100644 --- a/yjit/src/ir.rs +++ b/yjit/src/backend/ir.rs @@ -5,17 +5,22 @@ use std::convert::From; use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; +use crate::asm::{CodeBlock}; use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType}; use crate::core::{Context, Type, TempMapping}; +#[cfg(target_arch = "x86_64")] +use crate::backend::x86_64::*; + +//#[cfg(target_arch = "aarch64")] +//use crate::backend:aarch64::* + /// Instruction opcodes #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Op { - // Add a comment into the IR at the point that this instruction is added. It - // won't have any impact on that actual compiled code, but it will impact - // the output of ir_print_insns. Accepts as its only operand an EIR_IMM - // operand (typically generated by ir_str_ptr). + // Add a comment into the IR at the point that this instruction is added. + // It won't have any impact on that actual compiled code. Comment, // Add a label into the IR at the point that this instruction is added. @@ -51,6 +56,9 @@ pub enum Op // A low-level instruction that loads a value into a register. Load, + // Low-level instruction to store a value to memory. + Store, + // A low-level mov instruction. It accepts two operands. 
Mov, @@ -170,32 +178,18 @@ pub enum Op */ } -// Register value used by IR operands -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct Reg -{ - // Register number/index - reg_no: u8, - - // Size in bits - num_bits: u8, - - // Special register flag EC/CFP/SP/SELF - special: bool, -} - // Memory location #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct Mem { // Base register - base: Reg, + pub(super) base: Reg, // Offset relative to the base pointer - disp: i32, + pub(super) disp: i32, // Size in bits - num_bits: u8, + pub(super) num_bits: u8, } /// Operand to an IR instruction @@ -222,7 +216,7 @@ impl Opnd pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { match base { Opnd::Reg(base_reg) => { - assert!(base_reg.num_bits == 64 && !base_reg.special); + assert!(base_reg.num_bits == 64); Opnd::Mem(Mem { num_bits: num_bits, base: base_reg, @@ -234,12 +228,6 @@ impl Opnd } } -// Special register constants -pub const EC : Opnd = Opnd::Reg(Reg { reg_no: 0, num_bits: 64, special: true }); -pub const CFP : Opnd = Opnd::Reg(Reg { reg_no: 1, num_bits: 64, special: true }); -pub const SP : Opnd = Opnd::Reg(Reg { reg_no: 2, num_bits: 64, special: true }); -pub const SELF : Opnd = Opnd::Reg(Reg { reg_no: 3, num_bits: 64, special: true }); - /// Method to convert from an X86Opnd to an IR Opnd impl From for Opnd { fn from(opnd: X86Opnd) -> Self { @@ -249,17 +237,13 @@ impl From for Opnd { X86Opnd::Imm(X86Imm{ value, .. 
}) => Opnd::Imm(value), // General-purpose register - X86Opnd::Reg(X86Reg{ num_bits, reg_no, reg_type: RegType::GP }) => { - Opnd::Reg(Reg { - reg_no, - num_bits, - special: false, - }) + X86Opnd::Reg(reg) => { + Opnd::Reg(reg) } // Memory operand with displacement X86Opnd::Mem(X86Mem{ num_bits, base_reg_no, disp, idx_reg_no: None, scale_exp: 0 }) => { - let base_reg = Reg { num_bits: 64, reg_no: base_reg_no, special: false }; + let base_reg = Reg { num_bits: 64, reg_no: base_reg_no, reg_type: RegType::GP }; Opnd::Mem(Mem { base: base_reg, @@ -273,15 +257,10 @@ impl From for Opnd { } } - - - - - /// Branch target (something that we can jump to) /// for branch instructions #[derive(Clone, PartialEq, Eq, Debug)] -enum Target +pub enum Target { CodePtr(CodePtr), // Pointer to a piece of code (e.g. side-exit) LabelName(String), // A label without an index in the output @@ -293,30 +272,30 @@ enum Target pub struct Insn { // Opcode for the instruction - op: Op, + pub(super) op: Op, // Optional string for comments and labels - text: Option, + pub(super) text: Option, // List of input operands/values - opnds: Vec, + pub(super) opnds: Vec, // Output operand for this instruction - out: Opnd, + pub(super) out: Opnd, // List of branch targets (branch instructions only) - target: Option, + pub(super) target: Option, // Position in the generated machine code // Useful for comments and for patching jumps - pos: Option, + pub(super) pos: Option, } /// Object into which we assemble instructions to be /// optimized and lowered -struct Assembler +pub struct Assembler { - insns: Vec, + pub(super) insns: Vec, /// Parallel vec with insns /// Index of the last insn using the output of this insn @@ -450,7 +429,7 @@ impl Assembler Op::Add | Op::Sub | Op::Mov => { match opnds.as_slice() { [Opnd::Mem(_), Opnd::Mem(_)] => { - let output = asm.push_insn(Op::Load, vec![opnds[0]], None); + let output = asm.load(opnds[0]); asm.push_insn(op, vec![output, opnds[1]], None); }, _ => { @@ -534,19 
+513,18 @@ impl Assembler } // Optimize and compile the stored instructions - fn compile(self, regs: Vec) -> Assembler + fn compile(self, cb: &mut CodeBlock) { - self.split_insns().alloc_regs(regs) + // NOTE: for arm we're going to want to split loads but also stores + // This can be done in a platform-agnostic way, but the set of passes + // we run will be slightly different. - // TODO: splitting pass, split_insns() + let scratch_regs = Self::get_scrach_regs(); - // Peephole optimizations - // Register allocation - // Generic lowering pass - // Platform-specific lowering - - // Question: should this method return machine code? - // How do we go from lowered/optimized insn to an array of bytes? + self + .split_insns() + .alloc_regs(scratch_regs) + .target_emit(cb) } } @@ -564,6 +542,18 @@ impl Assembler } } +macro_rules! def_push_1_opnd { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + fn $op_name(&mut self, opnd0: Opnd) -> Opnd + { + self.push_insn($opcode, vec![opnd0], None) + } + } + }; +} + macro_rules! def_push_2_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler @@ -591,6 +581,7 @@ macro_rules! def_push_2_opnd_no_out { def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); +def_push_1_opnd!(load, Op::Load); def_push_2_opnd_no_out!(mov, Op::Mov); def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); @@ -660,12 +651,11 @@ mod tests { fn test_split_insns() { let mut asm = Assembler::new(); - let reg1 = Reg { reg_no: 0, num_bits: 64, special: false }; - let reg2 = Reg { reg_no: 1, num_bits: 64, special: false }; + let regs = Assembler::get_scrach_regs(); asm.add( - Opnd::mem(64, Opnd::Reg(reg1), 0), - Opnd::mem(64, Opnd::Reg(reg2), 0) + Opnd::mem(64, Opnd::Reg(regs[0]), 0), + Opnd::mem(64, Opnd::Reg(regs[1]), 0) ); let result = asm.split_insns(); @@ -698,14 +688,22 @@ mod tests { asm.add(out3, Opnd::UImm(6)); // Here we're going to allocate the registers. 
- let reg1 = Reg { reg_no: 0, num_bits: 64, special: false }; - let reg2 = Reg { reg_no: 1, num_bits: 64, special: false }; - let result = asm.alloc_regs(vec![reg1, reg2]); + let result = asm.alloc_regs(Assembler::get_scrach_regs()); // Now we're going to verify that the out field has been appropriately // updated for each of the instructions that needs it. - assert_eq!(result.insns[0].out, Opnd::Reg(reg1)); - assert_eq!(result.insns[2].out, Opnd::Reg(reg2)); - assert_eq!(result.insns[5].out, Opnd::Reg(reg1)); + let regs = Assembler::get_scrach_regs(); + assert_eq!(result.insns[0].out, Opnd::Reg(regs[0])); + assert_eq!(result.insns[2].out, Opnd::Reg(regs[1])); + assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); + } + + #[test] + fn test_compile() + { + // TODO: test full compile pipeline + + + } } diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs new file mode 100644 index 00000000000000..a83fe4f69ebc1f --- /dev/null +++ b/yjit/src/backend/mod.rs @@ -0,0 +1,3 @@ +pub mod x86_64; + +pub mod ir; \ No newline at end of file diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs new file mode 100644 index 00000000000000..257373e86f666d --- /dev/null +++ b/yjit/src/backend/x86_64/mod.rs @@ -0,0 +1,55 @@ +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_imports)] + +use crate::asm::{CodeBlock}; +use crate::asm::x86_64::*; +use crate::backend::ir::*; + +// Use the x86 register type for this platform +pub type Reg = X86Reg; + +// Callee-saved registers +pub const CFP: Opnd = Opnd::Reg(R13_REG); +pub const EC: Opnd = Opnd::Reg(R12_REG); +pub const SP: Opnd = Opnd::Reg(RBX_REG); + +impl Assembler +{ + // Get the list of registers from which we can allocate on this platform + pub fn get_scrach_regs() -> Vec + { + vec![ + RAX_REG, + RCX_REG, + ] + } + + // Emit platform-specific machine code + pub fn target_emit(&self, cb: &mut CodeBlock) + { + + + + for insn in &self.insns { + + + // For each instruction, either handle 
it here or allow the map_insn + // callback to handle it. + match insn.op { + Op::Comment => { + }, + Op::Label => { + }, + _ => { + } + }; + + + } + + + + + } +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 119477f50516ad..67d3ecd5731af7 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -33,7 +33,6 @@ pub const REG0: X86Opnd = RAX; pub const REG0_32: X86Opnd = EAX; pub const REG0_8: X86Opnd = AL; pub const REG1: X86Opnd = RCX; -// pub const REG1_32: X86Opnd = ECX; // A block that can be invalidated needs space to write a jump. // We'll reserve a minimum size for any block that could diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 019189e8e89276..752b7872c15315 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -4,7 +4,7 @@ #![allow(clippy::identity_op)] // Sometimes we do it for style mod asm; -mod ir; +mod backend; mod codegen; mod core; mod cruby; From a2aa289594352db98b893aae716cebae0556a20e Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 18 May 2022 11:36:55 -0400 Subject: [PATCH 296/546] Function to map from Opnd => X86Opnd --- yjit/src/asm/x86_64/mod.rs | 4 +-- yjit/src/backend/ir.rs | 16 ++++++--- yjit/src/backend/x86_64/mod.rs | 59 +++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 0a930ecf60ccb2..1f3dfd2e249a26 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -5,7 +5,7 @@ use crate::asm::*; // Import the assembler tests module mod tests; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct X86Imm { // Size in bits @@ -15,7 +15,7 @@ pub struct X86Imm pub value: i64 } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct X86UImm { // Size in bits diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 9cff4aeac999b2..a561d4bb496945 100644 --- a/yjit/src/backend/ir.rs +++ 
b/yjit/src/backend/ir.rs @@ -183,7 +183,7 @@ pub enum Op pub struct Mem { // Base register - pub(super) base: Reg, + pub(super) base_reg: Reg, // Offset relative to the base pointer pub(super) disp: i32, @@ -198,8 +198,11 @@ pub enum Opnd { None, // For insns with no output - Stack(u16), // Value on the temp stack (idx) - Local(u16), // Local variable (idx, do we need depth too?) + // NOTE: for now Context directly returns memory operands, + // but eventually we'd like to have Stack and Local operand types + //Stack(u16), // Value on the temp stack (idx) + //Local(u16), // Local variable (idx, do we need depth too?) + Value(VALUE), // Immediate Ruby value, may be GC'd, movable InsnOut(usize), // Output of a preceding instruction in this block @@ -219,7 +222,7 @@ impl Opnd assert!(base_reg.num_bits == 64); Opnd::Mem(Mem { num_bits: num_bits, - base: base_reg, + base_reg: base_reg, disp: disp, }) }, @@ -228,6 +231,9 @@ impl Opnd } } +/// NOTE: this is useful during the port but can probably be removed once +/// Context returns ir::Opnd instead of X86Opnd +/// /// Method to convert from an X86Opnd to an IR Opnd impl From for Opnd { fn from(opnd: X86Opnd) -> Self { @@ -246,7 +252,7 @@ impl From for Opnd { let base_reg = Reg { num_bits: 64, reg_no: base_reg_no, reg_type: RegType::GP }; Opnd::Mem(Mem { - base: base_reg, + base_reg: base_reg, disp, num_bits }) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 257373e86f666d..67e220fd8b73da 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -14,6 +14,31 @@ pub const CFP: Opnd = Opnd::Reg(R13_REG); pub const EC: Opnd = Opnd::Reg(R12_REG); pub const SP: Opnd = Opnd::Reg(RBX_REG); +/// Map Opnd to X86Opnd +impl From for X86Opnd { + fn from(opnd: Opnd) -> Self { + match opnd { + //Value(VALUE), // Immediate Ruby value, may be GC'd, movable + //InsnOut(usize), // Output of a preceding instruction in this block + + Opnd::None => X86Opnd::None, + + Opnd::UImm(val) 
=> uimm_opnd(val), + Opnd::Imm(val) => imm_opnd(val), + + // General-purpose register + Opnd::Reg(reg) => X86Opnd::Reg(reg), + + // Memory operand with displacement + Opnd::Mem(Mem{ num_bits, base_reg, disp }) => { + mem_opnd(num_bits, X86Opnd::Reg(base_reg), disp) + } + + _ => panic!("unsupported x86 operand type") + } + } +} + impl Assembler { // Get the list of registers from which we can allocate on this platform @@ -28,28 +53,32 @@ impl Assembler // Emit platform-specific machine code pub fn target_emit(&self, cb: &mut CodeBlock) { - - - + // For each instruction for insn in &self.insns { - - - // For each instruction, either handle it here or allow the map_insn - // callback to handle it. match insn.op { - Op::Comment => { - }, - Op::Label => { - }, - _ => { - } - }; + Op::Comment => {}, + Op::Label => {}, + Op::Add => { - } + //add(cb, ) + }, + /* + Load + Store, + Mov, + Test, + Cmp, + Jnz, + Jbe, + */ + + _ => panic!("unsupported instruction passed to x86 backend") + }; + } } } From 369911d31de0446dbee805a5e4ddd5691518e6ff Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 18 May 2022 14:41:43 -0400 Subject: [PATCH 297/546] Add dbg!() for Assembler. Fix regalloc issue. 
--- yjit/src/backend/ir.rs | 62 ++++++++++++++++++++++++---------- yjit/src/backend/x86_64/mod.rs | 18 ++++++---- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index a561d4bb496945..41eef8c60b7c2f 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -2,6 +2,7 @@ #![allow(unused_variables)] #![allow(unused_imports)] +use std::fmt; use std::convert::From; use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; @@ -317,6 +318,7 @@ impl Assembler } } + /// Append an instruction to the list fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd { // If we find any InsnOut from previous instructions, we're going to @@ -345,7 +347,7 @@ impl Assembler Opnd::InsnOut(insn_idx) } - // Add a comment at the current position + /// Add a comment at the current position fn comment(&mut self, text: &str) { let insn = Insn { @@ -359,7 +361,7 @@ impl Assembler self.insns.push(insn); } - // Add a label at the current position + /// Add a label at the current position fn label(&mut self, name: &str) -> Target { let insn_idx = self.insns.len(); @@ -430,13 +432,14 @@ impl Assembler { self.transform_insns(|asm, _, op, opnds, target| { match op { - // Check for Add, Sub, or Mov instructions with two memory - // operands. - Op::Add | Op::Sub | Op::Mov => { + // Check for Add, Sub, And, Mov, with two memory operands. + // Load one operand into memory. 
+ Op::Add | Op::Sub | Op::And => { match opnds.as_slice() { [Opnd::Mem(_), Opnd::Mem(_)] => { - let output = asm.load(opnds[0]); - asm.push_insn(op, vec![output, opnds[1]], None); + // We load opnd1 because for mov, opnd0 is the output + let opnd1 = asm.load(opnds[1]); + asm.push_insn(op, vec![opnds[0], opnd1], None); }, _ => { asm.push_insn(op, opnds, target); @@ -480,7 +483,8 @@ impl Assembler } let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - let result = self.transform_insns(|asm, index, op, opnds, target| { + + let asm = self.transform_insns(|asm, index, op, opnds, target| { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. @@ -498,24 +502,37 @@ impl Assembler if let Opnd::Reg(reg) = asm.insns[start_index].out { dealloc_reg(&mut pool, ®s, ®); } else { - unreachable!(); + unreachable!("no register allocated for insn"); } } } } + // Replace InsnOut operands by their corresponding register + let opnds = opnds.into_iter().map(|opnd| + match opnd { + Opnd::InsnOut(idx) => asm.insns[idx].out, + _ => opnd, + } + ).collect(); + asm.push_insn(op, opnds, target); + let num_insns = asm.insns.len(); if live_ranges[index] != index { // This instruction is used by another instruction, so we need // to allocate a register for it. 
- let length = asm.insns.len(); - asm.insns[length - 1].out = Opnd::Reg(alloc_reg(&mut pool, ®s)); + asm.insns[num_insns - 1].out = Opnd::Reg(alloc_reg(&mut pool, ®s)); + } + else + { + // Nobody is using the output of this instruction + asm.insns[num_insns - 1].out = Opnd::None; } }); assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); - result + asm } // Optimize and compile the stored instructions @@ -527,10 +544,16 @@ impl Assembler let scratch_regs = Self::get_scrach_regs(); - self + dbg!(self .split_insns() - .alloc_regs(scratch_regs) - .target_emit(cb) + .alloc_regs(scratch_regs)) + .target_emit(cb); + } +} + +impl fmt::Debug for Assembler { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_list().entries(self.insns.iter()).finish() } } @@ -704,12 +727,17 @@ mod tests { assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); } + // Test full codegen pipeline #[test] fn test_compile() { - // TODO: test full compile pipeline - + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(64 * 1024); + let regs = Assembler::get_scrach_regs(); + let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); + asm.add(out, Opnd::UImm(2)); + asm.compile(&mut cb); } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 67e220fd8b73da..2eb12e3d27045a 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -18,9 +18,12 @@ pub const SP: Opnd = Opnd::Reg(RBX_REG); impl From for X86Opnd { fn from(opnd: Opnd) -> Self { match opnd { + // NOTE: these operand types need to be lowered first //Value(VALUE), // Immediate Ruby value, may be GC'd, movable //InsnOut(usize), // Output of a preceding instruction in this block + Opnd::InsnOut(idx) => panic!("InsnOut operand made it past register allocation"), + Opnd::None => X86Opnd::None, Opnd::UImm(val) => uimm_opnd(val), @@ -59,18 +62,19 @@ impl Assembler Op::Comment => {}, Op::Label => {}, - Op::Add => { - - //add(cb, ) + Op::Add => 
add(cb, insn.opnds[0].into(), insn.opnds[1].into()), + /* + Load + Store, + */ + Op::Mov => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), - }, + // Test and set flags + Op::Test => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), /* - Load - Store, - Mov, Test, Cmp, Jnz, From 75c995b0d10515568ccfe8f67be1bd3bbcbb4b69 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 18 May 2022 16:00:45 -0400 Subject: [PATCH 298/546] Bias register allocator to reuse first operand --- yjit/src/backend/ir.rs | 53 +++++++++++++++++++++++++--------- yjit/src/backend/x86_64/mod.rs | 1 + 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 41eef8c60b7c2f..7f6a20c191e05d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -434,7 +434,7 @@ impl Assembler match op { // Check for Add, Sub, And, Mov, with two memory operands. // Load one operand into memory. - Op::Add | Op::Sub | Op::And => { + Op::Add | Op::Sub | Op::And | Op::Mov => { match opnds.as_slice() { [Opnd::Mem(_), Opnd::Mem(_)] => { // We load opnd1 because for mov, opnd0 is the output @@ -508,27 +508,42 @@ impl Assembler } } + // If this instruction is used by another instruction, + // we need to allocate a register to it + let mut out_reg = Opnd::None; + if live_ranges[index] != index { + // If this instruction's first operand maps to a register and + // this is the last use of the register, reuse the register + // We do this to improve register allocation on x86 + if opnds.len() > 0 { + if let Opnd::InsnOut(idx) = opnds[0] { + if live_ranges[idx] == index { + if let Opnd::Reg(reg) = asm.insns[idx].out { + out_reg = Opnd::Reg(alloc_reg(&mut pool, &vec![reg])) + } + } + } + } + + if out_reg == Opnd::None { + // Allocate a new register for this instruction + out_reg = Opnd::Reg(alloc_reg(&mut pool, ®s)) + } + } + // Replace InsnOut operands by their corresponding register - let opnds = opnds.into_iter().map(|opnd| + let 
reg_opnds = opnds.into_iter().map(|opnd| match opnd { Opnd::InsnOut(idx) => asm.insns[idx].out, _ => opnd, } ).collect(); - asm.push_insn(op, opnds, target); + asm.push_insn(op, reg_opnds, target); + // Set the output register for this instruction let num_insns = asm.insns.len(); - if live_ranges[index] != index { - // This instruction is used by another instruction, so we need - // to allocate a register for it. - asm.insns[num_insns - 1].out = Opnd::Reg(alloc_reg(&mut pool, ®s)); - } - else - { - // Nobody is using the output of this instruction - asm.insns[num_insns - 1].out = Opnd::None; - } + asm.insns[num_insns - 1].out = out_reg; }); assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); @@ -732,7 +747,7 @@ mod tests { fn test_compile() { let mut asm = Assembler::new(); - let mut cb = CodeBlock::new_dummy(64 * 1024); + let mut cb = CodeBlock::new_dummy(1024); let regs = Assembler::get_scrach_regs(); let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); @@ -740,4 +755,14 @@ mod tests { asm.compile(&mut cb); } + + // Test full codegen pipeline + #[test] + fn test_mov_mem2mem() + { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); + asm.compile(&mut cb); + } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 2eb12e3d27045a..00b9998b6960fe 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -69,6 +69,7 @@ impl Assembler Store, */ + Op::Load => add(cb, insn.out.into(), insn.opnds[0].into()), Op::Mov => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Test and set flags From 99cfbdca6b7c2dd167137e7256cc861961969ffd Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 19 May 2022 12:26:20 -0400 Subject: [PATCH 299/546] Fix bug with asm.comment() --- yjit/src/backend/ir.rs | 3 +++ yjit/src/backend/x86_64/mod.rs | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff 
--git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 7f6a20c191e05d..7971f69842f38e 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -359,6 +359,7 @@ impl Assembler pos: None }; self.insns.push(insn); + self.live_ranges.push(self.insns.len()); } /// Add a label at the current position @@ -375,6 +376,7 @@ impl Assembler pos: None }; self.insns.push(insn); + self.live_ranges.push(self.insns.len()); Target::LabelIdx(insn_idx) } @@ -762,6 +764,7 @@ mod tests { { let mut asm = Assembler::new(); let mut cb = CodeBlock::new_dummy(1024); + asm.comment("check that comments work too"); asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); asm.compile(&mut cb); } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 00b9998b6960fe..ed68e13eb6fbc2 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -59,7 +59,9 @@ impl Assembler // For each instruction for insn in &self.insns { match insn.op { + // TODO: need to map the position of comments in the machine code Op::Comment => {}, + Op::Label => {}, Op::Add => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), @@ -70,13 +72,12 @@ impl Assembler */ Op::Load => add(cb, insn.out.into(), insn.opnds[0].into()), - Op::Mov => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Test and set flags - Op::Test => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), /* - Test, Cmp, Jnz, Jbe, From 564f9503603ae261561193f69f1fbdef6a140aa1 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 19 May 2022 13:32:56 -0400 Subject: [PATCH 300/546] Make assembler methods public, sketch gen_dup with new backend --- yjit/src/backend/ir.rs | 18 +++++++++--------- yjit/src/codegen.rs | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/yjit/src/backend/ir.rs 
b/yjit/src/backend/ir.rs index 7971f69842f38e..d26eb289c66d4b 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -311,7 +311,7 @@ pub struct Assembler impl Assembler { - fn new() -> Assembler { + pub fn new() -> Assembler { Assembler { insns: Vec::default(), live_ranges: Vec::default() @@ -348,7 +348,7 @@ impl Assembler } /// Add a comment at the current position - fn comment(&mut self, text: &str) + pub fn comment(&mut self, text: &str) { let insn = Insn { op: Op::Comment, @@ -363,7 +363,7 @@ impl Assembler } /// Add a label at the current position - fn label(&mut self, name: &str) -> Target + pub fn label(&mut self, name: &str) -> Target { let insn_idx = self.insns.len(); @@ -553,7 +553,7 @@ impl Assembler } // Optimize and compile the stored instructions - fn compile(self, cb: &mut CodeBlock) + pub fn compile(self, cb: &mut CodeBlock) { // NOTE: for arm we're going to want to split loads but also stores // This can be done in a platform-agnostic way, but the set of passes @@ -577,12 +577,12 @@ impl fmt::Debug for Assembler { impl Assembler { // Jump if not zero - fn jnz(&mut self, target: Target) + pub fn jnz(&mut self, target: Target) { self.push_insn(Op::Jnz, vec![], Some(target)); } - fn jbe(&mut self, target: Target) + pub fn jbe(&mut self, target: Target) { self.push_insn(Op::Jbe, vec![], Some(target)); } @@ -592,7 +592,7 @@ macro_rules! def_push_1_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler { - fn $op_name(&mut self, opnd0: Opnd) -> Opnd + pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd { self.push_insn($opcode, vec![opnd0], None) } @@ -604,7 +604,7 @@ macro_rules! def_push_2_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler { - fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd + pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { self.push_insn($opcode, vec![opnd0, opnd1], None) } @@ -616,7 +616,7 @@ macro_rules! 
def_push_2_opnd_no_out { ($op_name:ident, $opcode:expr) => { impl Assembler { - fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) + pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) { self.push_insn($opcode, vec![opnd0, opnd1], None); } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 67d3ecd5731af7..22e3c45438942f 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -901,6 +901,42 @@ fn gen_dup( KeepCompiling } + + + + + + +use crate::backend::ir::*; + +#[allow(dead_code)] +fn gen_dup_ir( + _jit: &mut JITState, + ctx: &mut Context, + cb: &mut CodeBlock, + _ocb: &mut OutlinedCb, +) -> CodegenStatus { + + let mut asm = Assembler::new(); + + let dup_val = ctx.ir_stack_pop(0); + let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); + + let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); + asm.mov(loc0, dup_val); + + asm.compile(cb); + + KeepCompiling +} + + + + + + + + // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, From 1b2ee62149d5fa8d8cbe2097f9fd7a3af31989c2 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 19 May 2022 15:01:20 -0400 Subject: [PATCH 301/546] Implement target-specific insn splitting with Kevin. Add tests. 
--- yjit/src/backend/ir.rs | 61 ++++++++++++++++++++-------------- yjit/src/backend/x86_64/mod.rs | 46 ++++++++++++++++++++++++- yjit/src/codegen.rs | 3 -- 3 files changed, 81 insertions(+), 29 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index d26eb289c66d4b..e292160efc5319 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -306,7 +306,7 @@ pub struct Assembler /// Parallel vec with insns /// Index of the last insn using the output of this insn - live_ranges: Vec + pub(super) live_ranges: Vec } impl Assembler @@ -319,7 +319,7 @@ impl Assembler } /// Append an instruction to the list - fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd + pub(super) fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd { // If we find any InsnOut from previous instructions, we're going to // update the live range of the previous instruction to point to this @@ -382,7 +382,7 @@ impl Assembler } /// Transform input instructions, consumes the input assembler - fn transform_insns(mut self, mut map_insn: F) -> Assembler + pub(super) fn transform_insns(mut self, mut map_insn: F) -> Assembler where F: FnMut(&mut Assembler, usize, Op, Vec, Option) { let mut asm = Assembler::new(); @@ -430,7 +430,7 @@ impl Assembler /// Transforms the instructions by splitting instructions that cannot be /// represented in the final architecture into multiple instructions that /// can. - fn split_insns(self) -> Assembler + pub(super) fn split_loads(self) -> Assembler { self.transform_insns(|asm, _, op, opnds, target| { match op { @@ -458,7 +458,7 @@ impl Assembler /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. - fn alloc_regs(mut self, regs: Vec) -> Assembler + pub(super) fn alloc_regs(mut self, regs: Vec) -> Assembler { // First, create the pool of registers. 
let mut pool: u32 = 0; @@ -517,6 +517,8 @@ impl Assembler // If this instruction's first operand maps to a register and // this is the last use of the register, reuse the register // We do this to improve register allocation on x86 + // e.g. out = add(reg0, reg1) + // reg0 = add(reg0, reg1) if opnds.len() > 0 { if let Opnd::InsnOut(idx) = opnds[0] { if live_ranges[idx] == index { @@ -527,8 +529,8 @@ impl Assembler } } + // Allocate a new register for this instruction if out_reg == Opnd::None { - // Allocate a new register for this instruction out_reg = Opnd::Reg(alloc_reg(&mut pool, ®s)) } } @@ -552,19 +554,11 @@ impl Assembler asm } - // Optimize and compile the stored instructions + /// Compile the instructions down to machine code pub fn compile(self, cb: &mut CodeBlock) { - // NOTE: for arm we're going to want to split loads but also stores - // This can be done in a platform-agnostic way, but the set of passes - // we run will be slightly different. - - let scratch_regs = Self::get_scrach_regs(); - - dbg!(self - .split_insns() - .alloc_regs(scratch_regs)) - .target_emit(cb); + let scratch_regs = Self::get_scratch_regs(); + self.compile_with_regs(cb, scratch_regs); } } @@ -694,17 +688,17 @@ mod tests { } #[test] - fn test_split_insns() { + fn test_split_loads() { let mut asm = Assembler::new(); - let regs = Assembler::get_scrach_regs(); + let regs = Assembler::get_scratch_regs(); asm.add( Opnd::mem(64, Opnd::Reg(regs[0]), 0), Opnd::mem(64, Opnd::Reg(regs[1]), 0) ); - let result = asm.split_insns(); + let result = asm.split_loads(); assert_eq!(result.insns.len(), 2); assert_eq!(result.insns[0].op, Op::Load); } @@ -734,11 +728,11 @@ mod tests { asm.add(out3, Opnd::UImm(6)); // Here we're going to allocate the registers. 
- let result = asm.alloc_regs(Assembler::get_scrach_regs()); + let result = asm.alloc_regs(Assembler::get_scratch_regs()); // Now we're going to verify that the out field has been appropriately // updated for each of the instructions that needs it. - let regs = Assembler::get_scrach_regs(); + let regs = Assembler::get_scratch_regs(); assert_eq!(result.insns[0].out, Opnd::Reg(regs[0])); assert_eq!(result.insns[2].out, Opnd::Reg(regs[1])); assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); @@ -750,7 +744,7 @@ mod tests { { let mut asm = Assembler::new(); let mut cb = CodeBlock::new_dummy(1024); - let regs = Assembler::get_scrach_regs(); + let regs = Assembler::get_scratch_regs(); let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); asm.add(out, Opnd::UImm(2)); @@ -758,14 +752,31 @@ mod tests { asm.compile(&mut cb); } - // Test full codegen pipeline + // Test memory-to-memory move #[test] fn test_mov_mem2mem() { let mut asm = Assembler::new(); let mut cb = CodeBlock::new_dummy(1024); + let regs = Assembler::get_scratch_regs(); + asm.comment("check that comments work too"); asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); - asm.compile(&mut cb); + + asm.compile_with_regs(&mut cb, vec![regs[0]]); + } + + // Test load of register into new register + #[test] + fn test_load_reg() + { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + let regs = Assembler::get_scratch_regs(); + + let out = asm.load(SP); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_regs(&mut cb, vec![regs[0]]); } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index ed68e13eb6fbc2..65259a72f6d649 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -45,7 +45,7 @@ impl From for X86Opnd { impl Assembler { // Get the list of registers from which we can allocate on this platform - pub fn get_scrach_regs() -> Vec + pub fn get_scratch_regs() -> Vec { vec![ RAX_REG, @@ -53,6 +53,40 @@ impl Assembler ] } + 
// Emit platform-specific machine code + fn target_split(mut self) -> Assembler + { + let live_ranges: Vec = std::mem::take(&mut self.live_ranges); + + self.transform_insns(|asm, index, op, opnds, target| { + match op { + Op::Add | Op::Sub | Op::And => { + match opnds.as_slice() { + // Instruction output whose live range spans beyond this instruction + [Opnd::InsnOut(out_idx), _] => { + if live_ranges[*out_idx] > index { + let opnd0 = asm.load(opnds[0]); + asm.push_insn(op, vec![opnd0, opnds[1]], None); + return; + } + }, + + [Opnd::Mem(_), _] => { + let opnd0 = asm.load(opnds[0]); + asm.push_insn(op, vec![opnd0, opnds[1]], None); + return; + }, + + _ => {} + } + }, + _ => {} + }; + + asm.push_insn(op, opnds, target); + }) + } + // Emit platform-specific machine code pub fn target_emit(&self, cb: &mut CodeBlock) { @@ -87,4 +121,14 @@ impl Assembler }; } } + + // Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) + { + dbg!(self + .target_split() + .split_loads() + .alloc_regs(regs)) + .target_emit(cb); + } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 22e3c45438942f..28a2e6ca34b34a 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -934,9 +934,6 @@ fn gen_dup_ir( - - - // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, From 151cc55baa8d73969a57228d3a18458e5fe663ed Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 19 May 2022 16:17:09 -0400 Subject: [PATCH 302/546] Fix issue with load, gen_dup --- yjit/src/backend/x86_64/mod.rs | 17 ++++++++--------- yjit/src/codegen.rs | 16 +++++++--------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 65259a72f6d649..03da5e4d104c98 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -98,14 +98,13 @@ impl Assembler Op::Label => {}, - Op::Add => add(cb, insn.opnds[0].into(), insn.opnds[1].into()), 
- - /* - Load - Store, - */ + Op::Add => { + assert_eq!(insn.out, insn.opnds[0]); + add(cb, insn.opnds[0].into(), insn.opnds[1].into()) + }, - Op::Load => add(cb, insn.out.into(), insn.opnds[0].into()), + Op::Load => mov(cb, insn.out.into(), insn.opnds[0].into()), + //Store Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Test and set flags @@ -125,10 +124,10 @@ impl Assembler // Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) { - dbg!(self + self .target_split() .split_loads() - .alloc_regs(regs)) + .alloc_regs(regs) .target_emit(cb); } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 28a2e6ca34b34a..1799036e46d01a 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3,6 +3,7 @@ use crate::asm::x86_64::*; use crate::asm::*; +use crate::backend::ir::*; use crate::core::*; use crate::cruby::*; use crate::invariants::*; @@ -885,6 +886,10 @@ fn gen_pop( KeepCompiling } + + + +/* fn gen_dup( _jit: &mut JITState, ctx: &mut Context, @@ -900,17 +905,10 @@ fn gen_dup( KeepCompiling } - - - - - - - -use crate::backend::ir::*; +*/ #[allow(dead_code)] -fn gen_dup_ir( +fn gen_dup( _jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, From 872940e215dd571c45e9c30d96fa7b9f61dc0442 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 19 May 2022 16:51:47 -0400 Subject: [PATCH 303/546] Add test with register reuse --- yjit/src/backend/ir.rs | 16 ++++++++++++++++ yjit/src/backend/x86_64/mod.rs | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index e292160efc5319..22350ec506fa03 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -779,4 +779,20 @@ mod tests { asm.compile_with_regs(&mut cb, vec![regs[0]]); } + + // Multiple registers needed and register reuse + #[test] + fn test_reuse_reg() + { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + let regs 
= Assembler::get_scratch_regs(); + + let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); + let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); + let v2 = asm.add(v0, Opnd::UImm(1)); + asm.add(v0, v2); + + asm.compile_with_regs(&mut cb, vec![regs[0], regs[1]]); + } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 03da5e4d104c98..17d542c3ca9f1d 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -99,7 +99,8 @@ impl Assembler Op::Label => {}, Op::Add => { - assert_eq!(insn.out, insn.opnds[0]); + // FIXME: this fails because insn.out is none sometimes + //assert_eq!(insn.out, insn.opnds[0]); add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, From 0032b02045af081df30f35b508b6b790e44fcdc2 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 24 May 2022 12:03:21 -0400 Subject: [PATCH 304/546] Add gen_dupn --- yjit/src/backend/ir.rs | 4 ++++ yjit/src/codegen.rs | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 22350ec506fa03..932ba9f0bebfec 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -631,6 +631,10 @@ def_push_2_opnd_no_out!(test, Op::Test); // They are just wrappers to convert from X86Opnd into the IR Opnd type impl Context { + pub fn ir_stack_opnd(&mut self, idx: i32) -> Opnd { + self.stack_opnd(idx).into() + } + pub fn ir_stack_pop(&mut self, n: usize) -> Opnd { self.stack_pop(n).into() } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1799036e46d01a..834b75192a7393 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -932,6 +932,7 @@ fn gen_dup( +/* // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, @@ -963,6 +964,53 @@ fn gen_dupn( KeepCompiling } +*/ + + +// duplicate stack top n elements +fn gen_dupn( + jit: &mut JITState, + ctx: &mut Context, + cb: &mut CodeBlock, + _ocb: &mut OutlinedCb, +) -> CodegenStatus { + + 
let mut asm = Assembler::new(); + + let nval: VALUE = jit_get_arg(jit, 0); + let VALUE(n) = nval; + + // In practice, seems to be only used for n==2 + if n != 2 { + return CantCompile; + } + + let opnd1: Opnd = ctx.ir_stack_opnd(1); + let opnd0: Opnd = ctx.ir_stack_opnd(0); + + let mapping1 = ctx.get_opnd_mapping(StackOpnd(1)); + let mapping0 = ctx.get_opnd_mapping(StackOpnd(0)); + + let dst1: Opnd = ctx.ir_stack_push_mapping(mapping1); + asm.mov(dst1, opnd1); + + let dst0: Opnd = ctx.ir_stack_push_mapping(mapping0); + asm.mov(dst0, opnd0); + + asm.compile(cb); + + KeepCompiling +} + + + + + + + + + + // Swap top 2 stack entries fn gen_swap( From a88fc48b3a61b63aa1c2f4b05981e0d8726e2b9e Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 24 May 2022 15:44:46 -0400 Subject: [PATCH 305/546] Add CCall IR insn, implement gen_swap() --- yjit/src/backend/ir.rs | 58 +++++++++++++++++++++++++++++++--- yjit/src/backend/x86_64/mod.rs | 22 ++++++++++--- yjit/src/codegen.rs | 46 ++++++++++++++++----------- 3 files changed, 99 insertions(+), 27 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 932ba9f0bebfec..9ed8f34c3eb0ea 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -73,6 +73,9 @@ pub enum Op Jnz, Jbe, + // C function call with N arguments (variadic) + CCall, + /* // The following are conditional jump instructions. They all accept as their // first operand an EIR_LABEL_NAME, which is used as the target of the jump. @@ -269,9 +272,10 @@ impl From for Opnd { #[derive(Clone, PartialEq, Eq, Debug)] pub enum Target { - CodePtr(CodePtr), // Pointer to a piece of code (e.g. side-exit) + CodePtr(CodePtr), // Pointer to a piece of YJIT-generated code (e.g. 
side-exit) + FunPtr(*const u8), // Pointer to a C function LabelName(String), // A label without an index in the output - LabelIdx(usize), // A label that has been indexed + LabelIdx(usize), // A label that has been indexed } /// YJIT IR instruction @@ -466,16 +470,24 @@ impl Assembler // Mutate the pool bitmap to indicate that the register at that index // has been allocated and is live. fn alloc_reg(pool: &mut u32, regs: &Vec) -> Reg { - for index in 0..regs.len() { + for (index, reg) in regs.iter().enumerate() { if (*pool & (1 << index)) == 0 { *pool |= 1 << index; - return regs[index]; + return *reg; } } unreachable!("Register spill not supported"); } + // Allocate a specific register + fn take_reg(pool: &mut u32, regs: &Vec, reg: &Reg) -> Reg { + let reg_index = regs.iter().position(|elem| elem == reg).unwrap(); + assert_eq!(*pool & (1 << reg_index), 0); + *pool |= 1 << reg_index; + return regs[reg_index]; + } + // Mutate the pool bitmap to indicate that the given register is being // returned as it is no longer used by the instruction that previously // held it. 
@@ -510,10 +522,21 @@ impl Assembler } } + // C return values need to be mapped to the C return register + if op == Op::CCall { + assert_eq!(pool, 0, "register lives past C function call"); + } + // If this instruction is used by another instruction, // we need to allocate a register to it let mut out_reg = Opnd::None; if live_ranges[index] != index { + + // C return values need to be mapped to the C return register + if op == Op::CCall { + out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &RET_REG)) + } + // If this instruction's first operand maps to a register and // this is the last use of the register, reuse the register // We do this to improve register allocation on x86 @@ -523,7 +546,7 @@ impl Assembler if let Opnd::InsnOut(idx) = opnds[0] { if live_ranges[idx] == index { if let Opnd::Reg(reg) = asm.insns[idx].out { - out_reg = Opnd::Reg(alloc_reg(&mut pool, &vec![reg])) + out_reg = Opnd::Reg(take_reg(&mut pool, ®s, ®)) } } } @@ -580,6 +603,12 @@ impl Assembler { self.push_insn(Op::Jbe, vec![], Some(target)); } + + pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd + { + let target = Target::FunPtr(fptr); + self.push_insn(Op::CCall, opnds, Some(target)) + } } macro_rules! 
def_push_1_opnd { @@ -799,4 +828,23 @@ mod tests { asm.compile_with_regs(&mut cb, vec![regs[0], regs[1]]); } + + #[test] + fn test_c_call() + { + extern "sysv64" fn dummy_c_fun(v0: usize, v1: usize) + { + } + + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + let regs = Assembler::get_scratch_regs(); + + asm.ccall( + dummy_c_fun as *const u8, + vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] + ); + + asm.compile_with_regs(&mut cb, vec![regs[0], regs[1]]); + } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 17d542c3ca9f1d..f6a901f77dffe1 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -14,6 +14,9 @@ pub const CFP: Opnd = Opnd::Reg(R13_REG); pub const EC: Opnd = Opnd::Reg(R12_REG); pub const SP: Opnd = Opnd::Reg(RBX_REG); +// C return value register on this platform +pub const RET_REG: Reg = RAX_REG; + /// Map Opnd to X86Opnd impl From for X86Opnd { fn from(opnd: Opnd) -> Self { @@ -54,7 +57,7 @@ impl Assembler } // Emit platform-specific machine code - fn target_split(mut self) -> Assembler + fn x86_split(mut self) -> Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); @@ -88,7 +91,7 @@ impl Assembler } // Emit platform-specific machine code - pub fn target_emit(&self, cb: &mut CodeBlock) + pub fn x86_emit(&self, cb: &mut CodeBlock) { // For each instruction for insn in &self.insns { @@ -117,6 +120,17 @@ impl Assembler Jbe, */ + // C function call + Op::CCall => { + // Temporary + assert!(insn.opnds.len() < C_ARG_REGS.len()); + + // For each operand + for (idx, opnd) in insn.opnds.iter().enumerate() { + mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); + } + }, + _ => panic!("unsupported instruction passed to x86 backend") }; } @@ -126,9 +140,9 @@ impl Assembler pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) { self - .target_split() + .x86_split() .split_loads() .alloc_regs(regs) - .target_emit(cb); + .x86_emit(cb); } } diff --git 
a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 834b75192a7393..f9b3d513dc3a37 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -931,7 +931,6 @@ fn gen_dup( - /* // duplicate stack top n elements fn gen_dupn( @@ -966,7 +965,6 @@ fn gen_dupn( } */ - // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, @@ -1002,16 +1000,6 @@ fn gen_dupn( KeepCompiling } - - - - - - - - - - // Swap top 2 stack entries fn gen_swap( _jit: &mut JITState, @@ -1031,19 +1019,23 @@ fn stack_swap( _reg0: X86Opnd, _reg1: X86Opnd, ) { - let opnd0 = ctx.stack_opnd(offset0 as i32); - let opnd1 = ctx.stack_opnd(offset1 as i32); + let mut asm = Assembler::new(); + + let stack0_mem = ctx.ir_stack_opnd(offset0 as i32); + let stack1_mem = ctx.ir_stack_opnd(offset1 as i32); let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0)); let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1)); - mov(cb, REG0, opnd0); - mov(cb, REG1, opnd1); - mov(cb, opnd0, REG1); - mov(cb, opnd1, REG0); + let stack0_reg = asm.load(stack0_mem); + let stack1_reg = asm.load(stack1_mem); + asm.mov(stack0_mem, stack1_reg); + asm.mov(stack1_mem, stack0_reg); ctx.set_opnd_mapping(StackOpnd(offset0), mapping1); ctx.set_opnd_mapping(StackOpnd(offset1), mapping0); + + asm.compile(cb); } fn gen_putnil( @@ -1277,6 +1269,7 @@ fn gen_newarray( KeepCompiling } + // dup array fn gen_duparray( jit: &mut JITState, @@ -1299,6 +1292,23 @@ fn gen_duparray( KeepCompiling } + + + +/* +let mut asm = Assembler::new(); + +//asm.ccall(rb_ary_resurrect as *const u8, vec![ary]); + +asm.compile(cb); +*/ + + + + + + + // dup hash fn gen_duphash( jit: &mut JITState, From efb45acb2932dd8ebd60853584370ca75653cdf8 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 24 May 2022 16:30:18 -0400 Subject: [PATCH 306/546] Load GC Value operands into registers --- yjit/src/backend/ir.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/ir.rs 
b/yjit/src/backend/ir.rs index 9ed8f34c3eb0ea..63a04789841631 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -219,7 +219,7 @@ pub enum Opnd impl Opnd { - // Convenience constructor for memory operands + /// Convenience constructor for memory operands pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { match base { Opnd::Reg(base_reg) => { @@ -233,6 +233,11 @@ impl Opnd _ => unreachable!() } } + + /// Constant pointer operand + pub fn const_ptr(ptr: *const u8) -> Self { + Opnd::UImm(ptr as u64) + } } /// NOTE: this is useful during the port but can probably be removed once @@ -436,7 +441,32 @@ impl Assembler /// can. pub(super) fn split_loads(self) -> Assembler { + // Load operands that are GC values into a register + fn load_gc_opnds(op: Op, opnds: Vec, asm: &mut Assembler) -> Vec + { + if op == Op::Load || op == Op::Mov { + return opnds; + } + + fn map_opnd(opnd: Opnd, asm: &mut Assembler) -> Opnd { + if let Opnd::Value(val) = opnd { + // If this is a heap object, load it into a register + if !val.special_const_p() { + asm.load(opnd); + } + } + + opnd + } + + opnds.into_iter().map(|opnd| map_opnd(opnd, asm)).collect() + } + self.transform_insns(|asm, _, op, opnds, target| { + // Load heap object operands into registers because most + // instructions can't directly work with 64-bit constants + let opnds = load_gc_opnds(op, opnds, asm); + match op { // Check for Add, Sub, And, Mov, with two memory operands. // Load one operand into memory. 
From 7c83a904a49a8ba3a1b78474a6d51a7a32178a4a Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 25 May 2022 15:05:54 -0400 Subject: [PATCH 307/546] Implement gc offset logic --- yjit/src/asm/x86_64/mod.rs | 2 +- yjit/src/backend/ir.rs | 68 ++++++++++++++++++++++------------ yjit/src/backend/x86_64/mod.rs | 30 +++++++++++---- yjit/src/codegen.rs | 62 +++++++++++++++++++++++++------ yjit/src/core.rs | 14 +++---- 5 files changed, 126 insertions(+), 50 deletions(-) diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 1f3dfd2e249a26..1ada5ffbb7a944 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -214,7 +214,7 @@ pub const AX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType: pub const CX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 1 }); pub const DX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 2 }); pub const BX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 3 }); -pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 }); +//pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 }); pub const BP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 5 }); pub const SI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 6 }); pub const DI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 7 }); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 63a04789841631..a578564afb28fa 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -9,6 +9,7 @@ use crate::virtualmem::{CodePtr}; use crate::asm::{CodeBlock}; use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType}; use crate::core::{Context, Type, TempMapping}; +use crate::codegen::{JITState}; #[cfg(target_arch = "x86_64")] use 
crate::backend::x86_64::*; @@ -16,6 +17,10 @@ use crate::backend::x86_64::*; //#[cfg(target_arch = "aarch64")] //use crate::backend:aarch64::* +pub const EC: Opnd = _EC; +pub const CFP: Opnd = _CFP; +pub const SP: Opnd = _SP; + /// Instruction opcodes #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Op @@ -63,6 +68,9 @@ pub enum Op // A low-level mov instruction. It accepts two operands. Mov, + // Load effective address + Lea, + // Bitwise AND test instruction Test, @@ -315,7 +323,7 @@ pub struct Assembler /// Parallel vec with insns /// Index of the last insn using the output of this insn - pub(super) live_ranges: Vec + pub(super) live_ranges: Vec, } impl Assembler @@ -323,7 +331,7 @@ impl Assembler pub fn new() -> Assembler { Assembler { insns: Vec::default(), - live_ranges: Vec::default() + live_ranges: Vec::default(), } } @@ -608,10 +616,10 @@ impl Assembler } /// Compile the instructions down to machine code - pub fn compile(self, cb: &mut CodeBlock) + pub fn compile(self, jit: &mut JITState, cb: &mut CodeBlock) { let scratch_regs = Self::get_scratch_regs(); - self.compile_with_regs(cb, scratch_regs); + self.compile_with_regs(jit, cb, scratch_regs); } } @@ -682,6 +690,7 @@ def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); def_push_1_opnd!(load, Op::Load); def_push_2_opnd_no_out!(mov, Op::Mov); +def_push_2_opnd_no_out!(lea, Op::Lea); def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); @@ -690,6 +699,10 @@ def_push_2_opnd_no_out!(test, Op::Test); // They are just wrappers to convert from X86Opnd into the IR Opnd type impl Context { + pub fn ir_sp_opnd(&mut self, idx: isize) -> Opnd { + self.sp_opnd(idx).into() + } + pub fn ir_stack_opnd(&mut self, idx: i32) -> Opnd { self.stack_opnd(idx).into() } @@ -801,62 +814,71 @@ mod tests { assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); } + fn setup_asm(num_regs: usize) -> (Assembler, JITState, CodeBlock, Vec) { + let blockid = BlockId { + iseq: std::ptr::null(), + idx: 
0, + }; + let block = Block::new(blockid, &Context::default()); + + let mut regs = Assembler::get_scratch_regs(); + + return ( + Assembler::new(), + JITState::new(&block), + CodeBlock::new_dummy(1024), + regs.drain(0..num_regs).collect() + ); + } + // Test full codegen pipeline #[test] fn test_compile() { - let mut asm = Assembler::new(); - let mut cb = CodeBlock::new_dummy(1024); - let regs = Assembler::get_scratch_regs(); + let (mut asm, mut jit, mut cb, regs) = setup_asm(1); let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); asm.add(out, Opnd::UImm(2)); - asm.compile(&mut cb); + asm.compile(&mut jit, &mut cb); } // Test memory-to-memory move #[test] fn test_mov_mem2mem() { - let mut asm = Assembler::new(); - let mut cb = CodeBlock::new_dummy(1024); - let regs = Assembler::get_scratch_regs(); + let (mut asm, mut jit, mut cb, regs) = setup_asm(1); asm.comment("check that comments work too"); asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); - asm.compile_with_regs(&mut cb, vec![regs[0]]); + asm.compile_with_regs(&mut jit, &mut cb, regs); } // Test load of register into new register #[test] fn test_load_reg() { - let mut asm = Assembler::new(); - let mut cb = CodeBlock::new_dummy(1024); - let regs = Assembler::get_scratch_regs(); + let (mut asm, mut jit, mut cb, regs) = setup_asm(1); let out = asm.load(SP); asm.mov(Opnd::mem(64, SP, 0), out); - asm.compile_with_regs(&mut cb, vec![regs[0]]); + asm.compile_with_regs(&mut jit, &mut cb, regs); } // Multiple registers needed and register reuse #[test] fn test_reuse_reg() { - let mut asm = Assembler::new(); - let mut cb = CodeBlock::new_dummy(1024); - let regs = Assembler::get_scratch_regs(); + let (mut asm, mut jit, mut cb, regs) = setup_asm(2); let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); let v2 = asm.add(v0, Opnd::UImm(1)); asm.add(v0, v2); - asm.compile_with_regs(&mut cb, vec![regs[0], regs[1]]); + asm.compile_with_regs(&mut jit, &mut cb, regs); } 
#[test] @@ -866,15 +888,13 @@ mod tests { { } - let mut asm = Assembler::new(); - let mut cb = CodeBlock::new_dummy(1024); - let regs = Assembler::get_scratch_regs(); + let (mut asm, mut jit, mut cb, regs) = setup_asm(2); asm.ccall( dummy_c_fun as *const u8, vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] ); - asm.compile_with_regs(&mut cb, vec![regs[0], regs[1]]); + asm.compile_with_regs(&mut jit, &mut cb, regs); } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index f6a901f77dffe1..f6ebcc564325fc 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -4,15 +4,17 @@ use crate::asm::{CodeBlock}; use crate::asm::x86_64::*; +use crate::codegen::{JITState}; +use crate::cruby::*; use crate::backend::ir::*; // Use the x86 register type for this platform pub type Reg = X86Reg; // Callee-saved registers -pub const CFP: Opnd = Opnd::Reg(R13_REG); -pub const EC: Opnd = Opnd::Reg(R12_REG); -pub const SP: Opnd = Opnd::Reg(RBX_REG); +pub const _CFP: Opnd = Opnd::Reg(R13_REG); +pub const _EC: Opnd = Opnd::Reg(R12_REG); +pub const _SP: Opnd = Opnd::Reg(RBX_REG); // C return value register on this platform pub const RET_REG: Reg = RAX_REG; @@ -91,7 +93,7 @@ impl Assembler } // Emit platform-specific machine code - pub fn x86_emit(&self, cb: &mut CodeBlock) + pub fn x86_emit(&mut self, jit: &mut JITState, cb: &mut CodeBlock) { // For each instruction for insn in &self.insns { @@ -107,8 +109,22 @@ impl Assembler add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, - Op::Load => mov(cb, insn.out.into(), insn.opnds[0].into()), + //TODO: //Store + + Op::Load => { + mov(cb, insn.out.into(), insn.opnds[0].into()); + + // If the value being loaded is a heapp object + if let Opnd::Value(val) = insn.opnds[0] { + if !val.special_const_p() { + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + jit.add_gc_obj_offset(ptr_offset); + } + 
} + }, + Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Test and set flags @@ -137,12 +153,12 @@ impl Assembler } // Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) + pub fn compile_with_regs(self, jit: &mut JITState, cb: &mut CodeBlock, regs: Vec) { self .x86_split() .split_loads() .alloc_regs(regs) - .x86_emit(cb); + .x86_emit(jit, cb); } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index f9b3d513dc3a37..a94eeb62ca281d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -118,10 +118,9 @@ impl JITState { self.opcode } - pub fn add_gc_object_offset(self: &mut JITState, ptr_offset: u32) { + pub fn add_gc_obj_offset(self: &mut JITState, ptr_offset: u32) { let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut(); - gc_obj_vec.add_gc_object_offset(ptr_offset); - + gc_obj_vec.add_gc_obj_offset(ptr_offset); incr_counter!(num_gc_obj_refs); } @@ -161,7 +160,7 @@ pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); if !ptr.special_const_p() { - jit.add_gc_object_offset(ptr_offset); + jit.add_gc_obj_offset(ptr_offset); } } @@ -292,6 +291,18 @@ fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) { mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg); } +// Save the incremented PC on the CFP +// This is necessary when callees can raise or allocate +fn ir_jit_save_pc(jit: &JITState, asm: &mut Assembler) { + let pc: *mut VALUE = jit.get_pc(); + let ptr: *mut VALUE = unsafe { + let cur_insn_len = insn_len(jit.get_opcode()) as isize; + pc.offset(cur_insn_len) + }; + + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(ptr as *const u8)); +} + /// Save the current SP on the CFP /// This realigns the interpreter SP with the JIT SP /// Note: this will change the current value of REG_SP, @@ -306,6 +317,25 @@ fn gen_save_sp(cb: &mut 
CodeBlock, ctx: &mut Context) { } } +/// Save the current SP on the CFP +/// This realigns the interpreter SP with the JIT SP +/// Note: this will change the current value of REG_SP, +/// which could invalidate memory operands +fn ir_gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { + if ctx.get_sp_offset() != 0 { + let stack_pointer = ctx.ir_sp_opnd(0); + asm.lea(SP, stack_pointer); + let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + asm.mov(cfp_sp_opnd, SP); + ctx.set_sp_offset(0); + } +} + + + + + + /// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that /// could: /// - Perform GC allocation @@ -326,6 +356,15 @@ fn jit_prepare_routine_call( ctx.clear_local_types(); } + + + + + + + + + /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) { @@ -909,7 +948,7 @@ fn gen_dup( #[allow(dead_code)] fn gen_dup( - _jit: &mut JITState, + jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, _ocb: &mut OutlinedCb, @@ -923,7 +962,7 @@ fn gen_dup( let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); asm.mov(loc0, dup_val); - asm.compile(cb); + asm.compile(jit, cb); KeepCompiling } @@ -995,23 +1034,24 @@ fn gen_dupn( let dst0: Opnd = ctx.ir_stack_push_mapping(mapping0); asm.mov(dst0, opnd0); - asm.compile(cb); + asm.compile(jit, cb); KeepCompiling } // Swap top 2 stack entries fn gen_swap( - _jit: &mut JITState, + jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - stack_swap(ctx, cb, 0, 1, REG0, REG1); + stack_swap(jit, ctx, cb, 0, 1, REG0, REG1); KeepCompiling } fn stack_swap( + jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, offset0: u16, @@ -1035,7 +1075,7 @@ fn stack_swap( ctx.set_opnd_mapping(StackOpnd(offset0), mapping1); ctx.set_opnd_mapping(StackOpnd(offset1), 
mapping0); - asm.compile(cb); + asm.compile(jit, cb); } fn gen_putnil( @@ -4621,7 +4661,7 @@ fn gen_send_iseq( let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw) .try_into() .unwrap(); - stack_swap(ctx, cb, offset0, offset1, REG1, REG0); + stack_swap(jit, ctx, cb, offset0, offset1, REG1, REG0); // Next we're going to do some bookkeeping on our end so // that we know the order that the arguments are diff --git a/yjit/src/core.rs b/yjit/src/core.rs index cb026f6a3b96b2..68cc5e799ffcb2 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -390,7 +390,7 @@ pub struct Block { // FIXME: should these be code pointers instead? // Offsets for GC managed objects in the mainline code block - gc_object_offsets: Vec, + gc_obj_offsets: Vec, // CME dependencies of this block, to help to remove all pointers to this // block in the system. @@ -582,7 +582,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { } // Walk over references to objects in generated code. - for offset in &block.gc_object_offsets { + for offset in &block.gc_obj_offsets { let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); // Creating an unaligned pointer is well defined unlike in C. let value_address = value_address as *const VALUE; @@ -640,7 +640,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { } // Walk over references to objects in generated code. - for offset in &block.gc_object_offsets { + for offset in &block.gc_obj_offsets { let offset_to_value = offset.as_usize(); let value_code_ptr = cb.get_ptr(offset_to_value); let value_ptr: *const u8 = value_code_ptr.raw_ptr(); @@ -808,7 +808,7 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) { } // Run write barriers for all objects in generated code. - for offset in &block.gc_object_offsets { + for offset in &block.gc_obj_offsets { let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); // Creating an unaligned pointer is well defined unlike in C. 
let value_address: *const VALUE = value_address.cast(); @@ -844,7 +844,7 @@ impl Block { end_addr: None, incoming: Vec::new(), outgoing: Vec::new(), - gc_object_offsets: Vec::new(), + gc_obj_offsets: Vec::new(), cme_dependencies: Vec::new(), entry_exit: None, }; @@ -905,8 +905,8 @@ impl Block { self.end_idx = end_idx; } - pub fn add_gc_object_offset(self: &mut Block, ptr_offset: u32) { - self.gc_object_offsets.push(ptr_offset); + pub fn add_gc_obj_offset(self: &mut Block, ptr_offset: u32) { + self.gc_obj_offsets.push(ptr_offset); } /// Instantiate a new CmeDependency struct and add it to the list of From 04e2ccede4e992a6e0d18ed506d76625ee7da8a3 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 6 Jun 2022 15:54:22 -0400 Subject: [PATCH 308/546] Change codegen.rs to use backend Assembler directly --- yjit/src/backend/ir.rs | 17 +++++ yjit/src/backend/x86_64/mod.rs | 6 +- yjit/src/codegen.rs | 126 +++++++++------------------------ yjit/src/lib.rs | 7 ++ 4 files changed, 62 insertions(+), 94 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index a578564afb28fa..fa8a7b8e2b3f4e 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -84,6 +84,9 @@ pub enum Op // C function call with N arguments (variadic) CCall, + // C function return + CRet, + /* // The following are conditional jump instructions. They all accept as their // first operand an EIR_LABEL_NAME, which is used as the target of the jump. @@ -661,6 +664,18 @@ macro_rules! def_push_1_opnd { }; } +macro_rules! def_push_1_opnd_no_out { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self, opnd0: Opnd) + { + self.push_insn($opcode, vec![opnd0], None); + } + } + }; +} + macro_rules! def_push_2_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler @@ -688,7 +703,9 @@ macro_rules! 
def_push_2_opnd_no_out { def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); +def_push_1_opnd_no_out!(cret, Op::CRet); def_push_1_opnd!(load, Op::Load); +def_push_2_opnd_no_out!(store, Op::Store); def_push_2_opnd_no_out!(mov, Op::Mov); def_push_2_opnd_no_out!(lea, Op::Lea); def_push_2_opnd_no_out!(cmp, Op::Cmp); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index f6ebcc564325fc..eb54ced2bfd3f4 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -109,8 +109,7 @@ impl Assembler add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, - //TODO: - //Store + Op::Store => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), Op::Load => { mov(cb, insn.out.into(), insn.opnds[0].into()); @@ -127,6 +126,9 @@ impl Assembler Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), + // Load effective address + Op::Lea => lea(cb, insn.opnds[0].into(), insn.opnds[1].into()), + // Test and set flags Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a94eeb62ca281d..955d87eb68bdf1 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -54,7 +54,7 @@ enum CodegenStatus { type InsnGenFn = fn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus; @@ -778,6 +778,9 @@ pub fn gen_single_block( // Mark the start position of the block blockref.borrow_mut().set_start_addr(cb.get_write_ptr()); + // Create a backend assembler instance + let mut asm = Assembler::new(); + // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { @@ -832,7 +835,7 @@ pub fn gen_single_block( } // Call the code generation function - status = gen_fn(&mut jit, &mut ctx, cb, ocb); + status = gen_fn(&mut jit, &mut ctx, &mut asm, ocb); } // If we can't compile this instruction @@ -869,6 
+872,9 @@ pub fn gen_single_block( // Finish filling out the block { + // Compile code into the code block + asm.compile(&mut jit, cb); + let mut block = jit.block.borrow_mut(); // Mark the end position of the block @@ -887,19 +893,6 @@ pub fn gen_single_block( return Err(()); } - // TODO: we may want a feature for this called dump_insns? Can leave commented for now - /* - if (YJIT_DUMP_MODE >= 2) { - // Dump list of compiled instrutions - fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq); - for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) { - int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx)); - fprintf(stderr, " %04d %s\n", idx, insn_name(opcode)); - idx += insn_len(opcode); - } - } - */ - // Block compiled successfully Ok(blockref) } @@ -907,7 +900,7 @@ pub fn gen_single_block( fn gen_nop( _jit: &mut JITState, _ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Do nothing @@ -917,7 +910,7 @@ fn gen_nop( fn gen_pop( _jit: &mut JITState, ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Decrement SP @@ -925,90 +918,27 @@ fn gen_pop( KeepCompiling } - - - -/* -fn gen_dup( - _jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let dup_val = ctx.stack_pop(0); - let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); - - let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); - mov(cb, REG0, dup_val); - mov(cb, loc0, REG0); - - KeepCompiling -} -*/ - -#[allow(dead_code)] fn gen_dup( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let mut asm = Assembler::new(); - let dup_val = ctx.ir_stack_pop(0); let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); asm.mov(loc0, dup_val); - 
asm.compile(jit, cb); - - KeepCompiling -} - - - - -/* -// duplicate stack top n elements -fn gen_dupn( - jit: &mut JITState, - ctx: &mut Context, - cb: &mut CodeBlock, - _ocb: &mut OutlinedCb, -) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; - - // In practice, seems to be only used for n==2 - if n != 2 { - return CantCompile; - } - - let opnd1: X86Opnd = ctx.stack_opnd(1); - let opnd0: X86Opnd = ctx.stack_opnd(0); - - let mapping1 = ctx.get_opnd_mapping(StackOpnd(1)); - let mapping0 = ctx.get_opnd_mapping(StackOpnd(0)); - - let dst1: X86Opnd = ctx.stack_push_mapping(mapping1); - mov(cb, REG0, opnd1); - mov(cb, dst1, REG0); - - let dst0: X86Opnd = ctx.stack_push_mapping(mapping0); - mov(cb, REG0, opnd0); - mov(cb, dst0, REG0); - KeepCompiling } -*/ // duplicate stack top n elements fn gen_dupn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { @@ -1034,8 +964,6 @@ fn gen_dupn( let dst0: Opnd = ctx.ir_stack_push_mapping(mapping0); asm.mov(dst0, opnd0); - asm.compile(jit, cb); - KeepCompiling } @@ -1043,24 +971,22 @@ fn gen_dupn( fn gen_swap( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - stack_swap(jit, ctx, cb, 0, 1, REG0, REG1); + stack_swap(jit, ctx, asm, 0, 1, REG0, REG1); KeepCompiling } fn stack_swap( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, offset0: u16, offset1: u16, _reg0: X86Opnd, _reg1: X86Opnd, ) { - let mut asm = Assembler::new(); - let stack0_mem = ctx.ir_stack_opnd(offset0 as i32); let stack1_mem = ctx.ir_stack_opnd(offset1 as i32); @@ -1074,10 +1000,18 @@ fn stack_swap( ctx.set_opnd_mapping(StackOpnd(offset0), mapping1); ctx.set_opnd_mapping(StackOpnd(offset1), mapping0); - - asm.compile(jit, cb); } + + + + + + + + + +/* fn gen_putnil( jit: &mut JITState, ctx: &mut Context, @@ -6069,6 +6003,7 @@ fn 
gen_opt_invokebuiltin_delegate( KeepCompiling } +*/ /// Maps a YARV opcode to a code generation function (if supported) fn get_gen_fn(opcode: VALUE) -> Option { @@ -6078,10 +6013,12 @@ fn get_gen_fn(opcode: VALUE) -> Option { match opcode { YARVINSN_nop => Some(gen_nop), - YARVINSN_pop => Some(gen_pop), + //YARVINSN_pop => Some(gen_pop), YARVINSN_dup => Some(gen_dup), YARVINSN_dupn => Some(gen_dupn), YARVINSN_swap => Some(gen_swap), + + /* YARVINSN_putnil => Some(gen_putnil), YARVINSN_putobject => Some(gen_putobject), YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), @@ -6163,6 +6100,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_getspecial => Some(gen_getspecial), YARVINSN_getclassvariable => Some(gen_getclassvariable), YARVINSN_setclassvariable => Some(gen_setclassvariable), + */ // Unimplemented opcode, YJIT won't generate code for this yet _ => None, @@ -6350,6 +6288,7 @@ impl CodegenGlobals { /// Register codegen functions for some Ruby core methods fn reg_method_codegen_fns(&mut self) { + /* unsafe { // Specialization for C methods. See yjit_reg_method() for details. 
self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not); @@ -6378,6 +6317,7 @@ impl CodegenGlobals { jit_thread_s_current, ); } + */ } /// Get a mutable reference to the codegen globals instance @@ -6442,6 +6382,7 @@ impl CodegenGlobals { } } +/* #[cfg(test)] mod tests { use super::*; @@ -6704,3 +6645,4 @@ mod tests { gen_leave(&mut jit, &mut context, &mut cb, &mut ocb); } } +*/ \ No newline at end of file diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 752b7872c15315..e92186da7de5c4 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -3,6 +3,13 @@ #![allow(clippy::too_many_arguments)] // :shrug: #![allow(clippy::identity_op)] // Sometimes we do it for style + +// Temporary while switching to the new backend +#![allow(dead_code)] +#![allow(unused)] + + + mod asm; mod backend; mod codegen; From 39dd8b2dfbb50aab7731466b57c39eaf96e66996 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 6 Jun 2022 17:57:07 -0400 Subject: [PATCH 309/546] Add test for lea and ret. Fix codegen for lea and ret. --- yjit/src/backend/ir.rs | 19 +++++++++++++++---- yjit/src/backend/x86_64/mod.rs | 13 +++++++++++-- yjit/src/codegen.rs | 3 ++- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index fa8a7b8e2b3f4e..f6a02909d982cb 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -65,12 +65,12 @@ pub enum Op // Low-level instruction to store a value to memory. Store, - // A low-level mov instruction. It accepts two operands. - Mov, - // Load effective address Lea, + // A low-level mov instruction. It accepts two operands. 
+ Mov, + // Bitwise AND test instruction Test, @@ -705,9 +705,9 @@ def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); def_push_1_opnd_no_out!(cret, Op::CRet); def_push_1_opnd!(load, Op::Load); +def_push_1_opnd!(lea, Op::Lea); def_push_2_opnd_no_out!(store, Op::Store); def_push_2_opnd_no_out!(mov, Op::Mov); -def_push_2_opnd_no_out!(lea, Op::Lea); def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); @@ -914,4 +914,15 @@ mod tests { asm.compile_with_regs(&mut jit, &mut cb, regs); } + + #[test] + fn test_lea_ret() + { + let (mut asm, mut jit, mut cb, regs) = setup_asm(1); + + let addr = asm.lea(Opnd::mem(64, SP, 0)); + asm.cret(addr); + + asm.compile_with_regs(&mut jit, &mut cb, regs); + } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index eb54ced2bfd3f4..4aa04b29aa2b37 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -127,7 +127,7 @@ impl Assembler Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Load effective address - Op::Lea => lea(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Op::Lea => lea(cb, insn.out.into(), insn.opnds[0].into()), // Test and set flags Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), @@ -149,7 +149,16 @@ impl Assembler } }, - _ => panic!("unsupported instruction passed to x86 backend") + Op::CRet => { + // TODO: bias allocation towards return register + if insn.opnds[0] != Opnd::Reg(RET_REG) { + mov(cb, RAX, insn.opnds[0].into()); + } + + ret(cb); + } + + _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) }; } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 955d87eb68bdf1..28cf05e95c6e24 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -324,7 +324,8 @@ fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) { fn ir_gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { if ctx.get_sp_offset() != 0 { let stack_pointer = 
ctx.ir_sp_opnd(0); - asm.lea(SP, stack_pointer); + let sp_addr = asm.lea(stack_pointer); + asm.mov(SP, sp_addr); let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); asm.mov(cfp_sp_opnd, SP); ctx.set_sp_offset(0); From a1b8c947380716a5ffca2b1888a6310e8132b00c Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 13:10:13 -0400 Subject: [PATCH 310/546] * Arm64 Beginnings (https://github.com/Shopify/ruby/pull/291) * Initial setup for aarch64 * ADDS and SUBS * ADD and SUB for immediates * Revert moved code * Documentation * Rename Arm64* to A64* * Comments on shift types * Share sig_imm_size and unsig_imm_size --- yjit/src/asm/arm64/README.md | 18 ++ .../arm64/inst/data_processing_immediate.rs | 198 +++++++++++++++++ .../arm64/inst/data_processing_register.rs | 202 ++++++++++++++++++ yjit/src/asm/arm64/inst/family.rs | 34 +++ yjit/src/asm/arm64/inst/mod.rs | 32 +++ yjit/src/asm/arm64/inst/sf.rs | 19 ++ yjit/src/asm/arm64/mod.rs | 4 + yjit/src/asm/arm64/opnd.rs | 159 ++++++++++++++ yjit/src/asm/mod.rs | 73 +++++++ yjit/src/asm/x86_64/mod.rs | 46 +--- yjit/src/asm/x86_64/tests.rs | 12 -- yjit/src/backend/arm64/mod.rs | 63 ++++++ yjit/src/backend/ir.rs | 4 +- 13 files changed, 810 insertions(+), 54 deletions(-) create mode 100644 yjit/src/asm/arm64/README.md create mode 100644 yjit/src/asm/arm64/inst/data_processing_immediate.rs create mode 100644 yjit/src/asm/arm64/inst/data_processing_register.rs create mode 100644 yjit/src/asm/arm64/inst/family.rs create mode 100644 yjit/src/asm/arm64/inst/mod.rs create mode 100644 yjit/src/asm/arm64/inst/sf.rs create mode 100644 yjit/src/asm/arm64/mod.rs create mode 100644 yjit/src/asm/arm64/opnd.rs create mode 100644 yjit/src/backend/arm64/mod.rs diff --git a/yjit/src/asm/arm64/README.md b/yjit/src/asm/arm64/README.md new file mode 100644 index 00000000000000..3d0ec57d340080 --- /dev/null +++ b/yjit/src/asm/arm64/README.md @@ -0,0 +1,18 @@ +# Arm64 + +This module is responsible for encoding YJIT 
operands into an appropriate Arm64 encoding. + +## Architecture + +Every instruction in the Arm64 instruction set is 32 bits wide and is represented in little-endian order. Because they're all the same size, we represent each instruction by a struct that implements `From<T> for u32`, which contains the mechanism for encoding each instruction. + +Generally each set of instructions falls under a certain family (like data processing -- register). These instructions are encoded similarly, so we group them into their own submodules. The encoding for each type is shown in the documentation for the struct that ends up being created. + +In general each set of bytes inside of the struct has either a direct value (usually a `u8`/`u16`) or some kind of `enum` that can be converted directly into a `u32`. + +## Helpful links + +* [Arm A64 Instruction Set Architecture](https://developer.arm.com/documentation/ddi0596/2021-12?lang=en) Official documentation +* [armconverter.com](https://armconverter.com/) A website that encodes Arm assembly syntax +* [hatstone](https://github.com/tenderlove/hatstone) A wrapper around the Capstone disassembler written in Ruby +* [onlinedisassembler.com](https://onlinedisassembler.com/odaweb/) A web-based disassembler diff --git a/yjit/src/asm/arm64/inst/data_processing_immediate.rs b/yjit/src/asm/arm64/inst/data_processing_immediate.rs new file mode 100644 index 00000000000000..12498848b284bd --- /dev/null +++ b/yjit/src/asm/arm64/inst/data_processing_immediate.rs @@ -0,0 +1,198 @@ +use super::{ + super::opnd::*, + family::Family, + sf::Sf +}; + +/// The operation being performed by this instruction. +enum Op { + Add = 0b0, + Sub = 0b1 +} + +// Whether or not to update the flags when this instruction is performed. +enum S { + LeaveFlags = 0b0, + UpdateFlags = 0b1 +} + +/// How much to shift the immediate by. 
+enum Shift { + LSL0 = 0b0, // no shift + LSL12 = 0b1 // logical shift left by 12 bits +} + +/// The struct that represents an A64 data processing -- immediate instruction +/// that can be encoded. +/// +/// Add/subtract (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 0 1 0 | +/// | sf op S sh imm12.................................... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct DataProcessingImmediate { + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf, + + /// The opcode for this instruction. + op: Op, + + /// Whether or not to update the flags when this instruction is performed. + s: S, + + /// How much to shift the immediate by. + shift: Shift, + + /// The value of the immediate. + imm12: u16, + + /// The register number of the first operand register. + rn: u8, + + /// The register number of the destination register. 
+ rd: u8 +} + +impl DataProcessingImmediate { + /// ADD (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en + pub fn add(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { + let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); + + Self { + sf: rd.num_bits.into(), + op: Op::Add, + s: S::LeaveFlags, + shift: Shift::LSL0, + imm12: imm12.value as u16, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// ADDS (immediate, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en + pub fn adds(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { + let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); + + Self { + sf: rd.num_bits.into(), + op: Op::Add, + s: S::UpdateFlags, + shift: Shift::LSL0, + imm12: imm12.value as u16, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// SUB (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en + pub fn sub(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { + let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); + + Self { + sf: rd.num_bits.into(), + op: Op::Sub, + s: S::LeaveFlags, + shift: Shift::LSL0, + imm12: imm12.value as u16, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// SUBS (immediate, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en + pub fn subs(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { + let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); + + Self { + sf: rd.num_bits.into(), + op: Op::Sub, + s: S::UpdateFlags, + shift: Shift::LSL0, + imm12: imm12.value as u16, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// Extract out two registers and an immediate from the given operands. + /// Panic if any of the operands do not match the expected type or size. 
+ fn unwrap<'a>(rd: &'a A64Opnd, rn: &'a A64Opnd, imm12: &'a A64Opnd) -> (&'a A64Reg, &'a A64Reg, &'a A64UImm) { + match (rd, rn, imm12) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "Both rd and rn operands to a data processing immediate instruction must be of the same size."); + assert!(imm12.num_bits <= 12, "The immediate operand to a data processing immediate instruction must be 12 bits or less."); + (rd, rn, imm12) + }, + _ => { + panic!("Expected 2 register operands and an immediate operand for a data processing immediate instruction."); + } + } + } +} + +impl From for u32 { + /// Convert a data processing instruction into a 32-bit value. + fn from(inst: DataProcessingImmediate) -> Self { + 0 + | (inst.sf as u32).wrapping_shl(31) + | (inst.op as u32).wrapping_shl(30) + | (inst.s as u32).wrapping_shl(29) + | (Family::DataProcessingImmediate as u32).wrapping_shl(25) + | (0b1 << 24) + | (inst.shift as u32).wrapping_shl(22) + | (inst.imm12 as u32).wrapping_shl(10) + | (inst.rn as u32).wrapping_shl(5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert a data processing instruction into a 4 byte array. 
+ fn from(inst: DataProcessingImmediate) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + let uimm12 = A64Opnd::new_uimm(7); + let inst = DataProcessingImmediate::add(&X0, &X1, &uimm12); + let result: u32 = inst.into(); + assert_eq!(0x91001c20, result); + } + + #[test] + fn test_adds() { + let uimm12 = A64Opnd::new_uimm(7); + let inst = DataProcessingImmediate::adds(&X0, &X1, &uimm12); + let result: u32 = inst.into(); + assert_eq!(0xb1001c20, result); + } + + #[test] + fn test_sub() { + let uimm12 = A64Opnd::new_uimm(7); + let inst = DataProcessingImmediate::sub(&X0, &X1, &uimm12); + let result: u32 = inst.into(); + assert_eq!(0xd1001c20, result); + } + + #[test] + fn test_subs() { + let uimm12 = A64Opnd::new_uimm(7); + let inst = DataProcessingImmediate::subs(&X0, &X1, &uimm12); + let result: u32 = inst.into(); + assert_eq!(0xf1001c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/data_processing_register.rs b/yjit/src/asm/arm64/inst/data_processing_register.rs new file mode 100644 index 00000000000000..6203034e3fea26 --- /dev/null +++ b/yjit/src/asm/arm64/inst/data_processing_register.rs @@ -0,0 +1,202 @@ +use super::{ + super::opnd::*, + family::Family, + sf::Sf +}; + +/// The operation being performed by this instruction. +enum Op { + Add = 0b0, + Sub = 0b1 +} + +// Whether or not to update the flags when this instruction is performed. +enum S { + LeaveFlags = 0b0, + UpdateFlags = 0b1 +} + +/// The type of shift to perform on the second operand register. +enum Shift { + LSL = 0b00, // logical shift left (unsigned) + LSR = 0b01, // logical shift right (unsigned) + ASR = 0b10 // arithmetic shift right (signed) +} + +/// The struct that represents an A64 data processing -- register instruction +/// that can be encoded. 
+/// +/// Add/subtract (shifted register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 1 0 | +/// | sf op S shift rm.............. imm6............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct DataProcessingRegister { + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf, + + /// The opcode for this instruction. + op: Op, + + /// Whether or not to update the flags when this instruction is performed. + s: S, + + /// The type of shift to perform on the second operand register. + shift: Shift, + + /// The register number of the second operand register. + rm: u8, + + /// The amount to shift the second operand register by. + imm6: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The register number of the destination register. 
+ rd: u8 +} + +impl DataProcessingRegister { + /// ADD (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en + pub fn add(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { + let (rd, rn, rm) = Self::unwrap(rd, rn, rm); + + Self { + sf: rd.num_bits.into(), + op: Op::Add, + s: S::LeaveFlags, + shift: Shift::LSL, + rm: rm.reg_no, + imm6: 0, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// ADDS (shifted register, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en + pub fn adds(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { + let (rd, rn, rm) = Self::unwrap(rd, rn, rm); + + Self { + sf: rd.num_bits.into(), + op: Op::Add, + s: S::UpdateFlags, + shift: Shift::LSL, + rm: rm.reg_no, + imm6: 0, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// SUB (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en + pub fn sub(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { + let (rd, rn, rm) = Self::unwrap(rd, rn, rm); + + Self { + sf: rd.num_bits.into(), + op: Op::Sub, + s: S::LeaveFlags, + shift: Shift::LSL, + rm: rm.reg_no, + imm6: 0, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// SUBS (shifted register, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en + pub fn subs(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { + let (rd, rn, rm) = Self::unwrap(rd, rn, rm); + + Self { + sf: rd.num_bits.into(), + op: Op::Sub, + s: S::UpdateFlags, + shift: Shift::LSL, + rm: rm.reg_no, + imm6: 0, + rn: rn.reg_no, + rd: rd.reg_no + } + } + + /// Extract out three registers from the given operands. 
Panic if any of the + /// operands are not registers or if they are not the same size. + fn unwrap<'a>(rd: &'a A64Opnd, rn: &'a A64Opnd, rm: &'a A64Opnd) -> (&'a A64Reg, &'a A64Reg, &'a A64Reg) { + match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "All operands to a data processing register instruction must be of the same size."); + (rd, rn, rm) + }, + _ => { + panic!("Expected 3 register operands for a data processing register instruction."); + } + } + } +} + +impl From for u32 { + /// Convert a data processing instruction into a 32-bit value. + fn from(inst: DataProcessingRegister) -> Self { + 0 + | (inst.sf as u32).wrapping_shl(31) + | (inst.op as u32).wrapping_shl(30) + | (inst.s as u32).wrapping_shl(29) + | (Family::DataProcessingRegister as u32).wrapping_shl(25) + | (0b1 << 24) + | (inst.shift as u32).wrapping_shl(22) + | (inst.rm as u32).wrapping_shl(16) + | (inst.imm6 as u32).wrapping_shl(10) + | (inst.rn as u32).wrapping_shl(5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert a data processing instruction into a 4 byte array. 
+ fn from(inst: DataProcessingRegister) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + let inst = DataProcessingRegister::add(&X0, &X1, &X2); + let result: u32 = inst.into(); + assert_eq!(0x8b020020, result); + } + + #[test] + fn test_adds() { + let inst = DataProcessingRegister::adds(&X0, &X1, &X2); + let result: u32 = inst.into(); + assert_eq!(0xab020020, result); + } + + #[test] + fn test_sub() { + let inst = DataProcessingRegister::sub(&X0, &X1, &X2); + let result: u32 = inst.into(); + assert_eq!(0xcb020020, result); + } + + #[test] + fn test_subs() { + let inst = DataProcessingRegister::subs(&X0, &X1, &X2); + let result: u32 = inst.into(); + assert_eq!(0xeb020020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/family.rs b/yjit/src/asm/arm64/inst/family.rs new file mode 100644 index 00000000000000..ff5a335406e66d --- /dev/null +++ b/yjit/src/asm/arm64/inst/family.rs @@ -0,0 +1,34 @@ +/// These are the top-level encodings. They're effectively the family of +/// instructions, as each instruction within those groups shares these same +/// bits (28-25). +/// +/// In the documentation, you can see that some of the bits are +/// optional (e.g., x1x0 for loads and stores). We represent that here as 0100 +/// since we're bitwise ORing the family into the resulting encoding. 
+/// +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding?lang=en +pub enum Family { + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Reserved?lang=en + Reserved = 0b0000, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/SME-encodings?lang=en + SMEEncodings = 0b0001, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/SVE-encodings?lang=en + SVEEncodings = 0b0010, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en + DataProcessingImmediate = 0b1000, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en + BranchesAndSystem = 0b1010, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en + LoadsAndStores = 0b0100, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en + DataProcessingRegister = 0b0101, + + /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en + DataProcessingScalar = 0b0111 +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs new file mode 100644 index 00000000000000..5a0e148ff93c21 --- /dev/null +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -0,0 +1,32 @@ +mod data_processing_immediate; +mod data_processing_register; +mod family; +mod sf; + +use data_processing_immediate::DataProcessingImmediate; +use data_processing_register::DataProcessingRegister; + +use crate::asm::CodeBlock; +use super::opnd::*; + +/// ADD +pub fn add(cb: &mut CodeBlock, rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) { + let bytes: [u8; 4] = match rm { + A64Opnd::UImm(_) => DataProcessingImmediate::add(rd, rn, rm).into(), + A64Opnd::Reg(_) => DataProcessingRegister::add(rd, rn, 
rm).into(), + _ => panic!("Invalid operand combination to add.") + }; + + cb.write_bytes(&bytes); +} + +/// SUB +pub fn sub(cb: &mut CodeBlock, rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) { + let bytes: [u8; 4] = match rm { + A64Opnd::UImm(_) => DataProcessingImmediate::sub(rd, rn, rm).into(), + A64Opnd::Reg(_) => DataProcessingRegister::sub(rd, rn, rm).into(), + _ => panic!("Invalid operand combination to add.") + }; + + cb.write_bytes(&bytes); +} diff --git a/yjit/src/asm/arm64/inst/sf.rs b/yjit/src/asm/arm64/inst/sf.rs new file mode 100644 index 00000000000000..c2fd33302c1ef8 --- /dev/null +++ b/yjit/src/asm/arm64/inst/sf.rs @@ -0,0 +1,19 @@ +/// This is commonly the top-most bit in the encoding of the instruction, and +/// represents whether register operands should be treated as 64-bit registers +/// or 32-bit registers. +pub enum Sf { + Sf32 = 0b0, + Sf64 = 0b1 +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into an Sf enum variant. +impl From for Sf { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Sf::Sf64, + 32 => Sf::Sf32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs new file mode 100644 index 00000000000000..85a472ddec19ff --- /dev/null +++ b/yjit/src/asm/arm64/mod.rs @@ -0,0 +1,4 @@ +#![allow(dead_code)] // For instructions and operands we're not currently using. + +mod inst; +mod opnd; diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs new file mode 100644 index 00000000000000..3f86a89fd5ec45 --- /dev/null +++ b/yjit/src/asm/arm64/opnd.rs @@ -0,0 +1,159 @@ +use crate::asm::{imm_num_bits, uimm_num_bits}; + +/// This operand represents a signed immediate value. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct A64Imm +{ + // Size in bits + pub num_bits: u8, + + // The value of the immediate + pub value: i64 +} + +impl A64Imm { + pub fn new(value: i64) -> Self { + A64Imm { num_bits: imm_num_bits(value), value } + } +} + +/// This operand represents an unsigned immediate value. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct A64UImm +{ + // Size in bits + pub num_bits: u8, + + // The value of the immediate + pub value: u64 +} + +impl A64UImm { + pub fn new(value: u64) -> Self { + A64UImm { num_bits: uimm_num_bits(value), value } + } +} + +/// This operand represents a register. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct A64Reg +{ + // Size in bits + pub num_bits: u8, + + // Register index number + pub reg_no: u8, +} + +#[derive(Clone, Copy, Debug)] +pub enum A64Opnd +{ + // Dummy operand + None, + + // Immediate value + Imm(A64Imm), + + // Unsigned immediate + UImm(A64UImm), + + // Register + Reg(A64Reg), +} + +impl A64Opnd { + /// Create a new immediate value operand. + pub fn new_imm(value: i64) -> Self { + A64Opnd::Imm(A64Imm::new(value)) + } + + /// Create a new unsigned immediate value operand. + pub fn new_uimm(value: u64) -> Self { + A64Opnd::UImm(A64UImm::new(value)) + } + + /// Convenience function to check if this operand is a register. 
+ pub fn is_reg(&self) -> bool { + match self { + A64Opnd::Reg(_) => true, + _ => false + } + } +} + +pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 }; +pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 }; +pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; +pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; + +pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; +pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 }; + +// 64-bit registers +pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); +pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); +pub const X2: A64Opnd = A64Opnd::Reg(X2_REG); +pub const X3: A64Opnd = A64Opnd::Reg(X3_REG); +pub const X4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 4 }); +pub const X5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 5 }); +pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 }); +pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 }); +pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 }); +pub const X9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 9 }); +pub const X10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 10 }); +pub const X11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 11 }); +pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); +pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); +pub const X14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 14 }); +pub const X15: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 15 }); +pub const X16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 16 }); +pub const X17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 17 }); +pub const X18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 18 }); +pub const X19: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 19 }); +pub const X20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 20 }); +pub const X21: A64Opnd = A64Opnd::Reg(A64Reg { 
num_bits: 64, reg_no: 21 }); +pub const X22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 22 }); +pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); +pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); +pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); +pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); +pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); +pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); +pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); +pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); + +// 32-bit registers +pub const W0: A64Reg = A64Reg { num_bits: 32, reg_no: 0 }; +pub const W1: A64Reg = A64Reg { num_bits: 32, reg_no: 1 }; +pub const W2: A64Reg = A64Reg { num_bits: 32, reg_no: 2 }; +pub const W3: A64Reg = A64Reg { num_bits: 32, reg_no: 3 }; +pub const W4: A64Reg = A64Reg { num_bits: 32, reg_no: 4 }; +pub const W5: A64Reg = A64Reg { num_bits: 32, reg_no: 5 }; +pub const W6: A64Reg = A64Reg { num_bits: 32, reg_no: 6 }; +pub const W7: A64Reg = A64Reg { num_bits: 32, reg_no: 7 }; +pub const W8: A64Reg = A64Reg { num_bits: 32, reg_no: 8 }; +pub const W9: A64Reg = A64Reg { num_bits: 32, reg_no: 9 }; +pub const W10: A64Reg = A64Reg { num_bits: 32, reg_no: 10 }; +pub const W11: A64Reg = A64Reg { num_bits: 32, reg_no: 11 }; +pub const W12: A64Reg = A64Reg { num_bits: 32, reg_no: 12 }; +pub const W13: A64Reg = A64Reg { num_bits: 32, reg_no: 13 }; +pub const W14: A64Reg = A64Reg { num_bits: 32, reg_no: 14 }; +pub const W15: A64Reg = A64Reg { num_bits: 32, reg_no: 15 }; +pub const W16: A64Reg = A64Reg { num_bits: 32, reg_no: 16 }; +pub const W17: A64Reg = A64Reg { num_bits: 32, reg_no: 17 }; +pub const W18: A64Reg = A64Reg { num_bits: 32, reg_no: 18 }; +pub const W19: A64Reg = A64Reg { num_bits: 32, reg_no: 19 }; +pub const W20: A64Reg = A64Reg { 
num_bits: 32, reg_no: 20 }; +pub const W21: A64Reg = A64Reg { num_bits: 32, reg_no: 21 }; +pub const W22: A64Reg = A64Reg { num_bits: 32, reg_no: 22 }; +pub const W23: A64Reg = A64Reg { num_bits: 32, reg_no: 23 }; +pub const W24: A64Reg = A64Reg { num_bits: 32, reg_no: 24 }; +pub const W25: A64Reg = A64Reg { num_bits: 32, reg_no: 25 }; +pub const W26: A64Reg = A64Reg { num_bits: 32, reg_no: 26 }; +pub const W27: A64Reg = A64Reg { num_bits: 32, reg_no: 27 }; +pub const W28: A64Reg = A64Reg { num_bits: 32, reg_no: 28 }; +pub const W29: A64Reg = A64Reg { num_bits: 32, reg_no: 29 }; +pub const W30: A64Reg = A64Reg { num_bits: 32, reg_no: 30 }; + +// C argument registers +pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index e16e8569251ad9..751f9fce0baf4e 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -9,6 +9,8 @@ use crate::virtualmem::{VirtualMem, CodePtr}; #[rustfmt::skip] pub mod x86_64; +pub mod arm64; + // // TODO: need a field_size_of macro, to compute the size of a struct field in bytes // @@ -294,3 +296,74 @@ impl OutlinedCb { &mut self.cb } } + +/// Compute the number of bits needed to encode a signed value +pub fn imm_num_bits(imm: i64) -> u8 +{ + // Compute the smallest size this immediate fits in + if imm >= i8::MIN.into() && imm <= i8::MAX.into() { + return 8; + } + if imm >= i16::MIN.into() && imm <= i16::MAX.into() { + return 16; + } + if imm >= i32::MIN.into() && imm <= i32::MAX.into() { + return 32; + } + + return 64; +} + +/// Compute the number of bits needed to encode an unsigned value +pub fn uimm_num_bits(uimm: u64) -> u8 +{ + // Compute the smallest size this immediate fits in + if uimm <= u8::MAX.into() { + return 8; + } + else if uimm <= u16::MAX.into() { + return 16; + } + else if uimm <= u32::MAX.into() { + return 32; + } + + return 64; +} + +#[cfg(test)] +mod tests +{ + use super::*; + + #[test] + fn test_imm_num_bits() + { + assert_eq!(imm_num_bits(i8::MIN.into()), 
8); + assert_eq!(imm_num_bits(i8::MAX.into()), 8); + + assert_eq!(imm_num_bits(i16::MIN.into()), 16); + assert_eq!(imm_num_bits(i16::MAX.into()), 16); + + assert_eq!(imm_num_bits(i32::MIN.into()), 32); + assert_eq!(imm_num_bits(i32::MAX.into()), 32); + + assert_eq!(imm_num_bits(i64::MIN.into()), 64); + assert_eq!(imm_num_bits(i64::MAX.into()), 64); + } + + #[test] + fn test_uimm_num_bits() { + assert_eq!(uimm_num_bits(u8::MIN.into()), 8); + assert_eq!(uimm_num_bits(u8::MAX.into()), 8); + + assert_eq!(uimm_num_bits(((u8::MAX as u16) + 1).into()), 16); + assert_eq!(uimm_num_bits(u16::MAX.into()), 16); + + assert_eq!(uimm_num_bits(((u16::MAX as u32) + 1).into()), 32); + assert_eq!(uimm_num_bits(u32::MAX.into()), 32); + + assert_eq!(uimm_num_bits(((u32::MAX as u64) + 1).into()), 64); + assert_eq!(uimm_num_bits(u64::MAX.into()), 64); + } +} diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 1ada5ffbb7a944..fdfac82f921f95 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -118,7 +118,7 @@ impl X86Opnd { X86Opnd::Mem(mem) => { if mem.disp != 0 { // Compute the required displacement size - let num_bits = sig_imm_size(mem.disp.into()); + let num_bits = imm_num_bits(mem.disp.into()); if num_bits > 32 { panic!("displacement does not fit in 32 bits"); } @@ -250,40 +250,6 @@ pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9]; //=========================================================================== -/// Compute the number of bits needed to encode a signed value -pub fn sig_imm_size(imm: i64) -> u8 -{ - // Compute the smallest size this immediate fits in - if imm >= i8::MIN.into() && imm <= i8::MAX.into() { - return 8; - } - if imm >= i16::MIN.into() && imm <= i16::MAX.into() { - return 16; - } - if imm >= i32::MIN.into() && imm <= i32::MAX.into() { - return 32; - } - - return 64; -} - -/// Compute the number of bits needed to encode an unsigned value -pub fn unsig_imm_size(imm: u64) -> u8 -{ - // Compute the 
smallest size this immediate fits in - if imm <= u8::MAX.into() { - return 8; - } - else if imm <= u16::MAX.into() { - return 16; - } - else if imm <= u32::MAX.into() { - return 32; - } - - return 64; -} - /// Shorthand for memory operand with base register and displacement pub fn mem_opnd(num_bits: u8, base_reg: X86Opnd, disp: i32) -> X86Opnd { @@ -362,12 +328,12 @@ static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits) pub fn imm_opnd(value: i64) -> X86Opnd { - X86Opnd::Imm(X86Imm { num_bits: sig_imm_size(value), value }) + X86Opnd::Imm(X86Imm { num_bits: imm_num_bits(value), value }) } pub fn uimm_opnd(value: u64) -> X86Opnd { - X86Opnd::UImm(X86UImm { num_bits: unsig_imm_size(value), value }) + X86Opnd::UImm(X86UImm { num_bits: uimm_num_bits(value), value }) } pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd @@ -619,7 +585,7 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r }, // R/M + UImm (_, X86Opnd::UImm(uimm)) => { - let num_bits = sig_imm_size(uimm.value.try_into().unwrap()); + let num_bits = imm_num_bits(uimm.value.try_into().unwrap()); if num_bits <= 8 { // 8-bit immediate @@ -1013,7 +979,7 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(sig_imm_size(imm.value) <= (output_num_bits as u8)); + assert!(imm_num_bits(imm.value) <= (output_num_bits as u8)); cb.write_int(imm.value as u64, output_num_bits); }, // M + UImm @@ -1028,7 +994,7 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(sig_imm_size(uimm.value as i64) <= (output_num_bits as u8)); + assert!(imm_num_bits(uimm.value as i64) <= (output_num_bits as u8)); cb.write_int(uimm.value, output_num_bits); }, // * + Imm/UImm diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs index ffcc0634202937..92691803a30800 100644 --- 
a/yjit/src/asm/x86_64/tests.rs +++ b/yjit/src/asm/x86_64/tests.rs @@ -1,18 +1,6 @@ #![cfg(test)] use crate::asm::x86_64::*; -use std::fmt; - -/// Produce hex string output from the bytes in a code block -impl<'a> fmt::LowerHex for super::CodeBlock { - fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { - for pos in 0..self.write_pos { - let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() }; - fmtr.write_fmt(format_args!("{:02x}", byte))?; - } - Ok(()) - } -} /// Check that the bytes for an instruction sequence match a hex string fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs new file mode 100644 index 00000000000000..f16f9c0f634360 --- /dev/null +++ b/yjit/src/backend/arm64/mod.rs @@ -0,0 +1,63 @@ +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_imports)] + +use crate::asm::{CodeBlock}; +use crate::asm::arm64::*; +use crate::codegen::{JITState}; +use crate::cruby::*; +use crate::backend::ir::*; + +// Use the arm64 register type for this platform +pub type Reg = A64Reg; + +// Callee-saved registers +pub const _CFP: Opnd = Opnd::Reg(X9); +pub const _EC: Opnd = Opnd::Reg(X10); +pub const _SP: Opnd = Opnd::Reg(X11); + +// C return value register on this platform +pub const RET_REG: Reg = X0; + +/// Map Opnd to A64Opnd +impl From for A64Opnd { + fn from(opnd: Opnd) -> Self { + match opnd { + Opnd::UImm(val) => uimm_opnd(val), + Opnd::Imm(val) => imm_opnd(val), + Opnd::Reg(reg) => A64Opnd::Reg(reg), + _ => panic!("unsupported arm64 operand type") + } + } +} + +impl Assembler +{ + // Get the list of registers from which we can allocate on this platform + pub fn get_scratch_regs() -> Vec + { + vec![X12_REG, X13_REG] + } + + // Split platform-specific instructions + fn arm64_split(mut self) -> Assembler + { + todo!(); + } + + // Emit platform-specific machine code + pub fn arm64_emit(&mut self, jit: &mut JITState, cb: &mut 
CodeBlock) + { + todo!(); + } + + // Optimize and compile the stored instructions + pub fn compile_with_regs(self, jit: &mut JITState, cb: &mut CodeBlock, regs: Vec) + { + self + .arm64_split() + .split_loads() + .alloc_regs(regs) + .arm64_emit(jit, cb); + } +} diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index f6a02909d982cb..3fe9b8d2ec4950 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -14,8 +14,8 @@ use crate::codegen::{JITState}; #[cfg(target_arch = "x86_64")] use crate::backend::x86_64::*; -//#[cfg(target_arch = "aarch64")] -//use crate::backend:aarch64::* +#[cfg(target_arch = "aarch64")] +use crate::backend::arm64::*; pub const EC: Opnd = _EC; pub const CFP: Opnd = _CFP; From 3133540be79a511c79c3876df40ad25c912ecc79 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 7 Jun 2022 13:38:47 -0400 Subject: [PATCH 311/546] Progress on codegen.rs port --- yjit/src/codegen.rs | 56 ++++++++------------------------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 28cf05e95c6e24..3491391aa062de 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -40,7 +40,7 @@ pub const REG1: X86Opnd = RCX; // be invalidated. In this case the JMP takes 5 bytes, but // gen_send_general will always MOV the receiving object // into place, so 2 bytes are always written automatically. -pub const JUMP_SIZE_IN_BYTES:usize = 3; +//pub const JUMP_SIZE_IN_BYTES: usize = 3; /// Status returned by code generation functions #[derive(PartialEq, Debug)] @@ -148,6 +148,7 @@ pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE { unsafe { *(jit.pc.offset(arg_idx + 1)) } } +/* // Load a VALUE into a register and keep track of the reference if it is on the GC heap. 
pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) { assert!(matches!(reg, X86Opnd::Reg(_))); @@ -163,6 +164,7 @@ pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: jit.add_gc_obj_offset(ptr_offset); } } +*/ // Get the index of the next instruction fn jit_next_insn_idx(jit: &JITState) -> u32 { @@ -281,19 +283,7 @@ macro_rules! counted_exit { // Save the incremented PC on the CFP // This is necessary when callees can raise or allocate -fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) { - let pc: *mut VALUE = jit.get_pc(); - let ptr: *mut VALUE = unsafe { - let cur_insn_len = insn_len(jit.get_opcode()) as isize; - pc.offset(cur_insn_len) - }; - mov(cb, scratch_reg, const_ptr_opnd(ptr as *const u8)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg); -} - -// Save the incremented PC on the CFP -// This is necessary when callees can raise or allocate -fn ir_jit_save_pc(jit: &JITState, asm: &mut Assembler) { +fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { let pc: *mut VALUE = jit.get_pc(); let ptr: *mut VALUE = unsafe { let cur_insn_len = insn_len(jit.get_opcode()) as isize; @@ -307,21 +297,7 @@ fn ir_jit_save_pc(jit: &JITState, asm: &mut Assembler) { /// This realigns the interpreter SP with the JIT SP /// Note: this will change the current value of REG_SP, /// which could invalidate memory operands -fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) { - if ctx.get_sp_offset() != 0 { - let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer); - let cfp_sp_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP); - mov(cb, cfp_sp_opnd, REG_SP); - ctx.set_sp_offset(0); - } -} - -/// Save the current SP on the CFP -/// This realigns the interpreter SP with the JIT SP -/// Note: this will change the current value of REG_SP, -/// which could invalidate memory operands -fn ir_gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { +fn 
gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { if ctx.get_sp_offset() != 0 { let stack_pointer = ctx.ir_sp_opnd(0); let sp_addr = asm.lea(stack_pointer); @@ -332,11 +308,6 @@ fn ir_gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { } } - - - - - /// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that /// could: /// - Perform GC allocation @@ -345,27 +316,18 @@ fn ir_gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { fn jit_prepare_routine_call( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, scratch_reg: X86Opnd, ) { jit.record_boundary_patch_point = true; - jit_save_pc(jit, cb, scratch_reg); - gen_save_sp(cb, ctx); + jit_save_pc(jit, asm); + gen_save_sp(jit, asm, ctx); // In case the routine calls Ruby methods, it can set local variables // through Kernel#binding and other means. ctx.clear_local_types(); } - - - - - - - - - /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) { @@ -695,6 +657,7 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O return Some(code_ptr); } +/* // Generate code to check for interrupts and take a side-exit. // Warning: this function clobbers REG0 fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) { @@ -714,6 +677,7 @@ fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) { ); jnz_ptr(cb, side_exit); } +*/ // Generate a stubbed unconditional jump to the next bytecode instruction. // Blocks that are part of a guard chain can use this to share the same successor. 
From e22134277b81124ba2ce4cf3e08ad0983c0432c9 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 7 Jun 2022 15:22:06 -0400 Subject: [PATCH 312/546] Remove x86_64 dependency in core.rs --- yjit/src/backend/x86_64/mod.rs | 3 ++- yjit/src/codegen.rs | 4 ++-- yjit/src/core.rs | 41 ++++++++++++++++++++++------------ 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4aa04b29aa2b37..66f6beefc97ac3 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -111,10 +111,11 @@ impl Assembler Op::Store => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), + // This assumes only load instructions can contain references to GC'd Value operands Op::Load => { mov(cb, insn.out.into(), insn.opnds[0].into()); - // If the value being loaded is a heapp object + // If the value being loaded is a heap object if let Opnd::Value(val) = insn.opnds[0] { if !val.special_const_p() { // The pointer immediate is encoded as the last part of the mov written out diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 3491391aa062de..99436d5f06f71b 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -426,7 +426,7 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr { // Write the adjusted SP back into the CFP if ctx.get_sp_offset() != 0 { let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer); + lea(cb, REG_SP, stack_pointer.into()); mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP); } @@ -791,7 +791,7 @@ pub fn gen_single_block( gen_counter_incr!(cb, exec_instruction); // Add a comment for the name of the YARV instruction - add_comment(cb, &insn_name(opcode)); + asm.comment(&insn_name(opcode)); // If requested, dump instructions for debugging if get_option!(dump_insns) { diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 68cc5e799ffcb2..b36e5d2ac09e9c 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs 
@@ -1,5 +1,6 @@ -use crate::asm::x86_64::*; +//use crate::asm::x86_64::*; use crate::asm::*; +use crate::backend::ir::*; use crate::codegen::*; use crate::virtualmem::CodePtr; use crate::cruby::*; @@ -970,15 +971,15 @@ impl Context { } /// Get an operand for the adjusted stack pointer address - pub fn sp_opnd(&self, offset_bytes: isize) -> X86Opnd { + pub fn sp_opnd(&self, offset_bytes: isize) -> Opnd { let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes; let offset = offset as i32; - return mem_opnd(64, REG_SP, offset); + return Opnd::mem(64, SP, offset); } /// Push one new value on the temp stack with an explicit mapping /// Return a pointer to the new stack top - pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> X86Opnd { + pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd { // If type propagation is disabled, store no types if get_option!(no_type_prop) { return self.stack_push_mapping((mapping, Type::Unknown)); @@ -1001,22 +1002,22 @@ impl Context { // SP points just above the topmost value let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - return mem_opnd(64, REG_SP, offset); + return Opnd::mem(64, SP, offset); } /// Push one new value on the temp stack /// Return a pointer to the new stack top - pub fn stack_push(&mut self, val_type: Type) -> X86Opnd { + pub fn stack_push(&mut self, val_type: Type) -> Opnd { return self.stack_push_mapping((MapToStack, val_type)); } /// Push the self value on the stack - pub fn stack_push_self(&mut self) -> X86Opnd { + pub fn stack_push_self(&mut self) -> Opnd { return self.stack_push_mapping((MapToSelf, Type::Unknown)); } /// Push a local variable on the stack - pub fn stack_push_local(&mut self, local_idx: usize) -> X86Opnd { + pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { if local_idx >= MAX_LOCAL_TYPES { return self.stack_push(Type::Unknown); } @@ -1026,12 +1027,12 @@ impl Context { // 
Pop N values off the stack // Return a pointer to the stack top before the pop operation - pub fn stack_pop(&mut self, n: usize) -> X86Opnd { + pub fn stack_pop(&mut self, n: usize) -> Opnd { assert!(n <= self.stack_size.into()); // SP points just above the topmost value let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - let top = mem_opnd(64, REG_SP, offset); + let top = Opnd::mem(64, SP, offset); // Clear the types of the popped values for i in 0..n { @@ -1050,10 +1051,10 @@ impl Context { } /// Get an operand pointing to a slot on the temp stack - pub fn stack_opnd(&self, idx: i32) -> X86Opnd { + pub fn stack_opnd(&self, idx: i32) -> Opnd { // SP points just above the topmost value let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32); - let opnd = mem_opnd(64, REG_SP, offset); + let opnd = Opnd::mem(64, SP, offset); return opnd; } @@ -1766,6 +1767,13 @@ fn get_branch_target( // This means the branch stub owns its own reference to the branch let branch_ptr: *const RefCell = BranchRef::into_raw(branchref.clone()); + + + + + todo!("stub codegen with new assembler"); + + /* // Call branch_stub_hit(branch_idx, target_idx, ec) mov(ocb, C_ARG_REGS[2], REG_EC); mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64)); @@ -1781,6 +1789,7 @@ fn get_branch_target( } else { Some(stub_addr) } + */ } pub fn gen_branch( @@ -1835,7 +1844,8 @@ fn gen_jump_branch( } if shape == BranchShape::Default { - jmp_ptr(cb, target0); + //jmp_ptr(cb, target0); + todo!("jmp_ptr with new assembler"); } } @@ -2017,7 +2027,10 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // Patch in a jump to block.entry_exit. 
let cur_pos = cb.get_write_ptr(); cb.set_write_ptr(block_start); - jmp_ptr(cb, block_entry_exit); + + //jmp_ptr(cb, block_entry_exit); + todo!("jmp_ptr with new assembler"); + assert!( cb.get_write_ptr() < block_end, "invalidation wrote past end of block" From 26ba0a454c1d08df0afacca2786330198a1daee0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Jun 2022 13:37:49 -0400 Subject: [PATCH 313/546] RET A64 instructions (https://github.com/Shopify/ruby/pull/294) --- .../src/asm/arm64/inst/branches_and_system.rs | 70 +++++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 1 + 2 files changed, 71 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/branches_and_system.rs diff --git a/yjit/src/asm/arm64/inst/branches_and_system.rs b/yjit/src/asm/arm64/inst/branches_and_system.rs new file mode 100644 index 00000000000000..6eece11b880605 --- /dev/null +++ b/yjit/src/asm/arm64/inst/branches_and_system.rs @@ -0,0 +1,70 @@ +use super::{ + super::opnd::*, + family::Family, + sf::Sf +}; + +/// The struct that represents an A64 branches and system instruction that can +/// be encoded. +/// +/// RET +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 1 0 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 | +/// | rn.............. rm.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +struct BranchesAndSystem { + /// The register holding the address to be branched to. 
+ rn: u8 +} + +impl BranchesAndSystem { + /// RET + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + pub fn ret(rn: &A64Opnd) -> Self { + match rn { + A64Opnd::None => BranchesAndSystem { rn: 30 }, + A64Opnd::Reg(reg) => BranchesAndSystem { rn: reg.reg_no }, + _ => panic!("Invalid operand for RET") + } + } +} + +impl From for u32 { + /// Convert a data processing instruction into a 32-bit value. + fn from(inst: BranchesAndSystem) -> Self { + 0 + | (0b11 << 30) + | (Family::BranchesAndSystem as u32).wrapping_shl(25) + | (0b1001011111 << 16) + | (inst.rn as u32).wrapping_shl(5) + } +} + +impl From for [u8; 4] { + /// Convert a data processing instruction into a 4 byte array. + fn from(inst: BranchesAndSystem) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ret() { + let inst = BranchesAndSystem::ret(&A64Opnd::None); + let result: u32 = inst.into(); + assert_eq!(0xd65f03C0, result); + } + + #[test] + fn test_ret_rn() { + let inst = BranchesAndSystem::ret(&X20); + let result: u32 = inst.into(); + assert_eq!(0xd65f0280, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 5a0e148ff93c21..becf4251bd016d 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -1,3 +1,4 @@ +mod branches_and_system; mod data_processing_immediate; mod data_processing_register; mod family; From 03ed50310d772e555f819a4b321e4d6593161233 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 7 Jun 2022 16:27:10 -0400 Subject: [PATCH 314/546] Have Assembler::compile() return a list of GC offsets --- yjit/src/backend/arm64/mod.rs | 15 +++++++------ yjit/src/backend/ir.rs | 39 +++++++++++++++------------------- yjit/src/backend/x86_64/mod.rs | 21 +++++++++++------- yjit/src/codegen.rs | 11 ++++------ yjit/src/core.rs | 1 + 5 files changed, 43 
insertions(+), 44 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index f16f9c0f634360..8685117c5f6dc3 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -33,31 +33,32 @@ impl From for A64Opnd { impl Assembler { - // Get the list of registers from which we can allocate on this platform + /// Get the list of registers from which we can allocate on this platform pub fn get_scratch_regs() -> Vec { vec![X12_REG, X13_REG] } - // Split platform-specific instructions + /// Split platform-specific instructions fn arm64_split(mut self) -> Assembler { todo!(); } - // Emit platform-specific machine code - pub fn arm64_emit(&mut self, jit: &mut JITState, cb: &mut CodeBlock) + /// Emit platform-specific machine code + /// Returns a list of GC offsets + pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec { todo!(); } - // Optimize and compile the stored instructions - pub fn compile_with_regs(self, jit: &mut JITState, cb: &mut CodeBlock, regs: Vec) + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) { self .arm64_split() .split_loads() .alloc_regs(regs) - .arm64_emit(jit, cb); + .arm64_emit(jit, cb) } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 3fe9b8d2ec4950..09ce6b4d6c8991 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -619,10 +619,12 @@ impl Assembler } /// Compile the instructions down to machine code - pub fn compile(self, jit: &mut JITState, cb: &mut CodeBlock) + /// NOTE: should compile return a list of block labels to enable + /// compiling multiple blocks at a time? 
+ pub fn compile(self, cb: &mut CodeBlock) -> Vec { let scratch_regs = Self::get_scratch_regs(); - self.compile_with_regs(jit, cb, scratch_regs); + self.compile_with_regs(cb, scratch_regs) } } @@ -831,18 +833,11 @@ mod tests { assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); } - fn setup_asm(num_regs: usize) -> (Assembler, JITState, CodeBlock, Vec) { - let blockid = BlockId { - iseq: std::ptr::null(), - idx: 0, - }; - let block = Block::new(blockid, &Context::default()); - + fn setup_asm(num_regs: usize) -> (Assembler, CodeBlock, Vec) { let mut regs = Assembler::get_scratch_regs(); return ( Assembler::new(), - JITState::new(&block), CodeBlock::new_dummy(1024), regs.drain(0..num_regs).collect() ); @@ -852,50 +847,50 @@ mod tests { #[test] fn test_compile() { - let (mut asm, mut jit, mut cb, regs) = setup_asm(1); + let (mut asm, mut cb, regs) = setup_asm(1); let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); asm.add(out, Opnd::UImm(2)); - asm.compile(&mut jit, &mut cb); + asm.compile(&mut cb); } // Test memory-to-memory move #[test] fn test_mov_mem2mem() { - let (mut asm, mut jit, mut cb, regs) = setup_asm(1); + let (mut asm, mut cb, regs) = setup_asm(1); asm.comment("check that comments work too"); asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); - asm.compile_with_regs(&mut jit, &mut cb, regs); + asm.compile_with_regs(&mut cb, regs); } // Test load of register into new register #[test] fn test_load_reg() { - let (mut asm, mut jit, mut cb, regs) = setup_asm(1); + let (mut asm, mut cb, regs) = setup_asm(1); let out = asm.load(SP); asm.mov(Opnd::mem(64, SP, 0), out); - asm.compile_with_regs(&mut jit, &mut cb, regs); + asm.compile_with_regs(&mut cb, regs); } // Multiple registers needed and register reuse #[test] fn test_reuse_reg() { - let (mut asm, mut jit, mut cb, regs) = setup_asm(2); + let (mut asm, mut cb, regs) = setup_asm(2); let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); let v2 = 
asm.add(v0, Opnd::UImm(1)); asm.add(v0, v2); - asm.compile_with_regs(&mut jit, &mut cb, regs); + asm.compile_with_regs(&mut cb, regs); } #[test] @@ -905,24 +900,24 @@ mod tests { { } - let (mut asm, mut jit, mut cb, regs) = setup_asm(2); + let (mut asm, mut cb, regs) = setup_asm(2); asm.ccall( dummy_c_fun as *const u8, vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] ); - asm.compile_with_regs(&mut jit, &mut cb, regs); + asm.compile_with_regs(&mut cb, regs); } #[test] fn test_lea_ret() { - let (mut asm, mut jit, mut cb, regs) = setup_asm(1); + let (mut asm, mut cb, regs) = setup_asm(1); let addr = asm.lea(Opnd::mem(64, SP, 0)); asm.cret(addr); - asm.compile_with_regs(&mut jit, &mut cb, regs); + asm.compile_with_regs(&mut cb, regs); } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 66f6beefc97ac3..d0f57d908fef54 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -49,7 +49,7 @@ impl From for X86Opnd { impl Assembler { - // Get the list of registers from which we can allocate on this platform + /// Get the list of registers from which we can allocate on this platform pub fn get_scratch_regs() -> Vec { vec![ @@ -58,7 +58,7 @@ impl Assembler ] } - // Emit platform-specific machine code + /// Emit platform-specific machine code fn x86_split(mut self) -> Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); @@ -92,9 +92,12 @@ impl Assembler }) } - // Emit platform-specific machine code - pub fn x86_emit(&mut self, jit: &mut JITState, cb: &mut CodeBlock) + /// Emit platform-specific machine code + pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Vec { + // List of GC offsets + let mut gc_offsets: Vec = Vec::new(); + // For each instruction for insn in &self.insns { match insn.op { @@ -120,7 +123,7 @@ impl Assembler if !val.special_const_p() { // The pointer immediate is encoded as the last part of the mov written out let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); 
- jit.add_gc_obj_offset(ptr_offset); + gc_offsets.push(ptr_offset); } } }, @@ -162,15 +165,17 @@ impl Assembler _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) }; } + + gc_offsets } - // Optimize and compile the stored instructions - pub fn compile_with_regs(self, jit: &mut JITState, cb: &mut CodeBlock, regs: Vec) + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec { self .x86_split() .split_loads() .alloc_regs(regs) - .x86_emit(jit, cb); + .x86_emit(cb) } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 99436d5f06f71b..d6f8b34596962e 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -118,12 +118,6 @@ impl JITState { self.opcode } - pub fn add_gc_obj_offset(self: &mut JITState, ptr_offset: u32) { - let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut(); - gc_obj_vec.add_gc_obj_offset(ptr_offset); - incr_counter!(num_gc_obj_refs); - } - pub fn get_pc(self: &JITState) -> *mut VALUE { self.pc } @@ -838,10 +832,13 @@ pub fn gen_single_block( // Finish filling out the block { // Compile code into the code block - asm.compile(&mut jit, cb); + let gc_offsets = asm.compile(cb); let mut block = jit.block.borrow_mut(); + // Add the GC offsets to the block + gc_offsets.iter().map(|offs| { block.add_gc_obj_offset(*offs) }); + // Mark the end position of the block block.set_end_addr(cb.get_write_ptr()); diff --git a/yjit/src/core.rs b/yjit/src/core.rs index b36e5d2ac09e9c..10ef9c5151499b 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -908,6 +908,7 @@ impl Block { pub fn add_gc_obj_offset(self: &mut Block, ptr_offset: u32) { self.gc_obj_offsets.push(ptr_offset); + incr_counter!(num_gc_obj_refs); } /// Instantiate a new CmeDependency struct and add it to the list of From c2fdec93a9d533e9e9eaabd96b6bf5210b211abf Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 7 Jun 2022 16:57:16 -0400 Subject: [PATCH 315/546] First pass at 
porting gen_entry_prologue() --- yjit/src/codegen.rs | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d6f8b34596962e..6584e0d1272edb 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -610,30 +610,32 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O let old_write_pos = cb.get_write_pos(); + // TODO: figure out if this is actually beneficial for performance // Align the current write position to cache line boundaries cb.align_pos(64); let code_ptr = cb.get_write_ptr(); add_comment(cb, "yjit entry"); - push(cb, REG_CFP); - push(cb, REG_EC); - push(cb, REG_SP); + let mut asm = Assembler::new(); + + // FIXME + //push(cb, REG_CFP); + //push(cb, REG_EC); + //push(cb, REG_SP); // We are passed EC and CFP - mov(cb, REG_EC, C_ARG_REGS[0]); - mov(cb, REG_CFP, C_ARG_REGS[1]); + asm.mov(EC, C_ARG_REGS[0].into()); + asm.mov(CFP, C_ARG_REGS[1].into()); // Load the current SP from the CFP into REG_SP - mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP)); + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); // Setup cfp->jit_return - mov( - cb, - REG0, - code_ptr_opnd(CodegenGlobals::get_leave_exit_code()), + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr()), ); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0); // We're compiling iseqs that we *expect* to start at `insn_idx`. But in // the case of optional parameters, the interpreter can set the pc to a @@ -642,9 +644,15 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O // compiled for is the same PC that the interpreter wants us to run with. // If they don't match, then we'll take a side exit. 
if unsafe { get_iseq_flags_has_opt(iseq) } { - gen_pc_guard(cb, iseq, insn_idx); + + // FIXME + todo!(); + + //gen_pc_guard(cb, iseq, insn_idx); } + asm.compile(cb); + // Verify MAX_PROLOGUE_SIZE assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE); From 71770ceee5c515d97dad1a0088008561106d141d Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 8 Jun 2022 10:30:10 -0400 Subject: [PATCH 316/546] Map comments in backend --- yjit/src/backend/x86_64/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index d0f57d908fef54..2d425c2fe06cb2 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -101,8 +101,11 @@ impl Assembler // For each instruction for insn in &self.insns { match insn.op { - // TODO: need to map the position of comments in the machine code - Op::Comment => {}, + Op::Comment => { + if cfg!(feature = "asm_comments") { + cb.add_comment(&insn.text.as_ref().unwrap()); + } + }, Op::Label => {}, From b63f8bb45619c891ce45466031012c0a48defefe Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Jun 2022 14:20:43 -0400 Subject: [PATCH 317/546] LDUR (https://github.com/Shopify/ruby/pull/295) * LDUR * Fix up immediate masking * Consume operands directly * Consistency and cleanup * More consistency and entrypoints * Cleaner syntax for masks * Cleaner shifting for encodings --- .../src/asm/arm64/inst/branches_and_system.rs | 28 ++-- .../arm64/inst/data_processing_immediate.rs | 143 +++++++--------- .../arm64/inst/data_processing_register.rs | 154 ++++++++---------- yjit/src/asm/arm64/inst/loads_and_stores.rs | 99 +++++++++++ yjit/src/asm/arm64/inst/mod.rs | 121 ++++++++++++-- yjit/src/asm/arm64/opnd.rs | 40 ++++- 6 files changed, 379 insertions(+), 206 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/loads_and_stores.rs diff --git a/yjit/src/asm/arm64/inst/branches_and_system.rs 
b/yjit/src/asm/arm64/inst/branches_and_system.rs index 6eece11b880605..77e99c112ae64b 100644 --- a/yjit/src/asm/arm64/inst/branches_and_system.rs +++ b/yjit/src/asm/arm64/inst/branches_and_system.rs @@ -1,8 +1,4 @@ -use super::{ - super::opnd::*, - family::Family, - sf::Sf -}; +use super::family::Family; /// The struct that represents an A64 branches and system instruction that can /// be encoded. @@ -14,7 +10,7 @@ use super::{ /// | rn.............. rm.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// -struct BranchesAndSystem { +pub struct BranchesAndSystem { /// The register holding the address to be branched to. rn: u8 } @@ -22,28 +18,24 @@ struct BranchesAndSystem { impl BranchesAndSystem { /// RET /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en - pub fn ret(rn: &A64Opnd) -> Self { - match rn { - A64Opnd::None => BranchesAndSystem { rn: 30 }, - A64Opnd::Reg(reg) => BranchesAndSystem { rn: reg.reg_no }, - _ => panic!("Invalid operand for RET") - } + pub fn ret(rn: u8) -> Self { + Self { rn } } } impl From for u32 { - /// Convert a data processing instruction into a 32-bit value. + /// Convert an instruction into a 32-bit value. fn from(inst: BranchesAndSystem) -> Self { 0 | (0b11 << 30) - | (Family::BranchesAndSystem as u32).wrapping_shl(25) + | ((Family::BranchesAndSystem as u32) << 25) | (0b1001011111 << 16) - | (inst.rn as u32).wrapping_shl(5) + | ((inst.rn as u32) << 5) } } impl From for [u8; 4] { - /// Convert a data processing instruction into a 4 byte array. + /// Convert an instruction into a 4 byte array. 
fn from(inst: BranchesAndSystem) -> [u8; 4] { let result: u32 = inst.into(); result.to_le_bytes() @@ -56,14 +48,14 @@ mod tests { #[test] fn test_ret() { - let inst = BranchesAndSystem::ret(&A64Opnd::None); + let inst = BranchesAndSystem::ret(30); let result: u32 = inst.into(); assert_eq!(0xd65f03C0, result); } #[test] fn test_ret_rn() { - let inst = BranchesAndSystem::ret(&X20); + let inst = BranchesAndSystem::ret(20); let result: u32 = inst.into(); assert_eq!(0xd65f0280, result); } diff --git a/yjit/src/asm/arm64/inst/data_processing_immediate.rs b/yjit/src/asm/arm64/inst/data_processing_immediate.rs index 12498848b284bd..25117efc22c762 100644 --- a/yjit/src/asm/arm64/inst/data_processing_immediate.rs +++ b/yjit/src/asm/arm64/inst/data_processing_immediate.rs @@ -1,8 +1,4 @@ -use super::{ - super::opnd::*, - family::Family, - sf::Sf -}; +use super::{family::Family, sf::Sf}; /// The operation being performed by this instruction. enum Op { @@ -33,127 +29,106 @@ enum Shift { /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct DataProcessingImmediate { - /// Whether or not this instruction is operating on 64-bit operands. - sf: Sf, + /// The register number of the destination register. + rd: u8, - /// The opcode for this instruction. - op: Op, + /// The register number of the first operand register. + rn: u8, - /// Whether or not to update the flags when this instruction is performed. - s: S, + /// The value of the immediate. + imm12: u16, /// How much to shift the immediate by. shift: Shift, - /// The value of the immediate. - imm12: u16, + /// Whether or not to update the flags when this instruction is performed. + s: S, - /// The register number of the first operand register. - rn: u8, + /// The opcode for this instruction. + op: Op, - /// The register number of the destination register. - rd: u8 + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf } impl DataProcessingImmediate { /// ADD (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en - pub fn add(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { - let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); - + pub fn add(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Add, - s: S::LeaveFlags, + rd, + rn, + imm12, shift: Shift::LSL0, - imm12: imm12.value as u16, - rn: rn.reg_no, - rd: rd.reg_no + s: S::LeaveFlags, + op: Op::Add, + sf: num_bits.into() } } /// ADDS (immediate, set flags) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en - pub fn adds(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { - let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); - + pub fn adds(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Add, - s: S::UpdateFlags, + rd, + rn, + imm12, shift: Shift::LSL0, - imm12: imm12.value as u16, - rn: rn.reg_no, - rd: rd.reg_no + s: S::UpdateFlags, + op: Op::Add, + sf: num_bits.into() } } /// SUB (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en - pub fn sub(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self { - let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); - + pub fn sub(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Sub, - s: S::LeaveFlags, + rd, + rn, + imm12, shift: Shift::LSL0, - imm12: imm12.value as u16, - rn: rn.reg_no, - rd: rd.reg_no + s: S::LeaveFlags, + op: Op::Sub, + sf: num_bits.into() } } /// SUBS (immediate, set flags) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en - pub fn subs(rd: &A64Opnd, rn: &A64Opnd, imm12: &A64Opnd) -> Self 
{ - let (rd, rn, imm12) = Self::unwrap(rd, rn, imm12); - + pub fn subs(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Sub, - s: S::UpdateFlags, + rd, + rn, + imm12, shift: Shift::LSL0, - imm12: imm12.value as u16, - rn: rn.reg_no, - rd: rd.reg_no - } - } - - /// Extract out two registers and an immediate from the given operands. - /// Panic if any of the operands do not match the expected type or size. - fn unwrap<'a>(rd: &'a A64Opnd, rn: &'a A64Opnd, imm12: &'a A64Opnd) -> (&'a A64Reg, &'a A64Reg, &'a A64UImm) { - match (rd, rn, imm12) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(rd.num_bits == rn.num_bits, "Both rd and rn operands to a data processing immediate instruction must be of the same size."); - assert!(imm12.num_bits <= 12, "The immediate operand to a data processing immediate instruction must be 12 bits or less."); - (rd, rn, imm12) - }, - _ => { - panic!("Expected 2 register operands and an immediate operand for a data processing immediate instruction."); - } + s: S::UpdateFlags, + op: Op::Sub, + sf: num_bits.into() } } } impl From for u32 { - /// Convert a data processing instruction into a 32-bit value. + /// Convert an instruction into a 32-bit value. 
fn from(inst: DataProcessingImmediate) -> Self { + let imm12 = (inst.imm12 as u32) & ((1 << 12) - 1); + 0 - | (inst.sf as u32).wrapping_shl(31) - | (inst.op as u32).wrapping_shl(30) - | (inst.s as u32).wrapping_shl(29) - | (Family::DataProcessingImmediate as u32).wrapping_shl(25) - | (0b1 << 24) - | (inst.shift as u32).wrapping_shl(22) - | (inst.imm12 as u32).wrapping_shl(10) - | (inst.rn as u32).wrapping_shl(5) + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 30) + | ((inst.s as u32) << 29) + | ((Family::DataProcessingImmediate as u32) << 25) + | (1 << 24) + | ((inst.shift as u32) << 22) + | (imm12 << 10) + | ((inst.rn as u32) << 5) | inst.rd as u32 } } impl From for [u8; 4] { - /// Convert a data processing instruction into a 4 byte array. + /// Convert an instruction into a 4 byte array. fn from(inst: DataProcessingImmediate) -> [u8; 4] { let result: u32 = inst.into(); result.to_le_bytes() @@ -166,32 +141,28 @@ mod tests { #[test] fn test_add() { - let uimm12 = A64Opnd::new_uimm(7); - let inst = DataProcessingImmediate::add(&X0, &X1, &uimm12); + let inst = DataProcessingImmediate::add(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0x91001c20, result); } #[test] fn test_adds() { - let uimm12 = A64Opnd::new_uimm(7); - let inst = DataProcessingImmediate::adds(&X0, &X1, &uimm12); + let inst = DataProcessingImmediate::adds(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0xb1001c20, result); } #[test] fn test_sub() { - let uimm12 = A64Opnd::new_uimm(7); - let inst = DataProcessingImmediate::sub(&X0, &X1, &uimm12); + let inst = DataProcessingImmediate::sub(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0xd1001c20, result); } #[test] fn test_subs() { - let uimm12 = A64Opnd::new_uimm(7); - let inst = DataProcessingImmediate::subs(&X0, &X1, &uimm12); + let inst = DataProcessingImmediate::subs(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0xf1001c20, result); } diff --git a/yjit/src/asm/arm64/inst/data_processing_register.rs 
b/yjit/src/asm/arm64/inst/data_processing_register.rs index 6203034e3fea26..7e9f37ab8e7382 100644 --- a/yjit/src/asm/arm64/inst/data_processing_register.rs +++ b/yjit/src/asm/arm64/inst/data_processing_register.rs @@ -1,8 +1,4 @@ -use super::{ - super::opnd::*, - family::Family, - sf::Sf -}; +use super::{family::Family, sf::Sf}; /// The operation being performed by this instruction. enum Op { @@ -34,134 +30,114 @@ enum Shift { /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct DataProcessingRegister { - /// Whether or not this instruction is operating on 64-bit operands. - sf: Sf, - - /// The opcode for this instruction. - op: Op, + /// The register number of the destination register. + rd: u8, - /// Whether or not to update the flags when this instruction is performed. - s: S, + /// The register number of the first operand register. + rn: u8, - /// The type of shift to perform on the second operand register. - shift: Shift, + /// The amount to shift the second operand register by. + imm6: u8, /// The register number of the second operand register. rm: u8, - /// The amount to shift the second operand register by. - imm6: u8, + /// The type of shift to perform on the second operand register. + shift: Shift, - /// The register number of the first operand register. - rn: u8, + /// Whether or not to update the flags when this instruction is performed. + s: S, - /// The register number of the destination register. - rd: u8 + /// The opcode for this instruction. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf } impl DataProcessingRegister { /// ADD (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en - pub fn add(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { - let (rd, rn, rm) = Self::unwrap(rd, rn, rm); - + pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Add, - s: S::LeaveFlags, - shift: Shift::LSL, - rm: rm.reg_no, + rd, + rn, imm6: 0, - rn: rn.reg_no, - rd: rd.reg_no + rm, + shift: Shift::LSL, + s: S::LeaveFlags, + op: Op::Add, + sf: num_bits.into() } } /// ADDS (shifted register, set flags) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en - pub fn adds(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { - let (rd, rn, rm) = Self::unwrap(rd, rn, rm); - + pub fn adds(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Add, - s: S::UpdateFlags, - shift: Shift::LSL, - rm: rm.reg_no, + rd, + rn, imm6: 0, - rn: rn.reg_no, - rd: rd.reg_no + rm, + shift: Shift::LSL, + s: S::UpdateFlags, + op: Op::Add, + sf: num_bits.into() } } /// SUB (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en - pub fn sub(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { - let (rd, rn, rm) = Self::unwrap(rd, rn, rm); - + pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Sub, - s: S::LeaveFlags, - shift: Shift::LSL, - rm: rm.reg_no, + rd, + rn, imm6: 0, - rn: rn.reg_no, - rd: rd.reg_no + rm, + shift: Shift::LSL, + s: S::LeaveFlags, + op: Op::Sub, + sf: num_bits.into() } } /// SUBS (shifted register, set flags) /// 
https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en - pub fn subs(rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) -> Self { - let (rd, rn, rm) = Self::unwrap(rd, rn, rm); - + pub fn subs(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { Self { - sf: rd.num_bits.into(), - op: Op::Sub, - s: S::UpdateFlags, - shift: Shift::LSL, - rm: rm.reg_no, + rd, + rn, imm6: 0, - rn: rn.reg_no, - rd: rd.reg_no - } - } - - /// Extract out three registers from the given operands. Panic if any of the - /// operands are not registers or if they are not the same size. - fn unwrap<'a>(rd: &'a A64Opnd, rn: &'a A64Opnd, rm: &'a A64Opnd) -> (&'a A64Reg, &'a A64Reg, &'a A64Reg) { - match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "All operands to a data processing register instruction must be of the same size."); - (rd, rn, rm) - }, - _ => { - panic!("Expected 3 register operands for a data processing register instruction."); - } + rm, + shift: Shift::LSL, + s: S::UpdateFlags, + op: Op::Sub, + sf: num_bits.into() } } } impl From for u32 { - /// Convert a data processing instruction into a 32-bit value. + /// Convert an instruction into a 32-bit value. 
fn from(inst: DataProcessingRegister) -> Self { + let imm6 = (inst.imm6 as u32) & ((1 << 6) - 1); + 0 - | (inst.sf as u32).wrapping_shl(31) - | (inst.op as u32).wrapping_shl(30) - | (inst.s as u32).wrapping_shl(29) - | (Family::DataProcessingRegister as u32).wrapping_shl(25) - | (0b1 << 24) - | (inst.shift as u32).wrapping_shl(22) - | (inst.rm as u32).wrapping_shl(16) - | (inst.imm6 as u32).wrapping_shl(10) - | (inst.rn as u32).wrapping_shl(5) + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 30) + | ((inst.s as u32) << 29) + | ((Family::DataProcessingRegister as u32) << 25) + | (1 << 24) + | ((inst.shift as u32) << 22) + | ((inst.rm as u32) << 16) + | (imm6 << 10) + | ((inst.rn as u32) << 5) | inst.rd as u32 } } impl From for [u8; 4] { - /// Convert a data processing instruction into a 4 byte array. + /// Convert an instruction into a 4 byte array. fn from(inst: DataProcessingRegister) -> [u8; 4] { let result: u32 = inst.into(); result.to_le_bytes() @@ -174,28 +150,28 @@ mod tests { #[test] fn test_add() { - let inst = DataProcessingRegister::add(&X0, &X1, &X2); + let inst = DataProcessingRegister::add(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0x8b020020, result); } #[test] fn test_adds() { - let inst = DataProcessingRegister::adds(&X0, &X1, &X2); + let inst = DataProcessingRegister::adds(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0xab020020, result); } #[test] fn test_sub() { - let inst = DataProcessingRegister::sub(&X0, &X1, &X2); + let inst = DataProcessingRegister::sub(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0xcb020020, result); } #[test] fn test_subs() { - let inst = DataProcessingRegister::subs(&X0, &X1, &X2); + let inst = DataProcessingRegister::subs(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0xeb020020, result); } diff --git a/yjit/src/asm/arm64/inst/loads_and_stores.rs b/yjit/src/asm/arm64/inst/loads_and_stores.rs new file mode 100644 index 00000000000000..5fb8b7a6fbf45a --- /dev/null +++ 
b/yjit/src/asm/arm64/inst/loads_and_stores.rs @@ -0,0 +1,99 @@ +use super::family::Family; + +/// The size of the operands being operated on. +enum Size { + Size32 = 0b10, + Size64 = 0b11, +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 data processing -- immediate instruction +/// that can be encoded. +/// +/// LDUR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 1 0 0 0 | +/// | size. imm9.......................... rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadsAndStores { + /// The number of the register to load the value into. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// The optional signed immediate byte offset from the base register. + imm9: i16, + + /// The size of the operands being operated on. + size: Size +} + +impl LoadsAndStores { + /// LDUR (load register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en + pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { + rt, + rn, + imm9, + size: num_bits.into() + } + } +} + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: LoadsAndStores) -> Self { + let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); + + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | ((Family::LoadsAndStores as u32) << 25) + | (1 << 22) + | (imm9 << 12) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LoadsAndStores) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldur() { + let inst = LoadsAndStores::ldur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8400020, result); + } + + #[test] + fn test_ldur_with_imm() { + let inst = LoadsAndStores::ldur(0, 1, 123, 64); + let result: u32 = inst.into(); + assert_eq!(0xf847b020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index becf4251bd016d..9c5b53b0aca3b0 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -2,31 +2,130 @@ mod branches_and_system; mod data_processing_immediate; mod data_processing_register; mod family; +mod loads_and_stores; mod sf; +use branches_and_system::BranchesAndSystem; use data_processing_immediate::DataProcessingImmediate; use data_processing_register::DataProcessingRegister; +use loads_and_stores::LoadsAndStores; -use crate::asm::CodeBlock; +use crate::asm::{CodeBlock, imm_num_bits}; use super::opnd::*; /// ADD -pub fn add(cb: &mut CodeBlock, rd: &A64Opnd, rn: &A64Opnd, rm: &A64Opnd) { - let bytes: [u8; 4] = match rm { - A64Opnd::UImm(_) => DataProcessingImmediate::add(rd, rn, rm).into(), - A64Opnd::Reg(_) => DataProcessingRegister::add(rd, rn, rm).into(), - _ => panic!("Invalid operand combination to add.") +pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == 
rm.num_bits, + "All operands must be of the same size." + ); + + DataProcessingRegister::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + + DataProcessingImmediate::add(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to add instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADDS +pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataProcessingRegister::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + + DataProcessingImmediate::adds(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to adds instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDUR +pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(imm_num_bits(rn.disp.into()) <= 9, "Expected displacement to be 9 bits or less"); + + LoadsAndStores::ldur(rt.reg_no, rn.base_reg_no, rn.disp.try_into().unwrap(), rt.num_bits).into() + }, + _ => panic!("Invalid operands for LDUR") }; cb.write_bytes(&bytes); } /// SUB -pub fn sub(cb: &mut CodeBlock, rd: &A64Opnd, rn: &A64Opnd, rm: 
&A64Opnd) { - let bytes: [u8; 4] = match rm { - A64Opnd::UImm(_) => DataProcessingImmediate::sub(rd, rn, rm).into(), - A64Opnd::Reg(_) => DataProcessingRegister::sub(rd, rn, rm).into(), - _ => panic!("Invalid operand combination to add.") +pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataProcessingRegister::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + + DataProcessingImmediate::sub(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to sub instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// SUBS +pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataProcessingRegister::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + + DataProcessingImmediate::subs(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to subs instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// RET +pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::None => BranchesAndSystem::ret(30).into(), + A64Opnd::Reg(reg) => BranchesAndSystem::ret(reg.reg_no).into(), + _ => panic!("Invalid operand for RET") }; cb.write_bytes(&bytes); diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index 3f86a89fd5ec45..ba8ecd166dcbb4 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -13,7 +13,7 @@ pub struct A64Imm impl A64Imm { pub fn new(value: i64) -> Self { - A64Imm { num_bits: imm_num_bits(value), value } + Self { num_bits: imm_num_bits(value), value } } } @@ -30,7 +30,7 @@ pub struct A64UImm impl A64UImm { pub fn new(value: u64) -> Self { - A64UImm { num_bits: uimm_num_bits(value), value } + Self { num_bits: uimm_num_bits(value), value } } } @@ -45,6 +45,34 @@ pub struct A64Reg pub reg_no: u8, } +#[derive(Clone, Copy, Debug)] +pub struct A64Mem +{ + // Size in bits + pub num_bits: u8, + + /// Base register number + pub base_reg_no: u8, + + /// Constant displacement from the base, not scaled + pub disp: i32, +} + +impl A64Mem { + pub fn new(reg: A64Opnd, disp: i32) -> Self { + match reg { + A64Opnd::Reg(reg) => { + Self { + num_bits: reg.num_bits, + base_reg_no: reg.reg_no, + disp + } + }, + _ => panic!("Expected register operand") + } + } +} + #[derive(Clone, Copy, Debug)] pub enum A64Opnd { @@ -59,6 +87,9 @@ pub enum A64Opnd // Register Reg(A64Reg), + + // Memory + 
Mem(A64Mem) } impl A64Opnd { @@ -72,6 +103,11 @@ impl A64Opnd { A64Opnd::UImm(A64UImm::new(value)) } + /// Creates a new memory operand. + pub fn new_mem(reg: A64Opnd, disp: i32) -> Self { + A64Opnd::Mem(A64Mem::new(reg, disp)) + } + /// Convenience function to check if this operand is a register. pub fn is_reg(&self) -> bool { match self { From 77383b3958a90c3e6c257e3c4431fed54a9de10b Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 8 Jun 2022 15:01:13 -0400 Subject: [PATCH 318/546] Add conditional jumps --- yjit/src/backend/ir.rs | 29 +++++++++++++++++------------ yjit/src/codegen.rs | 25 ++++--------------------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 09ce6b4d6c8991..e5bcd78932157f 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -78,8 +78,9 @@ pub enum Op Cmp, // Low-level conditional jump instructions - Jnz, Jbe, + Je, + Jnz, // C function call with N arguments (variadic) CCall, @@ -636,17 +637,6 @@ impl fmt::Debug for Assembler { impl Assembler { - // Jump if not zero - pub fn jnz(&mut self, target: Target) - { - self.push_insn(Op::Jnz, vec![], Some(target)); - } - - pub fn jbe(&mut self, target: Target) - { - self.push_insn(Op::Jbe, vec![], Some(target)); - } - pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let target = Target::FunPtr(fptr); @@ -654,6 +644,18 @@ impl Assembler } } +macro_rules! def_push_jcc { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self, target: Target) + { + self.push_insn($opcode, vec![], Some(target)); + } + } + }; +} + macro_rules! def_push_1_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler @@ -702,6 +704,9 @@ macro_rules! 
def_push_2_opnd_no_out { }; } +def_push_jcc!(je, Op::Je); +def_push_jcc!(jbe, Op::Jbe); +def_push_jcc!(jnz, Op::Jnz); def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 6584e0d1272edb..59c6773fcc3f60 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -142,24 +142,6 @@ pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE { unsafe { *(jit.pc.offset(arg_idx + 1)) } } -/* -// Load a VALUE into a register and keep track of the reference if it is on the GC heap. -pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) { - assert!(matches!(reg, X86Opnd::Reg(_))); - assert!(reg.num_bits() == 64); - - // Load the pointer constant into the specified register - mov(cb, reg, const_ptr_opnd(ptr.as_ptr())); - - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); - - if !ptr.special_const_p() { - jit.add_gc_obj_offset(ptr_offset); - } -} -*/ - // Get the index of the next instruction fn jit_next_insn_idx(jit: &JITState) -> u32 { jit.insn_idx + insn_len(jit.get_opcode()) @@ -523,11 +505,10 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) { // When a function with optional parameters is called, the entry // PC for the method isn't necessarily 0. fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) { - //RUBY_ASSERT(cb != NULL); - let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC); let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8); + mov(cb, REG0, pc_opnd); mov(cb, REG1, expected_pc_opnd); cmp(cb, REG0, REG1); @@ -619,12 +600,14 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O let mut asm = Assembler::new(); + // TODO: on arm, we need to push the return address here? 
+ // FIXME //push(cb, REG_CFP); //push(cb, REG_EC); //push(cb, REG_SP); - // We are passed EC and CFP + // We are passed EC and CFP as arguments asm.mov(EC, C_ARG_REGS[0].into()); asm.mov(CFP, C_ARG_REGS[1].into()); From ea9abe547da383f30bd0afe73c6693ed1ff68765 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 8 Jun 2022 16:09:16 -0400 Subject: [PATCH 319/546] Add cpush and cpop IR instructions --- yjit/src/backend/ir.rs | 6 +++ yjit/src/backend/x86_64/mod.rs | 20 +++++---- yjit/src/codegen.rs | 74 +++++++++++++++++----------------- 3 files changed, 54 insertions(+), 46 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index e5bcd78932157f..514ac4a67e4042 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -82,6 +82,10 @@ pub enum Op Je, Jnz, + // Push and pop registers to/from the C stack + CPush, + CPop, + // C function call with N arguments (variadic) CCall, @@ -710,6 +714,8 @@ def_push_jcc!(jnz, Op::Jnz); def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); +def_push_1_opnd_no_out!(cpush, Op::CPush); +def_push_1_opnd_no_out!(cpop, Op::CPop); def_push_1_opnd_no_out!(cret, Op::CRet); def_push_1_opnd!(load, Op::Load); def_push_1_opnd!(lea, Op::Lea); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 2d425c2fe06cb2..a40bc2a980d1c1 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -136,14 +136,9 @@ impl Assembler // Load effective address Op::Lea => lea(cb, insn.out.into(), insn.opnds[0].into()), - // Test and set flags - Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), - - /* - Cmp, - Jnz, - Jbe, - */ + // Push and pop to the C stack + Op::CPush => push(cb, insn.opnds[0].into()), + Op::CPop => pop(cb, insn.opnds[0].into()), // C function call Op::CCall => { @@ -165,6 +160,15 @@ impl Assembler ret(cb); } + // Test and set flags + Op::Test => test(cb, insn.opnds[0].into(), 
insn.opnds[1].into()), + + /* + Cmp, + Jnz, + Jbe, + */ + _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) }; } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 59c6773fcc3f60..be051c39a6d7b9 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -499,38 +499,6 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) { } } -// Generate a runtime guard that ensures the PC is at the expected -// instruction index in the iseq, otherwise takes a side-exit. -// This is to handle the situation of optional parameters. -// When a function with optional parameters is called, the entry -// PC for the method isn't necessarily 0. -fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) { - let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC); - let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; - let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8); - - mov(cb, REG0, pc_opnd); - mov(cb, REG1, expected_pc_opnd); - cmp(cb, REG0, REG1); - - let pc_match = cb.new_label("pc_match".to_string()); - je_label(cb, pc_match); - - // We're not starting at the first PC, so we need to exit. - gen_counter_incr!(cb, leave_start_pc_non_zero); - - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); - - mov(cb, RAX, imm_opnd(Qundef.into())); - ret(cb); - - // PC should match the expected insn_idx - cb.write_label(pc_match); - cb.link_labels(); -} - // Landing code for when c_return tracing is enabled. See full_cfunc_return(). fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { let cb = ocb.unwrap(); @@ -579,6 +547,38 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { return code_ptr; } +// Generate a runtime guard that ensures the PC is at the expected +// instruction index in the iseq, otherwise takes a side-exit. +// This is to handle the situation of optional parameters. 
+// When a function with optional parameters is called, the entry +// PC for the method isn't necessarily 0. +fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) { + let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC); + let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8); + + mov(cb, REG0, pc_opnd); + mov(cb, REG1, expected_pc_opnd); + cmp(cb, REG0, REG1); + + let pc_match = cb.new_label("pc_match".to_string()); + je_label(cb, pc_match); + + // We're not starting at the first PC, so we need to exit. + gen_counter_incr!(cb, leave_start_pc_non_zero); + + pop(cb, REG_SP); + pop(cb, REG_EC); + pop(cb, REG_CFP); + + mov(cb, RAX, imm_opnd(Qundef.into())); + ret(cb); + + // PC should match the expected insn_idx + cb.write_label(pc_match); + cb.link_labels(); +} + /// Compile an interpreter entry block to be inserted into an iseq /// Returns None if compilation fails. pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option { @@ -600,12 +600,10 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O let mut asm = Assembler::new(); - // TODO: on arm, we need to push the return address here? 
- - // FIXME - //push(cb, REG_CFP); - //push(cb, REG_EC); - //push(cb, REG_SP); + // Save the CFP, EC, SP registers to the C stack + asm.cpush(CFP); + asm.cpush(EC); + asm.cpush(SP); // We are passed EC and CFP as arguments asm.mov(EC, C_ARG_REGS[0].into()); From d75c346c1cb5d67fd4c6582274a3ff4f1450af15 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 9 Jun 2022 15:40:27 -0400 Subject: [PATCH 320/546] Port gen_leave_exit(), add support for labels to backend --- yjit/src/asm/mod.rs | 6 +-- yjit/src/backend/ir.rs | 68 +++++++++++++++++++++++++++------- yjit/src/backend/x86_64/mod.rs | 47 ++++++++++++++++------- yjit/src/codegen.rs | 38 ++++++++----------- 4 files changed, 106 insertions(+), 53 deletions(-) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 751f9fce0baf4e..9f518398b78979 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -214,16 +214,12 @@ impl CodeBlock { /// Write a label at the current address pub fn write_label(&mut self, label_idx: usize) { - // TODO: make sure that label_idx is valid - // TODO: add an asseer here - self.label_addrs[label_idx] = self.write_pos; } // Add a label reference at the current write position pub fn label_ref(&mut self, label_idx: usize) { - // TODO: make sure that label_idx is valid - // TODO: add an asseer here + assert!(label_idx < self.label_addrs.len()); // Keep track of the reference self.label_refs.push(LabelRef { diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 514ac4a67e4042..63bf85f3a0b596 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -256,6 +256,13 @@ impl Opnd } } +impl From for Opnd { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + Opnd::UImm(uimm as u64) + } +} + /// NOTE: this is useful during the port but can probably be removed once /// Context returns ir::Opnd instead of X86Opnd /// @@ -290,13 +297,22 @@ impl From for Opnd { /// Branch target (something that we can jump to) /// for branch instructions 
-#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Target { CodePtr(CodePtr), // Pointer to a piece of YJIT-generated code (e.g. side-exit) FunPtr(*const u8), // Pointer to a C function - LabelName(String), // A label without an index in the output - LabelIdx(usize), // A label that has been indexed + Label(usize), // A label within the generated code +} + +impl Target +{ + pub fn unwrap_label_idx(&self) -> usize { + match self { + Target::Label(idx) => *idx, + _ => unreachable!() + } + } } /// YJIT IR instruction @@ -332,6 +348,9 @@ pub struct Assembler /// Parallel vec with insns /// Index of the last insn using the output of this insn pub(super) live_ranges: Vec, + + /// Names of labels + pub(super) label_names: Vec, } impl Assembler @@ -340,6 +359,7 @@ impl Assembler Assembler { insns: Vec::default(), live_ranges: Vec::default(), + label_names: Vec::default(), } } @@ -387,30 +407,42 @@ impl Assembler self.live_ranges.push(self.insns.len()); } + /// Create a new label instance that we can jump to + pub fn new_label(&mut self, name: &str) -> Target + { + let label_idx = self.label_names.len(); + dbg!(label_idx); + + self.label_names.push(name.to_string()); + Target::Label(label_idx) + } + /// Add a label at the current position - pub fn label(&mut self, name: &str) -> Target + pub fn write_label(&mut self, label: Target) { - let insn_idx = self.insns.len(); + assert!(label.unwrap_label_idx() < self.label_names.len()); let insn = Insn { op: Op::Label, - text: Some(name.to_owned()), + text: None, opnds: vec![], out: Opnd::None, - target: None, + target: Some(label), pos: None }; self.insns.push(insn); self.live_ranges.push(self.insns.len()); - - Target::LabelIdx(insn_idx) } /// Transform input instructions, consumes the input assembler pub(super) fn transform_insns(mut self, mut map_insn: F) -> Assembler where F: FnMut(&mut Assembler, usize, Op, Vec, Option) { - let mut asm = Assembler::new(); + let mut asm = Assembler { 
+ insns: Vec::default(), + live_ranges: Vec::default(), + label_names: self.label_names, + }; // indices maps from the old instruction index to the new instruction // index. @@ -435,9 +467,6 @@ impl Assembler Op::Comment => { asm.comment(insn.text.unwrap().as_str()); }, - Op::Label => { - asm.label(insn.text.unwrap().as_str()); - }, _ => { map_insn(&mut asm, index, insn.op, opnds, insn.target); } @@ -931,4 +960,17 @@ mod tests { asm.compile_with_regs(&mut cb, regs); } + + #[test] + fn test_jcc_label() + { + let (mut asm, mut cb, regs) = setup_asm(1); + + let label = asm.new_label("foo"); + asm.cmp(EC, EC); + asm.je(label); + asm.write_label(label); + + asm.compile_with_regs(&mut cb, regs); + } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index a40bc2a980d1c1..467a347b01f801 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -107,11 +107,12 @@ impl Assembler } }, - Op::Label => {}, + // Write the label at the current position + Op::Label => { + cb.write_label(insn.target.unwrap().unwrap_label_idx()); + }, Op::Add => { - // FIXME: this fails because insn.out is none sometimes - //assert_eq!(insn.out, insn.opnds[0]); add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, @@ -160,14 +161,23 @@ impl Assembler ret(cb); } + // Compare + Op::Cmp => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), + // Test and set flags Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), - /* - Cmp, - Jnz, - Jbe, - */ + Op::Je => { + match insn.target.unwrap() { + Target::Label(idx) => { + + dbg!(idx); + je_label(cb, idx); + + }, + _ => unimplemented!() + } + } _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) }; @@ -179,10 +189,21 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec { - self - .x86_split() - .split_loads() - .alloc_regs(regs) - .x86_emit(cb) + let mut asm = self.x86_split(); + let 
mut asm = asm.split_loads(); + let mut asm = asm.alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + dbg!("creating label, idx={}", idx); + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.x86_emit(cb); + + cb.link_labels(); + + gc_offsets } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index be051c39a6d7b9..01c0b7ee85a04d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -535,8 +535,9 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { // Note, gen_leave() fully reconstructs interpreter state and leaves the // return value in RAX before coming here. + // FIXME // Every exit to the interpreter should be counted - gen_counter_incr!(ocb, leave_interp_return); + //gen_counter_incr!(ocb, leave_interp_return); pop(ocb, REG_SP); pop(ocb, REG_EC); @@ -552,31 +553,28 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { // This is to handle the situation of optional parameters. // When a function with optional parameters is called, the entry // PC for the method isn't necessarily 0. -fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) { - let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC); +fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { + let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; - let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8); + let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); - mov(cb, REG0, pc_opnd); - mov(cb, REG1, expected_pc_opnd); - cmp(cb, REG0, REG1); + asm.cmp(pc_opnd, expected_pc_opnd); - let pc_match = cb.new_label("pc_match".to_string()); - je_label(cb, pc_match); + let pc_match = asm.new_label("pc_match"); + asm.je(pc_match); + // FIXME // We're not starting at the first PC, so we need to exit. 
- gen_counter_incr!(cb, leave_start_pc_non_zero); + //gen_counter_incr!(cb, leave_start_pc_non_zero); - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.cpop(SP); + asm.cpop(EC); + asm.cpop(CFP); - mov(cb, RAX, imm_opnd(Qundef.into())); - ret(cb); + asm.cret(Qundef.into()); // PC should match the expected insn_idx - cb.write_label(pc_match); - cb.link_labels(); + asm.write_label(pc_match); } /// Compile an interpreter entry block to be inserted into an iseq @@ -625,11 +623,7 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O // compiled for is the same PC that the interpreter wants us to run with. // If they don't match, then we'll take a side exit. if unsafe { get_iseq_flags_has_opt(iseq) } { - - // FIXME - todo!(); - - //gen_pc_guard(cb, iseq, insn_idx); + gen_pc_guard(&mut asm, iseq, insn_idx); } asm.compile(cb); From 0000984fed1be885ad51845477f4e475d1b07fab Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 9 Jun 2022 16:29:55 -0400 Subject: [PATCH 321/546] Port over putnil, putobject, and gen_leave() * Remove x86-64 dependency from codegen.rs * Port over putnil and putobject * Port over gen_leave() * Complete port of gen_leave() * Fix bug in x86 instruction splitting --- yjit/src/asm/x86_64/mod.rs | 2 +- yjit/src/backend/ir.rs | 148 +++++++----------------- yjit/src/backend/x86_64/mod.rs | 34 +++--- yjit/src/codegen.rs | 205 +++++++++++++++++---------------- 4 files changed, 168 insertions(+), 221 deletions(-) diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index fdfac82f921f95..399c2e8c7e821d 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -245,7 +245,7 @@ pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType:: pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 }); pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 }); -// C argument 
registers +// C argument registers on this platform pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9]; //=========================================================================== diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 63bf85f3a0b596..a077eb1945940f 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -6,7 +6,7 @@ use std::fmt; use std::convert::From; use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; -use crate::asm::{CodeBlock}; +use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType}; use crate::core::{Context, Type, TempMapping}; use crate::codegen::{JITState}; @@ -21,6 +21,9 @@ pub const EC: Opnd = _EC; pub const CFP: Opnd = _CFP; pub const SP: Opnd = _SP; +pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; +pub const C_RET_OPND: Opnd = _C_RET_OPND; + /// Instruction opcodes #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Op @@ -77,6 +80,9 @@ pub enum Op // Compare two operands Cmp, + // Unconditional jump which takes an address operand + JmpOpnd, + // Low-level conditional jump instructions Jbe, Je, @@ -92,110 +98,8 @@ pub enum Op // C function return CRet, - /* - // The following are conditional jump instructions. They all accept as their - // first operand an EIR_LABEL_NAME, which is used as the target of the jump. - // - // The OP_JUMP_EQ instruction accepts two additional operands, to be - // compared for equality. If they're equal, then the generated code jumps to - // the target label. If they're not, then it continues on to the next - // instruction. - JumpEq, - - // The OP_JUMP_NE instruction is very similar to the OP_JUMP_EQ instruction, - // except it compares for inequality instead. - JumpNe, - - // Checks the overflow flag and conditionally jumps to the target if it is - // currently set. - JumpOvf, - - // A low-level call instruction for calling a function by a pointer. 
It - // accepts one operand of type EIR_IMM that should be a pointer to the - // function. Usually this is done by first casting the function to a void*, - // as in: ir_const_ptr((void *)&my_function)). - Call, - - // Calls a function by a pointer and returns an operand that contains the - // result of the function. Accepts as its operands a pointer to a function - // of type EIR_IMM (usually generated from ir_const_ptr) and a variable - // number of arguments to the function being called. - // - // This is the higher-level instruction that should be used when you want to - // call a function with arguments, as opposed to OP_CALL which is - // lower-level and just calls a function without moving arguments into - // registers for you. - CCall, - - // Returns from the function being generated immediately. This is different - // from OP_RETVAL in that it does nothing with the return value register - // (whatever is in there is what will get returned). Accepts no operands. - Ret, - - // First, moves a value into the return value register. Then, returns from - // the generated function. Accepts as its only operand the value that should - // be returned from the generated function. - RetVal, - - // A conditional move instruction that should be preceeded at some point by - // an OP_CMP instruction that would have set the requisite comparison flags. - // Accepts 2 operands, both of which are expected to be of the EIR_REG type. - // - // If the comparison indicates the left compared value is greater than or - // equal to the right compared value, then the conditional move is executed, - // otherwise we just continue on to the next instruction. - // - // This is considered a low-level instruction, and the OP_SELECT_* variants - // should be preferred if possible. - CMovGE, - - // The same as OP_CMOV_GE, except the comparison is greater than. - CMovGT, - - // The same as OP_CMOV_GE, except the comparison is less than or equal. 
- CMovLE, - - // The same as OP_CMOV_GE, except the comparison is less than. - CMovLT, - - // Selects between two different values based on a comparison of two other - // values. Accepts 4 operands. The first two are the basis of the - // comparison. The second two are the "then" case and the "else" case. You - // can effectively think of this instruction as a ternary operation, where - // the first two values are being compared. - // - // OP_SELECT_GE performs the described ternary using a greater than or equal - // comparison, that is if the first operand is greater than or equal to the - // second operand. - SelectGE, - - // The same as OP_SELECT_GE, except the comparison is greater than. - SelectGT, - - // The same as OP_SELECT_GE, except the comparison is less than or equal. - SelectLE, - - // The same as OP_SELECT_GE, except the comparison is less than. - SelectLT, - - // For later: - // These encode Ruby true/false semantics - // Can be used to enable op fusion of Ruby compare + branch. 
- // OP_JUMP_TRUE, // (opnd, target) - // OP_JUMP_FALSE, // (opnd, target) - - // For later: - // OP_GUARD_HEAP, // (opnd, target) - // OP_GUARD_IMM, // (opnd, target) - // OP_GUARD_FIXNUM, // (opnd, target) - - // For later: - // OP_COUNTER_INC, (counter_name) - - // For later: - // OP_LEA, - // OP_TEST, - */ + // Trigger a debugger breakpoint + Breakpoint, } // Memory location @@ -256,6 +160,12 @@ impl Opnd } } +impl From for Opnd { + fn from(value: usize) -> Self { + Opnd::UImm(value.try_into().unwrap()) + } +} + impl From for Opnd { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; @@ -522,6 +432,18 @@ impl Assembler let opnd1 = asm.load(opnds[1]); asm.push_insn(op, vec![opnds[0], opnd1], None); }, + + [Opnd::Mem(_), Opnd::UImm(val)] => { + if uimm_num_bits(*val) > 32 { + let opnd1 = asm.load(opnds[1]); + asm.push_insn(op, vec![opnds[0], opnd1], None); + } + else + { + asm.push_insn(op, opnds, target); + } + }, + _ => { asm.push_insn(op, opnds, target); } @@ -609,7 +531,7 @@ impl Assembler // C return values need to be mapped to the C return register if op == Op::CCall { - out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &RET_REG)) + out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &C_RET_REG)) } // If this instruction's first operand maps to a register and @@ -689,6 +611,18 @@ macro_rules! def_push_jcc { }; } +macro_rules! def_push_0_opnd_no_out { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self) + { + self.push_insn($opcode, vec![], None); + } + } + }; +} + macro_rules! def_push_1_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler @@ -737,6 +671,7 @@ macro_rules! 
def_push_2_opnd_no_out { }; } +def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); def_push_jcc!(je, Op::Je); def_push_jcc!(jbe, Op::Jbe); def_push_jcc!(jnz, Op::Jnz); @@ -752,6 +687,7 @@ def_push_2_opnd_no_out!(store, Op::Store); def_push_2_opnd_no_out!(mov, Op::Mov); def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); +def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); // NOTE: these methods are temporary and will likely move // to context.rs later diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 467a347b01f801..748bf5aea04f1e 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -6,7 +6,7 @@ use crate::asm::{CodeBlock}; use crate::asm::x86_64::*; use crate::codegen::{JITState}; use crate::cruby::*; -use crate::backend::ir::*; +use crate::backend::ir::{Assembler, Opnd, Target, Op, Mem}; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -16,8 +16,19 @@ pub const _CFP: Opnd = Opnd::Reg(R13_REG); pub const _EC: Opnd = Opnd::Reg(R12_REG); pub const _SP: Opnd = Opnd::Reg(RBX_REG); +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(RDI_REG), + Opnd::Reg(RSI_REG), + Opnd::Reg(RDX_REG), + Opnd::Reg(RCX_REG), + Opnd::Reg(R8_REG), + Opnd::Reg(R9_REG) +]; + // C return value register on this platform -pub const RET_REG: Reg = RAX_REG; +pub const C_RET_REG: Reg = RAX_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); /// Map Opnd to X86Opnd impl From for X86Opnd { @@ -58,7 +69,7 @@ impl Assembler ] } - /// Emit platform-specific machine code + /// Split IR instructions for the x86 platform fn x86_split(mut self) -> Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); @@ -76,7 +87,8 @@ impl Assembler } }, - [Opnd::Mem(_), _] => { + // We have to load memory and register operands to avoid corrupting them + [Opnd::Mem(_) | Opnd::Reg(_), _] => { let opnd0 = asm.load(opnds[0]); asm.push_insn(op, 
vec![opnd0, opnds[1]], None); return; @@ -154,7 +166,7 @@ impl Assembler Op::CRet => { // TODO: bias allocation towards return register - if insn.opnds[0] != Opnd::Reg(RET_REG) { + if insn.opnds[0] != Opnd::Reg(C_RET_REG) { mov(cb, RAX, insn.opnds[0].into()); } @@ -167,17 +179,11 @@ impl Assembler // Test and set flags Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), - Op::Je => { - match insn.target.unwrap() { - Target::Label(idx) => { + Op::JmpOpnd => jmp_rm(cb, insn.opnds[0].into()), - dbg!(idx); - je_label(cb, idx); + Op::Je => je_label(cb, insn.target.unwrap().unwrap_label_idx()), - }, - _ => unimplemented!() - } - } + Op::Breakpoint => int3(cb), _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) }; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 01c0b7ee85a04d..4d5e73686def42 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1,7 +1,7 @@ // We use the YARV bytecode constants which have a CRuby-style name #![allow(non_upper_case_globals)] -use crate::asm::x86_64::*; +//use crate::asm::x86_64::*; use crate::asm::*; use crate::backend::ir::*; use crate::core::*; @@ -25,15 +25,15 @@ use std::slice; pub use crate::virtualmem::CodePtr; // Callee-saved registers -pub const REG_CFP: X86Opnd = R13; -pub const REG_EC: X86Opnd = R12; -pub const REG_SP: X86Opnd = RBX; +//pub const REG_CFP: X86Opnd = R13; +//pub const REG_EC: X86Opnd = R12; +//pub const REG_SP: X86Opnd = RBX; // Scratch registers used by YJIT -pub const REG0: X86Opnd = RAX; -pub const REG0_32: X86Opnd = EAX; -pub const REG0_8: X86Opnd = AL; -pub const REG1: X86Opnd = RCX; +//pub const REG0: X86Opnd = RAX; +//pub const REG0_32: X86Opnd = EAX; +//pub const REG0_8: X86Opnd = AL; +//pub const REG1: X86Opnd = RCX; // A block that can be invalidated needs space to write a jump. 
// We'll reserve a minimum size for any block that could @@ -210,19 +210,25 @@ fn add_comment(cb: &mut CodeBlock, comment_str: &str) { /// Increment a profiling counter with counter_name #[cfg(not(feature = "stats"))] macro_rules! gen_counter_incr { - ($cb:tt, $counter_name:ident) => {}; + ($asm:tt, $counter_name:ident) => {}; } #[cfg(feature = "stats")] macro_rules! gen_counter_incr { - ($cb:tt, $counter_name:ident) => { + ($asm:tt, $counter_name:ident) => { if (get_option!(gen_stats)) { // Get a pointer to the counter variable let ptr = ptr_to_counter!($counter_name); - // Use REG1 because there might be return value in REG0 - mov($cb, REG1, const_ptr_opnd(ptr as *const u8)); - write_lock_prefix($cb); // for ractors. - add($cb, mem_opnd(64, REG1, 0), imm_opnd(1)); + // Load the pointer into a register + let ptr_reg = $asm.load(Opnd::const_ptr(ptr as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // FIXME: do we want an atomic add, or an atomic store or swap for arm? + //write_lock_prefix($cb); // for ractors. + + // Increment and store the updated value + let incr_opnd = $asm.add(counter_opnd, 1.into()); + $asm.store(counter_opnd, incr_opnd); } }; } @@ -292,8 +298,7 @@ fn gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { fn jit_prepare_routine_call( jit: &mut JITState, ctx: &mut Context, - asm: &mut Assembler, - scratch_reg: X86Opnd, + asm: &mut Assembler ) { jit.record_boundary_patch_point = true; jit_save_pc(jit, asm); @@ -396,6 +401,9 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr { let code_ptr = cb.get_write_ptr(); + todo!(); + + /* add_comment(cb, "exit to interpreter"); // Generate the code to exit to the interpreters @@ -432,6 +440,7 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr { ret(cb); return code_ptr; + */ } // Fill code_for_exit_from_stub. 
This is used by branch_stub_hit() to exit @@ -442,16 +451,20 @@ fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { let ocb = ocb.unwrap(); let code_ptr = ocb.get_write_ptr(); + todo!(); + + /* gen_counter_incr!(ocb, exit_from_branch_stub); - pop(ocb, REG_SP); - pop(ocb, REG_EC); - pop(ocb, REG_CFP); + cpop(ocb, REG_SP); + cpop(ocb, REG_EC); + cpop(ocb, REG_CFP); mov(ocb, RAX, uimm_opnd(Qundef.into())); ret(ocb); return code_ptr; + */ } // :side-exit: @@ -504,6 +517,9 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { let cb = ocb.unwrap(); let code_ptr = cb.get_write_ptr(); + todo!(); + + /* // This chunk of code expect REG_EC to be filled properly and // RAX to contain the return value of the C method. @@ -524,6 +540,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { ret(cb); return code_ptr; + */ } /// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc. @@ -531,6 +548,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { let ocb = ocb.unwrap(); let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); // Note, gen_leave() fully reconstructs interpreter state and leaves the // return value in RAX before coming here. @@ -539,11 +557,22 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { // Every exit to the interpreter should be counted //gen_counter_incr!(ocb, leave_interp_return); - pop(ocb, REG_SP); - pop(ocb, REG_EC); - pop(ocb, REG_CFP); + asm.cpop(SP); + asm.cpop(EC); + asm.cpop(CFP); - ret(ocb); + // FIXME: we're currently assuming that the return value is in RAX, + // left there by gen_leave() ... + // + // What are our options? + // We could put the return value in C_RET_REG? + // Then call asm.ret with C_RET_REG? 
+ + + + asm.cret(C_RET_OPND); + + asm.compile(ocb); return code_ptr; } @@ -604,8 +633,8 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O asm.cpush(SP); // We are passed EC and CFP as arguments - asm.mov(EC, C_ARG_REGS[0].into()); - asm.mov(CFP, C_ARG_REGS[1].into()); + asm.mov(EC, C_ARG_OPNDS[0]); + asm.mov(CFP, C_ARG_OPNDS[1]); // Load the current SP from the CFP into REG_SP asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); @@ -765,7 +794,7 @@ pub fn gen_single_block( // :count-placement: // Count bytecode instructions that execute in generated code. // Note that the increment happens even when the output takes side exit. - gen_counter_incr!(cb, exec_instruction); + gen_counter_incr!(asm, exec_instruction); // Add a comment for the name of the YARV instruction asm.comment(&insn_name(opcode)); @@ -919,7 +948,7 @@ fn gen_swap( asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - stack_swap(jit, ctx, asm, 0, 1, REG0, REG1); + stack_swap(jit, ctx, asm, 0, 1); KeepCompiling } @@ -929,8 +958,6 @@ fn stack_swap( asm: &mut Assembler, offset0: u16, offset1: u16, - _reg0: X86Opnd, - _reg1: X86Opnd, ) { let stack0_mem = ctx.ir_stack_opnd(offset0 as i32); let stack1_mem = ctx.ir_stack_opnd(offset1 as i32); @@ -947,56 +974,26 @@ fn stack_swap( ctx.set_opnd_mapping(StackOpnd(offset1), mapping0); } - - - - - - - - - -/* fn gen_putnil( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - jit_putobject(jit, ctx, cb, Qnil); + jit_putobject(jit, ctx, asm, Qnil); KeepCompiling } -fn jit_putobject(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, arg: VALUE) { +fn jit_putobject(jit: &mut JITState, ctx: &mut Context, asm: &mut Assembler, arg: VALUE) { let val_type: Type = Type::from(arg); let stack_top = ctx.stack_push(val_type); - - if arg.special_const_p() { - // Immediates will not move and do not need to be tracked for GC - // Thanks to this we 
can mov directly to memory when possible. - let imm = imm_opnd(arg.as_i64()); - - // 64-bit immediates can't be directly written to memory - if imm.num_bits() <= 32 { - mov(cb, stack_top, imm); - } else { - mov(cb, REG0, imm); - mov(cb, stack_top, REG0); - } - } else { - // Load the value to push into REG0 - // Note that this value may get moved by the GC - jit_mov_gc_ptr(jit, cb, REG0, arg); - - // Write argument at SP - mov(cb, stack_top, REG0); - } + asm.mov(stack_top, arg.into()); } fn gen_putobject_int2fix( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let opcode = jit.opcode; @@ -1006,22 +1003,23 @@ fn gen_putobject_int2fix( 1 }; - jit_putobject(jit, ctx, cb, VALUE::fixnum_from_usize(cst_val)); + jit_putobject(jit, ctx, asm, VALUE::fixnum_from_usize(cst_val)); KeepCompiling } fn gen_putobject( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let arg: VALUE = jit_get_arg(jit, 0); - jit_putobject(jit, ctx, cb, arg); + jit_putobject(jit, ctx, asm, arg); KeepCompiling } +/* fn gen_putself( _jit: &mut JITState, ctx: &mut Context, @@ -5250,48 +5248,53 @@ fn gen_invokesuper( _ => unreachable!(), } } +*/ fn gen_leave( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Only the return value should be on the stack assert!(ctx.get_stack_size() == 1); + // FIXME + /* // Create a side-exit to fall back to the interpreter - let side_exit = get_side_exit(jit, ocb, ctx); - - // Load environment pointer EP from CFP - mov(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); + //let side_exit = get_side_exit(jit, ocb, ctx); // Check for interrupts - add_comment(cb, "check for interrupts"); - gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt)); - - // Load the return value - mov(cb, REG0, ctx.stack_pop(1)); + 
//gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt)); + */ // Pop the current frame (ec->cfp++) // Note: the return PC is already in the previous CFP - add_comment(cb, "pop stack frame"); - add(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP); + asm.comment("pop stack frame"); + let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, incr_cfp); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), incr_cfp); + + // Load the return value + let retval_opnd = ctx.stack_pop(1); + + // Move the return value into the C return register for gen_leave_exit() + asm.mov(C_RET_OPND, retval_opnd); // Reload REG_SP for the caller and write the return value. // Top of the stack is REG_SP[0] since the caller has sp_offset=1. - mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP)); - mov(cb, mem_opnd(64, REG_SP, 0), REG0); + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + asm.mov(Opnd::mem(64, SP, 0), C_RET_OPND); // Jump to the JIT return address on the frame that was just popped let offset_to_jit_return = -(RUBY_SIZEOF_CONTROL_FRAME as i32) + (RUBY_OFFSET_CFP_JIT_RETURN as i32); - jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return)); + asm.jmp_opnd(Opnd::mem(64, CFP, offset_to_jit_return)); EndBlock } +/* fn gen_getglobal( jit: &mut JITState, ctx: &mut Context, @@ -5958,21 +5961,21 @@ fn get_gen_fn(opcode: VALUE) -> Option { match opcode { YARVINSN_nop => Some(gen_nop), - //YARVINSN_pop => Some(gen_pop), + YARVINSN_pop => Some(gen_pop), YARVINSN_dup => Some(gen_dup), YARVINSN_dupn => Some(gen_dupn), YARVINSN_swap => Some(gen_swap), - - /* YARVINSN_putnil => Some(gen_putnil), YARVINSN_putobject => Some(gen_putobject), YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), YARVINSN_putobject_INT2FIX_1_ => Some(gen_putobject_int2fix), - YARVINSN_putself => Some(gen_putself), - YARVINSN_putspecialobject => Some(gen_putspecialobject), - YARVINSN_setn => 
Some(gen_setn), - YARVINSN_topn => Some(gen_topn), - YARVINSN_adjuststack => Some(gen_adjuststack), + //YARVINSN_putself => Some(gen_putself), + //YARVINSN_putspecialobject => Some(gen_putspecialobject), + //YARVINSN_setn => Some(gen_setn), + //YARVINSN_topn => Some(gen_topn), + //YARVINSN_adjuststack => Some(gen_adjuststack), + + /* YARVINSN_getlocal => Some(gen_getlocal), YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), @@ -6028,14 +6031,16 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_branchunless => Some(gen_branchunless), YARVINSN_branchnil => Some(gen_branchnil), YARVINSN_jump => Some(gen_jump), + */ - YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), - YARVINSN_getblockparam => Some(gen_getblockparam), - YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), - YARVINSN_send => Some(gen_send), - YARVINSN_invokesuper => Some(gen_invokesuper), + //YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), + //YARVINSN_getblockparam => Some(gen_getblockparam), + //YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), + //YARVINSN_send => Some(gen_send), + //YARVINSN_invokesuper => Some(gen_invokesuper), YARVINSN_leave => Some(gen_leave), + /* YARVINSN_getglobal => Some(gen_getglobal), YARVINSN_setglobal => Some(gen_setglobal), YARVINSN_anytostring => Some(gen_anytostring), @@ -6176,10 +6181,10 @@ impl CodegenGlobals { let leave_exit_code = gen_leave_exit(&mut ocb); - let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); + //let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); // Generate full exit code for C func - let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); + //let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); // Mark all code memory as executable cb.mark_all_executable(); @@ -6189,8 +6194,8 @@ impl CodegenGlobals { inline_cb: cb, outlined_cb: ocb, leave_exit_code: leave_exit_code, - stub_exit_code: stub_exit_code, - 
outline_full_cfunc_return_pos: cfunc_exit_code, + stub_exit_code: /*stub_exit_code*/CodePtr::from(1 as *mut u8), + outline_full_cfunc_return_pos: /*cfunc_exit_code*/CodePtr::from(1 as *mut u8), global_inval_patches: Vec::new(), inline_frozen_bytes: 0, method_codegen_table: HashMap::new(), From 1daa5942b83ede3e504f9952a1f705b763e59893 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 8 Jun 2022 15:19:53 -0400 Subject: [PATCH 322/546] MOVK, MOVZ, BR (https://github.com/Shopify/ruby/pull/296) * MOVK instruction * More tests for the A64 entrypoints * Finish testing entrypoints * MOVZ * BR instruction --- yjit/src/asm/arm64/inst/branch.rs | 85 +++++++++ .../src/asm/arm64/inst/branches_and_system.rs | 62 ------- ...ta_processing_immediate.rs => data_imm.rs} | 27 +-- ...ata_processing_register.rs => data_reg.rs} | 27 +-- yjit/src/asm/arm64/inst/family.rs | 34 ---- .../inst/{loads_and_stores.rs => load.rs} | 23 +-- yjit/src/asm/arm64/inst/mod.rs | 175 +++++++++++++++--- yjit/src/asm/arm64/inst/mov.rs | 155 ++++++++++++++++ yjit/src/asm/mod.rs | 12 ++ 9 files changed, 442 insertions(+), 158 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/branch.rs delete mode 100644 yjit/src/asm/arm64/inst/branches_and_system.rs rename yjit/src/asm/arm64/inst/{data_processing_immediate.rs => data_imm.rs} (87%) rename yjit/src/asm/arm64/inst/{data_processing_register.rs => data_reg.rs} (88%) delete mode 100644 yjit/src/asm/arm64/inst/family.rs rename yjit/src/asm/arm64/inst/{loads_and_stores.rs => load.rs} (86%) create mode 100644 yjit/src/asm/arm64/inst/mov.rs diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs new file mode 100644 index 00000000000000..7f93f5e201d88f --- /dev/null +++ b/yjit/src/asm/arm64/inst/branch.rs @@ -0,0 +1,85 @@ +/// Which operation to perform. +enum Op { + /// Perform a BR instruction. + Br = 0b00, + + /// Perform a RET instruction. 
+ Ret = 0b10 +} + +/// The struct that represents an A64 branch instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 | +/// | op... rn.............. rm.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Branch { + /// The register holding the address to be branched to. + rn: u8, + + /// The operation to perform. + op: Op +} + +impl Branch { + /// BR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en + pub fn br(rn: u8) -> Self { + Self { rn, op: Op::Br } + } + + /// RET + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + pub fn ret(rn: u8) -> Self { + Self { rn, op: Op::Ret } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Branch) -> Self { + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 25) + | ((inst.op as u32) << 21) + | (0b11111 << 16) + | ((inst.rn as u32) << 5) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Branch) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_br() { + let result: u32 = Branch::br(0).into(); + assert_eq!(0xd61f0000, result); + } + + #[test] + fn test_ret() { + let result: u32 = Branch::ret(30).into(); + assert_eq!(0xd65f03C0, result); + } + + #[test] + fn test_ret_rn() { + let result: u32 = Branch::ret(20).into(); + assert_eq!(0xd65f0280, result); + } +} diff --git a/yjit/src/asm/arm64/inst/branches_and_system.rs b/yjit/src/asm/arm64/inst/branches_and_system.rs deleted file mode 100644 index 77e99c112ae64b..00000000000000 --- a/yjit/src/asm/arm64/inst/branches_and_system.rs +++ /dev/null @@ -1,62 +0,0 @@ -use super::family::Family; - -/// The struct that represents an A64 branches and system instruction that can -/// be encoded. -/// -/// RET -/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ -/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 1 0 1 0 1 1 0 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 | -/// | rn.............. rm.............. | -/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ -/// -pub struct BranchesAndSystem { - /// The register holding the address to be branched to. - rn: u8 -} - -impl BranchesAndSystem { - /// RET - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en - pub fn ret(rn: u8) -> Self { - Self { rn } - } -} - -impl From for u32 { - /// Convert an instruction into a 32-bit value. - fn from(inst: BranchesAndSystem) -> Self { - 0 - | (0b11 << 30) - | ((Family::BranchesAndSystem as u32) << 25) - | (0b1001011111 << 16) - | ((inst.rn as u32) << 5) - } -} - -impl From for [u8; 4] { - /// Convert an instruction into a 4 byte array. 
- fn from(inst: BranchesAndSystem) -> [u8; 4] { - let result: u32 = inst.into(); - result.to_le_bytes() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_ret() { - let inst = BranchesAndSystem::ret(30); - let result: u32 = inst.into(); - assert_eq!(0xd65f03C0, result); - } - - #[test] - fn test_ret_rn() { - let inst = BranchesAndSystem::ret(20); - let result: u32 = inst.into(); - assert_eq!(0xd65f0280, result); - } -} diff --git a/yjit/src/asm/arm64/inst/data_processing_immediate.rs b/yjit/src/asm/arm64/inst/data_imm.rs similarity index 87% rename from yjit/src/asm/arm64/inst/data_processing_immediate.rs rename to yjit/src/asm/arm64/inst/data_imm.rs index 25117efc22c762..0d0a6ff3254437 100644 --- a/yjit/src/asm/arm64/inst/data_processing_immediate.rs +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -1,4 +1,4 @@ -use super::{family::Family, sf::Sf}; +use super::sf::Sf; /// The operation being performed by this instruction. enum Op { @@ -28,7 +28,7 @@ enum Shift { /// | sf op S sh imm12.................................... rn.............. rd.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// -pub struct DataProcessingImmediate { +pub struct DataImm { /// The register number of the destination register. rd: u8, @@ -51,7 +51,7 @@ pub struct DataProcessingImmediate { sf: Sf } -impl DataProcessingImmediate { +impl DataImm { /// ADD (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en pub fn add(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { @@ -109,16 +109,19 @@ impl DataProcessingImmediate { } } -impl From for u32 { +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From for u32 { /// Convert an instruction into a 32-bit value. 
- fn from(inst: DataProcessingImmediate) -> Self { + fn from(inst: DataImm) -> Self { let imm12 = (inst.imm12 as u32) & ((1 << 12) - 1); 0 | ((inst.sf as u32) << 31) | ((inst.op as u32) << 30) | ((inst.s as u32) << 29) - | ((Family::DataProcessingImmediate as u32) << 25) + | (FAMILY << 25) | (1 << 24) | ((inst.shift as u32) << 22) | (imm12 << 10) @@ -127,9 +130,9 @@ impl From for u32 { } } -impl From for [u8; 4] { +impl From for [u8; 4] { /// Convert an instruction into a 4 byte array. - fn from(inst: DataProcessingImmediate) -> [u8; 4] { + fn from(inst: DataImm) -> [u8; 4] { let result: u32 = inst.into(); result.to_le_bytes() } @@ -141,28 +144,28 @@ mod tests { #[test] fn test_add() { - let inst = DataProcessingImmediate::add(0, 1, 7, 64); + let inst = DataImm::add(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0x91001c20, result); } #[test] fn test_adds() { - let inst = DataProcessingImmediate::adds(0, 1, 7, 64); + let inst = DataImm::adds(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0xb1001c20, result); } #[test] fn test_sub() { - let inst = DataProcessingImmediate::sub(0, 1, 7, 64); + let inst = DataImm::sub(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0xd1001c20, result); } #[test] fn test_subs() { - let inst = DataProcessingImmediate::subs(0, 1, 7, 64); + let inst = DataImm::subs(0, 1, 7, 64); let result: u32 = inst.into(); assert_eq!(0xf1001c20, result); } diff --git a/yjit/src/asm/arm64/inst/data_processing_register.rs b/yjit/src/asm/arm64/inst/data_reg.rs similarity index 88% rename from yjit/src/asm/arm64/inst/data_processing_register.rs rename to yjit/src/asm/arm64/inst/data_reg.rs index 7e9f37ab8e7382..8635ab804b4646 100644 --- a/yjit/src/asm/arm64/inst/data_processing_register.rs +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -1,4 +1,4 @@ -use super::{family::Family, sf::Sf}; +use super::sf::Sf; /// The operation being performed by this instruction. 
enum Op { @@ -29,7 +29,7 @@ enum Shift { /// | sf op S shift rm.............. imm6............... rn.............. rd.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// -pub struct DataProcessingRegister { +pub struct DataReg { /// The register number of the destination register. rd: u8, @@ -55,7 +55,7 @@ pub struct DataProcessingRegister { sf: Sf } -impl DataProcessingRegister { +impl DataReg { /// ADD (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { @@ -117,16 +117,19 @@ impl DataProcessingRegister { } } -impl From for u32 { +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +const FAMILY: u32 = 0b0101; + +impl From for u32 { /// Convert an instruction into a 32-bit value. - fn from(inst: DataProcessingRegister) -> Self { + fn from(inst: DataReg) -> Self { let imm6 = (inst.imm6 as u32) & ((1 << 6) - 1); 0 | ((inst.sf as u32) << 31) | ((inst.op as u32) << 30) | ((inst.s as u32) << 29) - | ((Family::DataProcessingRegister as u32) << 25) + | (FAMILY << 25) | (1 << 24) | ((inst.shift as u32) << 22) | ((inst.rm as u32) << 16) @@ -136,9 +139,9 @@ impl From for u32 { } } -impl From for [u8; 4] { +impl From for [u8; 4] { /// Convert an instruction into a 4 byte array. 
- fn from(inst: DataProcessingRegister) -> [u8; 4] { + fn from(inst: DataReg) -> [u8; 4] { let result: u32 = inst.into(); result.to_le_bytes() } @@ -150,28 +153,28 @@ mod tests { #[test] fn test_add() { - let inst = DataProcessingRegister::add(0, 1, 2, 64); + let inst = DataReg::add(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0x8b020020, result); } #[test] fn test_adds() { - let inst = DataProcessingRegister::adds(0, 1, 2, 64); + let inst = DataReg::adds(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0xab020020, result); } #[test] fn test_sub() { - let inst = DataProcessingRegister::sub(0, 1, 2, 64); + let inst = DataReg::sub(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0xcb020020, result); } #[test] fn test_subs() { - let inst = DataProcessingRegister::subs(0, 1, 2, 64); + let inst = DataReg::subs(0, 1, 2, 64); let result: u32 = inst.into(); assert_eq!(0xeb020020, result); } diff --git a/yjit/src/asm/arm64/inst/family.rs b/yjit/src/asm/arm64/inst/family.rs deleted file mode 100644 index ff5a335406e66d..00000000000000 --- a/yjit/src/asm/arm64/inst/family.rs +++ /dev/null @@ -1,34 +0,0 @@ -/// These are the top-level encodings. They're effectively the family of -/// instructions, as each instruction within those groups shares these same -/// bits (28-25). -/// -/// In the documentation, you can see that some of the bits are -/// optional (e.g., x1x0 for loads and stores). We represent that here as 0100 -/// since we're bitwise ORing the family into the resulting encoding. 
-/// -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding?lang=en -pub enum Family { - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Reserved?lang=en - Reserved = 0b0000, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/SME-encodings?lang=en - SMEEncodings = 0b0001, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/SVE-encodings?lang=en - SVEEncodings = 0b0010, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en - DataProcessingImmediate = 0b1000, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en - BranchesAndSystem = 0b1010, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en - LoadsAndStores = 0b0100, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en - DataProcessingRegister = 0b0101, - - /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en - DataProcessingScalar = 0b0111 -} diff --git a/yjit/src/asm/arm64/inst/loads_and_stores.rs b/yjit/src/asm/arm64/inst/load.rs similarity index 86% rename from yjit/src/asm/arm64/inst/loads_and_stores.rs rename to yjit/src/asm/arm64/inst/load.rs index 5fb8b7a6fbf45a..727dad52f7cfb2 100644 --- a/yjit/src/asm/arm64/inst/loads_and_stores.rs +++ b/yjit/src/asm/arm64/inst/load.rs @@ -1,5 +1,3 @@ -use super::family::Family; - /// The size of the operands being operated on. enum Size { Size32 = 0b10, @@ -28,7 +26,7 @@ impl From for Size { /// | size. imm9.......................... rn.............. rt.............. 
| /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// -pub struct LoadsAndStores { +pub struct Load { /// The number of the register to load the value into. rt: u8, @@ -42,7 +40,7 @@ pub struct LoadsAndStores { size: Size } -impl LoadsAndStores { +impl Load { /// LDUR (load register, unscaled) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { @@ -55,15 +53,18 @@ impl LoadsAndStores { } } -impl From for u32 { +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { /// Convert an instruction into a 32-bit value. - fn from(inst: LoadsAndStores) -> Self { + fn from(inst: Load) -> Self { let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); 0 | ((inst.size as u32) << 30) | (0b11 << 28) - | ((Family::LoadsAndStores as u32) << 25) + | (FAMILY << 25) | (1 << 22) | (imm9 << 12) | ((inst.rn as u32) << 5) @@ -71,9 +72,9 @@ impl From for u32 { } } -impl From for [u8; 4] { +impl From for [u8; 4] { /// Convert an instruction into a 4 byte array. 
- fn from(inst: LoadsAndStores) -> [u8; 4] { + fn from(inst: Load) -> [u8; 4] { let result: u32 = inst.into(); result.to_le_bytes() } @@ -85,14 +86,14 @@ mod tests { #[test] fn test_ldur() { - let inst = LoadsAndStores::ldur(0, 1, 0, 64); + let inst = Load::ldur(0, 1, 0, 64); let result: u32 = inst.into(); assert_eq!(0xf8400020, result); } #[test] fn test_ldur_with_imm() { - let inst = LoadsAndStores::ldur(0, 1, 123, 64); + let inst = Load::ldur(0, 1, 123, 64); let result: u32 = inst.into(); assert_eq!(0xf847b020, result); } diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 9c5b53b0aca3b0..eec9d116b2b056 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -1,19 +1,20 @@ -mod branches_and_system; -mod data_processing_immediate; -mod data_processing_register; -mod family; -mod loads_and_stores; +mod branch; +mod data_imm; +mod data_reg; +mod load; +mod mov; mod sf; -use branches_and_system::BranchesAndSystem; -use data_processing_immediate::DataProcessingImmediate; -use data_processing_register::DataProcessingRegister; -use loads_and_stores::LoadsAndStores; +use branch::Branch; +use data_imm::DataImm; +use data_reg::DataReg; +use load::Load; +use mov::Mov; use crate::asm::{CodeBlock, imm_num_bits}; use super::opnd::*; -/// ADD +/// ADD - add rn and rm, put the result in rd, don't update flags pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { @@ -22,13 +23,13 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { "All operands must be of the same size." 
); - DataProcessingRegister::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); - DataProcessingImmediate::add(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::add(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to add instruction."), }; @@ -36,7 +37,7 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } -/// ADDS +/// ADDS - add rn and rm, put the result in rd, update flags pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { @@ -45,13 +46,13 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { "All operands must be of the same size." 
); - DataProcessingRegister::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + DataReg::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); - DataProcessingImmediate::adds(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::adds(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to adds instruction."), }; @@ -59,14 +60,24 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } -/// LDUR +/// BR - branch to a register +pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::br(rn.reg_no).into(), + _ => panic!("Invalid operand to br instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); assert!(imm_num_bits(rn.disp.into()) <= 9, "Expected displacement to be 9 bits or less"); - LoadsAndStores::ldur(rt.reg_no, rn.base_reg_no, rn.disp.try_into().unwrap(), rt.num_bits).into() + Load::ldur(rt.reg_no, rn.base_reg_no, rn.disp.try_into().unwrap(), rt.num_bits).into() }, _ => panic!("Invalid operands for LDUR") }; @@ -74,7 +85,35 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } -/// SUB +/// MOVK - move a 16 bit immediate into a register, keep the other bits in place +pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(imm16.num_bits <= 16, "The 
immediate operand must be 16 bits or less."); + + Mov::movk(rd.reg_no, imm16.value as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movk instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// MOVZ - move a 16 bit immediate into a register, zero the other bits +pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(imm16.num_bits <= 16, "The immediate operand must be 16 bits or less."); + + Mov::movz(rd.reg_no, imm16.value as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// SUB - subtract rm from rn, put the result in rd, don't update flags pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { @@ -83,13 +122,13 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { "All operands must be of the same size." 
); - DataProcessingRegister::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); - DataProcessingImmediate::sub(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::sub(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to sub instruction."), }; @@ -97,7 +136,7 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } -/// SUBS +/// SUBS - subtract rm from rn, put the result in rd, update flags pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { @@ -106,13 +145,13 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { "All operands must be of the same size." 
); - DataProcessingRegister::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); - DataProcessingImmediate::subs(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::subs(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to subs instruction."), }; @@ -120,13 +159,95 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } -/// RET +/// RET - unconditionally return to a location in a register, defaults to X30 pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { let bytes: [u8; 4] = match rn { - A64Opnd::None => BranchesAndSystem::ret(30).into(), - A64Opnd::Reg(reg) => BranchesAndSystem::ret(reg.reg_no).into(), - _ => panic!("Invalid operand for RET") + A64Opnd::None => Branch::ret(30).into(), + A64Opnd::Reg(reg) => Branch::ret(reg.reg_no).into(), + _ => panic!("Invalid operand to ret instruction.") }; cb.write_bytes(&bytes); } + +#[cfg(test)] +mod tests { + use super::*; + + /// Check that the bytes for an instruction sequence match a hex string + fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { + let mut cb = super::CodeBlock::new_dummy(128); + run(&mut cb); + assert_eq!(format!("{:x}", cb), bytes); + } + + #[test] + fn test_add_register() { + check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); + } + + #[test] + fn test_add_immediate() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_adds_register() { + check_bytes("200002ab", |cb| adds(cb, X0, X1, X2)); + } + + #[test] + fn test_adds_immediate() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] 
+ fn test_br() { + check_bytes("80021fd6", |cb| br(cb, X20)); + } + + #[test] + fn test_ldur() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); + } + + #[test] + fn test_movk() { + check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_movz() { + check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_ret_none() { + check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); + } + + #[test] + fn test_ret_register() { + check_bytes("80025fd6", |cb| ret(cb, X20)); + } + + #[test] + fn test_sub_register() { + check_bytes("200002cb", |cb| sub(cb, X0, X1, X2)); + } + + #[test] + fn test_sub_immediate() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_subs_register() { + check_bytes("200002eb", |cb| subs(cb, X0, X1, X2)); + } + + #[test] + fn test_subs_immediate() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); + } +} diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs new file mode 100644 index 00000000000000..0d68ffd206ac8c --- /dev/null +++ b/yjit/src/asm/arm64/inst/mov.rs @@ -0,0 +1,155 @@ +use super::sf::Sf; + +/// Which operation is being performed. +enum Op { + /// A movz operation which zeroes out the other bits. + MOVZ = 0b10, + + /// A movk operation which keeps the other bits in place. + MOVK = 0b11 +} + +/// How much to shift the immediate by. +enum Hw { + LSL0 = 0b00, + LSL16 = 0b01, + LSL32 = 0b10, + LSL48 = 0b11 +} + +impl From for Hw { + fn from(shift: u8) -> Self { + match shift { + 0 => Hw::LSL0, + 16 => Hw::LSL16, + 32 => Hw::LSL32, + 48 => Hw::LSL48, + _ => panic!("Invalid value for shift: {}", shift) + } + } +} + +/// The struct that represents a MOVK or MOVZ instruction. 
+/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 1 | +/// | sf op... hw... imm16.................................................. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Mov { + /// The register number of the destination register. + rd: u8, + + /// The value to move into the register. + imm16: u16, + + /// The shift of the value to move. + hw: Hw, + + /// Which operation is being performed. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf +} + +impl Mov { + /// MOVK + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en + pub fn movk(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } + } + + /// MOVZ + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en + pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVZ, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Mov) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 29) + | (FAMILY << 25) + | (0b101 << 23) + | ((inst.hw as u32) << 21) + | ((inst.imm16 as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Mov) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_movk_unshifted() { + let inst = Mov::movk(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2800f60, result); + } + + #[test] + fn test_movk_shifted_16() { + let inst = Mov::movk(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2A00f60, result); + } + + #[test] + fn test_movk_shifted_32() { + let inst = Mov::movk(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2C00f60, result); + } + + #[test] + fn test_movk_shifted_48() { + let inst = Mov::movk(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2e00f60, result); + } + + #[test] + fn test_movz_unshifted() { + let inst = Mov::movz(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2800f60, result); + } + + #[test] + fn test_movz_shifted_16() { + let inst = Mov::movz(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2a00f60, result); + } + + #[test] + fn test_movz_shifted_32() { + let inst = Mov::movz(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2c00f60, result); + } + + #[test] + fn test_movz_shifted_48() { + let inst = Mov::movz(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2e00f60, result); + } +} diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 9f518398b78979..b54fc362b4fac3 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -1,3 +1,4 @@ +use std::fmt; use std::mem; #[cfg(feature = "asm_comments")] @@ -276,6 +277,17 @@ impl CodeBlock { } } +/// Produce hex string output from the bytes in a code block +impl<'a> fmt::LowerHex for CodeBlock { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + for pos in 0..self.write_pos { + let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() }; + fmtr.write_fmt(format_args!("{:02x}", byte))?; + } + Ok(()) + } +} + /// Wrapper struct 
so we can use the type system to distinguish /// Between the inlined and outlined code blocks pub struct OutlinedCb { From c10e018e1c2dd3351af1f40f9b20ea23cfeace36 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 13 Jun 2022 17:16:33 -0400 Subject: [PATCH 323/546] LDADDAL, STUR, BL (https://github.com/Shopify/ruby/pull/299) * LDADDAL instruction * STUR * BL instruction * Remove num_bits from imm and uimm * Tests for imm_fits_bits and uimm_fits_bits * Reorder arguments to LDADDAL --- yjit/src/asm/arm64/inst/atomic.rs | 86 +++++++++++++++++++ yjit/src/asm/arm64/inst/call.rs | 67 +++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 137 ++++++++++++++++++++++++++---- yjit/src/asm/arm64/inst/store.rs | 105 +++++++++++++++++++++++ yjit/src/asm/arm64/opnd.rs | 42 +-------- 5 files changed, 384 insertions(+), 53 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/atomic.rs create mode 100644 yjit/src/asm/arm64/inst/call.rs create mode 100644 yjit/src/asm/arm64/inst/store.rs diff --git a/yjit/src/asm/arm64/inst/atomic.rs b/yjit/src/asm/arm64/inst/atomic.rs new file mode 100644 index 00000000000000..5ce497209ceb12 --- /dev/null +++ b/yjit/src/asm/arm64/inst/atomic.rs @@ -0,0 +1,86 @@ +/// The size of the register operands to this instruction. +enum Size { + /// Using 32-bit registers. + Size32 = 0b10, + + /// Using 64-bit registers. + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 atomic instruction that can be encoded. 
+/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 | +/// | size rs.............. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Atomic { + /// The register holding the value to be loaded. + rt: u8, + + /// The base register. + rn: u8, + + /// The register holding the data value to be operated on. + rs: u8, + + /// The size of the registers used in this instruction. + size: Size +} + +impl Atomic { + /// LDADDAL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en + pub fn ldaddal(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self { + Self { rt, rn, rs, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Atomic) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b111 << 21) + | ((inst.rs as u32) << 16) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Atomic) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldaddal() { + let result: u32 = Atomic::ldaddal(20, 21, 22, 64).into(); + assert_eq!(0xf8f402d5, result); + } +} diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs new file mode 100644 index 00000000000000..6f23acf9f57645 --- /dev/null +++ b/yjit/src/asm/arm64/inst/call.rs @@ -0,0 +1,67 @@ +/// The struct that represents an A64 branch with link instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 1 | +/// | imm26.................................................................................... | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Call { + /// The PC-relative offset to jump to (which will be multiplied by 4). + imm26: i32 +} + +impl Call { + /// BL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en + pub fn bl(imm26: i32) -> Self { + Self { imm26 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Call) -> Self { + let imm26 = (inst.imm26 as u32) & ((1 << 26) - 1); + + 0 + | (1 << 31) + | (FAMILY << 26) + | imm26 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Call) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bl() { + let result: u32 = Call::bl(0).into(); + assert_eq!(0x94000000, result); + } + + #[test] + fn test_bl_positive() { + let result: u32 = Call::bl(256).into(); + assert_eq!(0x94000100, result); + } + + #[test] + fn test_bl_negative() { + let result: u32 = Call::bl(-256).into(); + assert_eq!(0x97ffff00, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index eec9d116b2b056..c96e9328ff04f0 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -1,19 +1,42 @@ +mod atomic; mod branch; +mod call; mod data_imm; mod data_reg; mod load; mod mov; mod sf; +mod store; +use core::num; + +use atomic::Atomic; use branch::Branch; +use call::Call; use data_imm::DataImm; use data_reg::DataReg; use load::Load; use mov::Mov; +use store::Store; -use crate::asm::{CodeBlock, imm_num_bits}; +use crate::asm::CodeBlock; use super::opnd::*; +/// Checks that a signed value fits within the specified number of bits. +const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { + let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; + let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; + + imm >= minimum && imm <= maximum +} + +/// Checks that an unsigned value fits within the specified number of bits. 
+const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { + let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; + + uimm <= maximum +} + /// ADD - add rn and rm, put the result in rd, don't update flags pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { @@ -27,9 +50,9 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::add(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to add instruction."), }; @@ -50,9 +73,9 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::adds(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to adds instruction."), }; @@ -60,6 +83,20 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// BL - branch with link (offset is number of instructions to jump) +pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(imm_fits_bits(imm26, 26), "The immediate operand must be 26 bits or less."); + + 
Call::bl(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to bl instruction.") + }; + + cb.write_bytes(&bytes); +} + /// BR - branch to a register pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { let bytes: [u8; 4] = match rn { @@ -70,14 +107,31 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// LDADDAL - atomic add with acquire and release semantics +pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rs, rt, rn) { + (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!( + rs.num_bits == rt.num_bits && rt.num_bits == rn.num_bits, + "All operands must be of the same size." + ); + + Atomic::ldaddal(rs.reg_no, rt.reg_no, rn.reg_no, rs.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldaddal instruction."), + }; + + cb.write_bytes(&bytes); +} + /// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_num_bits(rn.disp.into()) <= 9, "Expected displacement to be 9 bits or less"); + assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); - Load::ldur(rt.reg_no, rn.base_reg_no, rn.disp.try_into().unwrap(), rt.num_bits).into() + Load::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, _ => panic!("Invalid operands for LDUR") }; @@ -89,9 +143,9 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { - assert!(imm16.num_bits <= 16, "The immediate operand must be 16 bits or less."); + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); - Mov::movk(rd.reg_no, imm16.value as 
u16, shift, rd.num_bits).into() + Mov::movk(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() }, _ => panic!("Invalid operand combination to movk instruction.") }; @@ -103,9 +157,9 @@ pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { - assert!(imm16.num_bits <= 16, "The immediate operand must be 16 bits or less."); + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); - Mov::movz(rd.reg_no, imm16.value as u16, shift, rd.num_bits).into() + Mov::movz(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() }, _ => panic!("Invalid operand combination to movz instruction.") }; @@ -113,6 +167,21 @@ pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// STUR - store a value in a register at a memory address +pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + + Store::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to stur instruction.") + }; + + cb.write_bytes(&bytes); +} + /// SUB - subtract rm from rn, put the result in rd, don't update flags pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { @@ -126,9 +195,9 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + 
assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::sub(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to sub instruction."), }; @@ -149,9 +218,9 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm12.num_bits <= 12, "The immediate operand must be 12 bits or less."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::subs(rd.reg_no, rn.reg_no, imm12.value as u16, rd.num_bits).into() + DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() }, _ => panic!("Invalid operand combination to subs instruction."), }; @@ -181,6 +250,29 @@ mod tests { assert_eq!(format!("{:x}", cb), bytes); } + #[test] + fn test_imm_fits_bits() { + assert!(imm_fits_bits(i8::MAX.into(), 8)); + assert!(imm_fits_bits(i8::MIN.into(), 8)); + + assert!(imm_fits_bits(i16::MAX.into(), 16)); + assert!(imm_fits_bits(i16::MIN.into(), 16)); + + assert!(imm_fits_bits(i32::MAX.into(), 32)); + assert!(imm_fits_bits(i32::MIN.into(), 32)); + + assert!(imm_fits_bits(i64::MAX.into(), 64)); + assert!(imm_fits_bits(i64::MIN.into(), 64)); + } + + #[test] + fn test_uimm_fits_bits() { + assert!(uimm_fits_bits(u8::MAX.into(), 8)); + assert!(uimm_fits_bits(u16::MAX.into(), 16)); + assert!(uimm_fits_bits(u32::MAX.into(), 32)); + assert!(uimm_fits_bits(u64::MAX.into(), 64)); + } + #[test] fn test_add_register() { check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); @@ -201,11 +293,21 @@ mod tests { check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); } + #[test] + fn test_bl() { + check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); + } + #[test] fn test_br() { check_bytes("80021fd6", 
|cb| br(cb, X20)); } + #[test] + fn test_ldaddal() { + check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); + } + #[test] fn test_ldur() { check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); @@ -231,6 +333,11 @@ mod tests { check_bytes("80025fd6", |cb| ret(cb, X20)); } + #[test] + fn test_stur() { + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(X11, 128))); + } + #[test] fn test_sub_register() { check_bytes("200002cb", |cb| sub(cb, X0, X1, X2)); diff --git a/yjit/src/asm/arm64/inst/store.rs b/yjit/src/asm/arm64/inst/store.rs new file mode 100644 index 00000000000000..42b9055ae80622 --- /dev/null +++ b/yjit/src/asm/arm64/inst/store.rs @@ -0,0 +1,105 @@ +/// The size of the operands being operated on. +enum Size { + Size32 = 0b10, + Size64 = 0b11, +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 store instruction that can be encoded. +/// +/// STUR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 0 0 0 0 | +/// | size. imm9.......................... rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Store { + /// The number of the register to be transferred. + rt: u8, + + /// The register holding the memory location. + rn: u8, + + /// The optional signed immediate byte offset from the base register. + imm9: i16, + + /// The size of the operands being operated on. 
+ size: Size +} + +impl Store { + /// STUR (store register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en + pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { + rt, + rn, + imm9, + size: num_bits.into() + } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Store) -> Self { + let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); + + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (imm9 << 12) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Store) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_stur() { + let inst = Store::stur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8000020, result); + } + + #[test] + fn test_stur_negative_offset() { + let inst = Store::stur(0, 1, -1, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81ff020, result); + } + + #[test] + fn test_stur_positive_offset() { + let inst = Store::stur(0, 1, 255, 64); + let result: u32 = inst.into(); + assert_eq!(0xf80ff020, result); + } +} diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index ba8ecd166dcbb4..aa73d438fe7278 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -1,39 +1,5 @@ use crate::asm::{imm_num_bits, uimm_num_bits}; -/// This operand represents a signed immediate value. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct A64Imm -{ - // Size in bits - pub num_bits: u8, - - // The value of the immediate - pub value: i64 -} - -impl A64Imm { - pub fn new(value: i64) -> Self { - Self { num_bits: imm_num_bits(value), value } - } -} - -/// This operand represents an unsigned immediate value. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct A64UImm -{ - // Size in bits - pub num_bits: u8, - - // The value of the immediate - pub value: u64 -} - -impl A64UImm { - pub fn new(value: u64) -> Self { - Self { num_bits: uimm_num_bits(value), value } - } -} - /// This operand represents a register. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct A64Reg @@ -80,10 +46,10 @@ pub enum A64Opnd None, // Immediate value - Imm(A64Imm), + Imm(i64), // Unsigned immediate - UImm(A64UImm), + UImm(u64), // Register Reg(A64Reg), @@ -95,12 +61,12 @@ pub enum A64Opnd impl A64Opnd { /// Create a new immediate value operand. pub fn new_imm(value: i64) -> Self { - A64Opnd::Imm(A64Imm::new(value)) + A64Opnd::Imm(value) } /// Create a new unsigned immediate value operand. pub fn new_uimm(value: u64) -> Self { - A64Opnd::UImm(A64UImm::new(value)) + A64Opnd::UImm(value) } /// Creates a new memory operand. 
From 27fcab995e6dde19deb91dc6e291bdb72100af68 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 14 Jun 2022 13:41:53 -0400 Subject: [PATCH 324/546] Get side exits working, get miniruby to boot with threshold=1 --- yjit/src/codegen.rs | 110 ++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 45 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 4d5e73686def42..3589aaf579d463 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -397,74 +397,86 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } } -/// Generate an exit to return to the interpreter -fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr { - let code_ptr = cb.get_write_ptr(); +// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit +// to the interpreter when it cannot service a stub by generating new code. +// Before coming here, branch_stub_hit() takes care of fully reconstructing +// interpreter state. +fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { + let ocb = ocb.unwrap(); + let code_ptr = ocb.get_write_ptr(); todo!(); /* - add_comment(cb, "exit to interpreter"); + gen_counter_incr!(ocb, exit_from_branch_stub); + + cpop(ocb, REG_SP); + cpop(ocb, REG_EC); + cpop(ocb, REG_CFP); + + mov(ocb, RAX, uimm_opnd(Qundef.into())); + ret(ocb); + + return code_ptr; + */ +} + +/// Generate an exit to return to the interpreter +fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { + asm.comment("exit to interpreter"); // Generate the code to exit to the interpreters // Write the adjusted SP back into the CFP if ctx.get_sp_offset() != 0 { - let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer.into()); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP); + let sp_opnd = asm.lea(ctx.sp_opnd(0)); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), + sp_opnd + ); } // Update CFP->PC - mov(cb, RAX, const_ptr_opnd(exit_pc as *const u8)); - 
mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), RAX); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), + Opnd::const_ptr(exit_pc as *const u8) + ); // Accumulate stats about interpreter exits #[cfg(feature = "stats")] if get_option!(gen_stats) { - mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8)); - call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8); + asm.ccall( + rb_yjit_count_side_exit_op as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); // If --yjit-trace-exits option is enabled, record the exit stack // while recording the side exits. if get_option!(gen_trace_exits) { - mov(cb, C_ARG_REGS[0], const_ptr_opnd(exit_pc as *const u8)); - call_ptr(cb, REG0, rb_yjit_record_exit_stack as *const u8); + asm.ccall( + rb_yjit_record_exit_stack as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); } } - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); - - mov(cb, RAX, uimm_opnd(Qundef.into())); - ret(cb); + asm.cpop(SP); + asm.cpop(EC); + asm.cpop(CFP); - return code_ptr; - */ + asm.cret(Qundef.into()); } -// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit -// to the interpreter when it cannot service a stub by generating new code. -// Before coming here, branch_stub_hit() takes care of fully reconstructing -// interpreter state. 
-fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { - let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); - - todo!(); - - /* - gen_counter_incr!(ocb, exit_from_branch_stub); +/// Generate an exit to the interpreter in the outlined code block +fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> CodePtr { + let mut cb = ocb.unwrap(); + let exit_code = cb.get_write_ptr(); + let mut asm = Assembler::new(); - cpop(ocb, REG_SP); - cpop(ocb, REG_EC); - cpop(ocb, REG_CFP); + gen_exit(exit_pc, ctx, &mut asm); - mov(ocb, RAX, uimm_opnd(Qundef.into())); - ret(ocb); + asm.compile(&mut cb); - return code_ptr; - */ + exit_code } // :side-exit: @@ -482,7 +494,7 @@ fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr { match jit.side_exit_for_pc { None => { - let exit_code = gen_exit(jit.pc, ctx, ocb.unwrap()); + let exit_code = gen_outlined_exit(jit.pc, ctx, ocb); jit.side_exit_for_pc = Some(exit_code); exit_code } @@ -502,13 +514,13 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) { return; } + // If we're compiling the first instruction in the block. if jit.insn_idx == blockid.idx { - // We are compiling the first instruction in the block. // Generate the exit with the cache in jitstate. block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx)); } else { let pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) }; - block.entry_exit = Some(gen_exit(pc, &block_ctx, ocb.unwrap())); + block.entry_exit = Some(gen_outlined_exit(jit.pc, &block_ctx, ocb)); } } @@ -705,10 +717,14 @@ fn jump_to_next_insn( // We are at the end of the current instruction. Record the boundary. 
if jit.record_boundary_patch_point { + todo!(); + + /* let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap()); record_global_inval_patch(cb, exit_pos); jit.record_boundary_patch_point = false; + */ } // Generate the jump instruction @@ -777,10 +793,14 @@ pub fn gen_single_block( // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { + todo!("record_boundary_patch_point"); + + /* // Generate an exit to this instruction and record it let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap()); record_global_inval_patch(cb, exit_pos); jit.record_boundary_patch_point = false; + */ } // In debug mode, verify our existing assumption @@ -817,12 +837,12 @@ pub fn gen_single_block( // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE, // the exit this generates would be wrong. We could save a copy of the entry context // and assert that ctx is the same here. - let exit = gen_exit(jit.pc, &ctx, cb); + gen_exit(jit.pc, &ctx, &mut asm); // If this is the first instruction in the block, then we can use // the exit for block->entry_exit. 
if insn_idx == block.get_blockid().idx { - block.entry_exit = Some(exit); + block.entry_exit = block.get_start_addr(); } break; From e72dab304e423ad8c98656c146d52f6a8fa4a2c2 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 14 Jun 2022 15:45:41 -0400 Subject: [PATCH 325/546] Add atomic counter increment instruction --- yjit/src/backend/ir.rs | 4 ++++ yjit/src/backend/x86_64/mod.rs | 7 +++++++ yjit/src/codegen.rs | 29 +++++++---------------------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index a077eb1945940f..10e04c8e2b0c53 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -98,6 +98,9 @@ pub enum Op // C function return CRet, + // Atomically increment a counter + IncrCounter, + // Trigger a debugger breakpoint Breakpoint, } @@ -688,6 +691,7 @@ def_push_2_opnd_no_out!(mov, Op::Mov); def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); +def_push_2_opnd_no_out!(incr_counter, Op::IncrCounter); // NOTE: these methods are temporary and will likely move // to context.rs later diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 748bf5aea04f1e..17571dd45b6f7e 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -183,6 +183,13 @@ impl Assembler Op::Je => je_label(cb, insn.target.unwrap().unwrap_label_idx()), + // Atomically increment a counter at a given memory location + Op::IncrCounter => { + assert!(matches!(insn.opnds[0], Opnd::Mem(_))); + write_lock_prefix(cb); + add(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Breakpoint => int3(cb), _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 3589aaf579d463..580595d499cde2 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -223,12 +223,8 @@ macro_rules! 
gen_counter_incr { let ptr_reg = $asm.load(Opnd::const_ptr(ptr as *const u8)); let counter_opnd = Opnd::mem(64, ptr_reg, 0); - // FIXME: do we want an atomic add, or an atomic store or swap for arm? - //write_lock_prefix($cb); // for ractors. - // Increment and store the updated value - let incr_opnd = $asm.add(counter_opnd, 1.into()); - $asm.store(counter_opnd, incr_opnd); + $asm.incr_counter(counter_opnd, 1.into() ); } }; } @@ -408,7 +404,7 @@ fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { todo!(); /* - gen_counter_incr!(ocb, exit_from_branch_stub); + gen_counter_incr!(asm, exit_from_branch_stub); cpop(ocb, REG_SP); cpop(ocb, REG_EC); @@ -541,7 +537,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { call_ptr(cb, REG0, rb_full_cfunc_return as *const u8); // Count the exit - gen_counter_incr!(cb, traced_cfunc_return); + gen_counter_incr!(asm, traced_cfunc_return); // Return to the interpreter pop(cb, REG_SP); @@ -562,26 +558,16 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { let code_ptr = ocb.get_write_ptr(); let mut asm = Assembler::new(); - // Note, gen_leave() fully reconstructs interpreter state and leaves the - // return value in RAX before coming here. + // NOTE: gen_leave() fully reconstructs interpreter state and leaves the + // return value in C_RET_OPND before coming here. - // FIXME // Every exit to the interpreter should be counted - //gen_counter_incr!(ocb, leave_interp_return); + gen_counter_incr!(asm, leave_interp_return); asm.cpop(SP); asm.cpop(EC); asm.cpop(CFP); - // FIXME: we're currently assuming that the return value is in RAX, - // left there by gen_leave() ... - // - // What are our options? - // We could put the return value in C_RET_REG? - // Then call asm.ret with C_RET_REG? 
- - - asm.cret(C_RET_OPND); asm.compile(ocb); @@ -604,9 +590,8 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { let pc_match = asm.new_label("pc_match"); asm.je(pc_match); - // FIXME // We're not starting at the first PC, so we need to exit. - //gen_counter_incr!(cb, leave_start_pc_non_zero); + gen_counter_incr!(asm, leave_start_pc_non_zero); asm.cpop(SP); asm.cpop(EC); From 9d18e6c300eacfdf11e166326156677e56fc6392 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 14 Jun 2022 15:50:48 -0400 Subject: [PATCH 326/546] Port gen_code_for_exit_from_stub() --- yjit/src/codegen.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 580595d499cde2..95d29689fbd443 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -400,21 +400,19 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { let ocb = ocb.unwrap(); let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); - todo!(); - - /* gen_counter_incr!(asm, exit_from_branch_stub); - cpop(ocb, REG_SP); - cpop(ocb, REG_EC); - cpop(ocb, REG_CFP); + asm.cpop(SP); + asm.cpop(EC); + asm.cpop(CFP); + + asm.cret(Qundef.into()); - mov(ocb, RAX, uimm_opnd(Qundef.into())); - ret(ocb); + asm.compile(ocb); - return code_ptr; - */ + code_ptr } /// Generate an exit to return to the interpreter @@ -6186,7 +6184,7 @@ impl CodegenGlobals { let leave_exit_code = gen_leave_exit(&mut ocb); - //let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); + let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); // Generate full exit code for C func //let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); From 4dbc1e1d825b4a50e3847de788da0ab6a8d860ae Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 14 Jun 2022 16:02:43 -0400 Subject: [PATCH 327/546] Port bitwise not, gen_check_ints() --- yjit/src/backend/ir.rs | 1 + 
yjit/src/backend/x86_64/mod.rs | 10 +++++----- yjit/src/codegen.rs | 25 ++++++++++--------------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 10e04c8e2b0c53..86e590e846f324 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -681,6 +681,7 @@ def_push_jcc!(jnz, Op::Jnz); def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); +def_push_1_opnd!(not, Op::Not); def_push_1_opnd_no_out!(cpush, Op::CPush); def_push_1_opnd_no_out!(cpop, Op::CPop); def_push_1_opnd_no_out!(cret, Op::CRet); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 17571dd45b6f7e..a67cddb98ecf76 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -76,11 +76,11 @@ impl Assembler self.transform_insns(|asm, index, op, opnds, target| { match op { - Op::Add | Op::Sub | Op::And => { - match opnds.as_slice() { + Op::Add | Op::Sub | Op::And | Op::Not => { + match opnds[0] { // Instruction output whose live range spans beyond this instruction - [Opnd::InsnOut(out_idx), _] => { - if live_ranges[*out_idx] > index { + Opnd::InsnOut(out_idx) => { + if live_ranges[out_idx] > index { let opnd0 = asm.load(opnds[0]); asm.push_insn(op, vec![opnd0, opnds[1]], None); return; @@ -88,7 +88,7 @@ impl Assembler }, // We have to load memory and register operands to avoid corrupting them - [Opnd::Mem(_) | Opnd::Reg(_), _] => { + Opnd::Mem(_) | Opnd::Reg(_) => { let opnd0 = asm.load(opnds[0]); asm.push_insn(op, vec![opnd0, opnds[1]], None); return; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 95d29689fbd443..c1ed715f0f0f9c 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -658,27 +658,22 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O return Some(code_ptr); } -/* // Generate code to check for interrupts and take a side-exit. 
// Warning: this function clobbers REG0 -fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) { +fn gen_check_ints(asm: &mut Assembler, side_exit: CodePtr) { // Check for interrupts // see RUBY_VM_CHECK_INTS(ec) macro - add_comment(cb, "RUBY_VM_CHECK_INTS(ec)"); - mov( - cb, - REG0_32, - mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_MASK), - ); - not(cb, REG0_32); - test( - cb, - mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), - REG0_32, + asm.comment("RUBY_VM_CHECK_INTS(ec)"); + + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + + asm.test( + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + not_mask, ); - jnz_ptr(cb, side_exit); + + asm.jnz(Target::CodePtr(side_exit)); } -*/ // Generate a stubbed unconditional jump to the next bytecode instruction. // Blocks that are part of a guard chain can use this to share the same successor. From e743e3bf20a38d44888383393823b8776c2d1e90 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 14 Jun 2022 16:32:54 -0400 Subject: [PATCH 328/546] Remove unused code, add backend asm test --- yjit/src/backend/ir.rs | 56 +++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 86e590e846f324..1fca3a5b87bc43 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -153,7 +153,7 @@ impl Opnd disp: disp, }) }, - _ => unreachable!() + _ => unreachable!("memory operand with non-register base") } } @@ -169,42 +169,22 @@ impl From for Opnd { } } -impl From for Opnd { - fn from(value: VALUE) -> Self { - let VALUE(uimm) = value; - Opnd::UImm(uimm as u64) +impl From for Opnd { + fn from(value: u64) -> Self { + Opnd::UImm(value.try_into().unwrap()) } } -/// NOTE: this is useful during the port but can probably be removed once -/// Context returns ir::Opnd instead of X86Opnd -/// -/// Method to convert from an X86Opnd to an IR Opnd -impl From for Opnd { - fn from(opnd: X86Opnd) -> 
Self { - match opnd { - X86Opnd::None => Opnd::None, - X86Opnd::UImm(X86UImm{ value, .. }) => Opnd::UImm(value), - X86Opnd::Imm(X86Imm{ value, .. }) => Opnd::Imm(value), - - // General-purpose register - X86Opnd::Reg(reg) => { - Opnd::Reg(reg) - } - - // Memory operand with displacement - X86Opnd::Mem(X86Mem{ num_bits, base_reg_no, disp, idx_reg_no: None, scale_exp: 0 }) => { - let base_reg = Reg { num_bits: 64, reg_no: base_reg_no, reg_type: RegType::GP }; - - Opnd::Mem(Mem { - base_reg: base_reg, - disp, - num_bits - }) - } +impl From for Opnd { + fn from(value: i32) -> Self { + Opnd::Imm(value.try_into().unwrap()) + } +} - _ => panic!("unsupported x86 operand type") - } +impl From for Opnd { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + Opnd::UImm(uimm as u64) } } @@ -874,6 +854,16 @@ mod tests { asm.compile_with_regs(&mut cb, regs); } + // 64-bit values can't be written directly to memory, + // need to be split into one or more register movs first + #[test] + fn test_store_u64() + { + let (mut asm, mut cb, regs) = setup_asm(1); + asm.store(Opnd::mem(64, SP, 0), u64::MAX.into()); + asm.compile_with_regs(&mut cb, regs); + } + #[test] fn test_c_call() { From ae9bcfec8c237266e324fc1ae6a7a28390045b7e Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 13:03:20 -0400 Subject: [PATCH 329/546] Add assert --- yjit/src/backend/x86_64/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index a67cddb98ecf76..8c34b55832bfc6 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -186,6 +186,7 @@ impl Assembler // Atomically increment a counter at a given memory location Op::IncrCounter => { assert!(matches!(insn.opnds[0], Opnd::Mem(_))); + assert!(matches!(insn.opnds[0], Opnd::UImm(_))); write_lock_prefix(cb); add(cb, insn.opnds[0].into(), insn.opnds[1].into()); }, From 401521ca14da1b740be9004cc4a344925dbf5fff Mon Sep 17 00:00:00 2001 
From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 13:24:21 -0400 Subject: [PATCH 330/546] Rename transform_insns to forward_pass --- yjit/src/backend/ir.rs | 6 +++--- yjit/src/backend/x86_64/mod.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 1fca3a5b87bc43..d14e3485aa598a 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -328,7 +328,7 @@ impl Assembler } /// Transform input instructions, consumes the input assembler - pub(super) fn transform_insns(mut self, mut map_insn: F) -> Assembler + pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler where F: FnMut(&mut Assembler, usize, Op, Vec, Option) { let mut asm = Assembler { @@ -400,7 +400,7 @@ impl Assembler opnds.into_iter().map(|opnd| map_opnd(opnd, asm)).collect() } - self.transform_insns(|asm, _, op, opnds, target| { + self.forward_pass(|asm, _, op, opnds, target| { // Load heap object operands into registers because most // instructions can't directly work with 64-bit constants let opnds = load_gc_opnds(op, opnds, asm); @@ -478,7 +478,7 @@ impl Assembler let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - let asm = self.transform_insns(|asm, index, op, opnds, target| { + let asm = self.forward_pass(|asm, index, op, opnds, target| { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. 
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 8c34b55832bfc6..daa8005088ca45 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -74,7 +74,7 @@ impl Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - self.transform_insns(|asm, index, op, opnds, target| { + self.forward_pass(|asm, index, op, opnds, target| { match op { Op::Add | Op::Sub | Op::And | Op::Not => { match opnds[0] { From 59b818ec8757348e3f7fa463ace36489c5ec75ac Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 14:13:04 -0400 Subject: [PATCH 331/546] Add support for using InsnOut as memory operand base --- yjit/src/backend/ir.rs | 98 +++++++++++++++++++++++++++------- yjit/src/backend/x86_64/mod.rs | 14 +++-- 2 files changed, 88 insertions(+), 24 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index d14e3485aa598a..1f6307db9e4c40 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -105,12 +105,20 @@ pub enum Op Breakpoint, } +// Memory operand base +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum MemBase +{ + Reg(u8), + InsnOut(usize), +} + // Memory location #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct Mem { - // Base register - pub(super) base_reg: Reg, + // Base register number or instruction index + pub(super) base: MemBase, // Offset relative to the base pointer pub(super) disp: i32, @@ -148,11 +156,20 @@ impl Opnd Opnd::Reg(base_reg) => { assert!(base_reg.num_bits == 64); Opnd::Mem(Mem { + base: MemBase::Reg(base_reg.reg_no), + disp: disp, num_bits: num_bits, - base_reg: base_reg, + }) + }, + + Opnd::InsnOut(idx) => { + Opnd::Mem(Mem { + base: MemBase::InsnOut(idx), disp: disp, + num_bits: num_bits, }) }, + _ => unreachable!("memory operand with non-register base") } } @@ -161,6 +178,13 @@ impl Opnd pub fn const_ptr(ptr: *const u8) -> Self { Opnd::UImm(ptr as u64) } + + pub fn unwrap_reg(&self) -> Reg { + match self { 
+ Opnd::Reg(reg) => *reg, + _ => unreachable!("trying to unwrap {:?} into reg", self) + } + } } impl From for Opnd { @@ -264,8 +288,14 @@ impl Assembler // one. let insn_idx = self.insns.len(); for opnd in &opnds { - if let Opnd::InsnOut(idx) = opnd { - self.live_ranges[*idx] = insn_idx; + match opnd { + Opnd::InsnOut(idx) => { + self.live_ranges[*idx] = insn_idx; + } + Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { + self.live_ranges[*idx] = insn_idx; + } + _ => {} } } @@ -483,22 +513,26 @@ impl Assembler // spans more than one instruction. In that case, return the // allocated register to the pool. for opnd in &opnds { - if let Opnd::InsnOut(idx) = opnd { - // Since we have an InsnOut, we know it spans more that one - // instruction. - let start_index = *idx; - assert!(start_index < index); - - // We're going to check if this is the last instruction that - // uses this operand. If it is, we can return the allocated - // register to the pool. - if live_ranges[start_index] == index { - if let Opnd::Reg(reg) = asm.insns[start_index].out { - dealloc_reg(&mut pool, ®s, ®); - } else { - unreachable!("no register allocated for insn"); + match opnd { + Opnd::InsnOut(idx) | Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { + // Since we have an InsnOut, we know it spans more that one + // instruction. + let start_index = *idx; + assert!(start_index < index); + + // We're going to check if this is the last instruction that + // uses this operand. If it is, we can return the allocated + // register to the pool. 
+ if live_ranges[start_index] == index { + if let Opnd::Reg(reg) = asm.insns[start_index].out { + dealloc_reg(&mut pool, ®s, ®); + } else { + unreachable!("no register allocated for insn"); + } } } + + _ => {} } } @@ -541,7 +575,15 @@ impl Assembler // Replace InsnOut operands by their corresponding register let reg_opnds = opnds.into_iter().map(|opnd| match opnd { - Opnd::InsnOut(idx) => asm.insns[idx].out, + Opnd::InsnOut(idx) => asm.insns[idx].out, + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + let out_reg = asm.insns[idx].out.unwrap_reg(); + Opnd::Mem(Mem { + base: MemBase::Reg(out_reg.reg_no), + disp, + num_bits + }) + } _ => opnd, } ).collect(); @@ -864,6 +906,22 @@ mod tests { asm.compile_with_regs(&mut cb, regs); } + // Use instruction output as base register for memory operand + #[test] + fn test_base_insn_out() + { + let (mut asm, mut cb, regs) = setup_asm(1); + + // Load the pointer into a register + let ptr_reg = asm.load(Opnd::const_ptr(0 as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, 1.into() ); + + asm.compile_with_regs(&mut cb, regs); + } + #[test] fn test_c_call() { diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index daa8005088ca45..0c23781e2009c9 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -6,7 +6,7 @@ use crate::asm::{CodeBlock}; use crate::asm::x86_64::*; use crate::codegen::{JITState}; use crate::cruby::*; -use crate::backend::ir::{Assembler, Opnd, Target, Op, Mem}; +use crate::backend::ir::{Assembler, Opnd, Target, Op, MemBase, Mem}; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -49,8 +49,14 @@ impl From for X86Opnd { Opnd::Reg(reg) => X86Opnd::Reg(reg), // Memory operand with displacement - Opnd::Mem(Mem{ num_bits, base_reg, disp }) => { - mem_opnd(num_bits, X86Opnd::Reg(base_reg), disp) + Opnd::Mem(Mem{ base: 
MemBase::Reg(reg_no), num_bits, disp }) => { + let reg = X86Reg { + reg_no, + num_bits: 64, + reg_type: RegType::GP + }; + + mem_opnd(num_bits, X86Opnd::Reg(reg), disp) } _ => panic!("unsupported x86 operand type") @@ -186,7 +192,7 @@ impl Assembler // Atomically increment a counter at a given memory location Op::IncrCounter => { assert!(matches!(insn.opnds[0], Opnd::Mem(_))); - assert!(matches!(insn.opnds[0], Opnd::UImm(_))); + assert!(matches!(insn.opnds[1], Opnd::UImm(_) | Opnd::Imm(_) ) ); write_lock_prefix(cb); add(cb, insn.opnds[0].into(), insn.opnds[1].into()); }, From 1923842b3dd97cf00d1511b7962a509dd650f06b Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 16:07:38 -0400 Subject: [PATCH 332/546] Move backend tests to their own file --- yjit/src/backend/arm64/mod.rs | 5 +- yjit/src/backend/ir.rs | 234 ++------------------------------- yjit/src/backend/mod.rs | 4 +- yjit/src/backend/tests.rs | 221 +++++++++++++++++++++++++++++++ yjit/src/backend/x86_64/mod.rs | 2 +- 5 files changed, 238 insertions(+), 228 deletions(-) create mode 100644 yjit/src/backend/tests.rs diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 8685117c5f6dc3..be67e2384d1617 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -36,7 +36,10 @@ impl Assembler /// Get the list of registers from which we can allocate on this platform pub fn get_scratch_regs() -> Vec { - vec![X12_REG, X13_REG] + vec![ + X12_REG, + X13_REG + ] } /// Split platform-specific instructions diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 1f6307db9e4c40..4f1aafef998acb 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -604,8 +604,16 @@ impl Assembler /// compiling multiple blocks at a time? 
pub fn compile(self, cb: &mut CodeBlock) -> Vec { - let scratch_regs = Self::get_scratch_regs(); - self.compile_with_regs(cb, scratch_regs) + let alloc_regs = Self::get_alloc_regs(); + self.compile_with_regs(cb, alloc_regs) + } + + /// Compile with a limited number of registers + pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> Vec + { + let mut alloc_regs = Self::get_alloc_regs(); + let alloc_regs = alloc_regs.drain(0..num_regs).collect(); + self.compile_with_regs(cb, alloc_regs) } } @@ -741,225 +749,3 @@ impl Context self.stack_push_mapping((mapping, temp_type)).into() } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::cruby::*; - use crate::core::*; - use InsnOpnd::*; - - // Test that this function type checks - fn gen_dup( - ctx: &mut Context, - asm: &mut Assembler, - ) { - let dup_val = ctx.ir_stack_pop(0); - let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); - - let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); - asm.mov(loc0, dup_val); - } - - fn guard_object_is_heap( - asm: &mut Assembler, - object_opnd: Opnd, - ctx: &mut Context, - side_exit: CodePtr, - ) { - asm.comment("guard object is heap"); - - // Test that the object is not an immediate - asm.test(object_opnd.clone(), Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); - asm.jnz(Target::CodePtr(side_exit)); - - // Test that the object is not false or nil - asm.cmp(object_opnd.clone(), Opnd::UImm(Qnil.into())); - asm.jbe(Target::CodePtr(side_exit)); - } - - #[test] - fn test_add() { - let mut asm = Assembler::new(); - let out = asm.add(SP, Opnd::UImm(1)); - asm.add(out, Opnd::UImm(2)); - } - - #[test] - fn test_split_loads() { - let mut asm = Assembler::new(); - - let regs = Assembler::get_scratch_regs(); - - asm.add( - Opnd::mem(64, Opnd::Reg(regs[0]), 0), - Opnd::mem(64, Opnd::Reg(regs[1]), 0) - ); - - let result = asm.split_loads(); - assert_eq!(result.insns.len(), 2); - assert_eq!(result.insns[0].op, Op::Load); - } - - #[test] - fn test_alloc_regs() { 
- let mut asm = Assembler::new(); - - // Get the first output that we're going to reuse later. - let out1 = asm.add(EC, Opnd::UImm(1)); - - // Pad some instructions in to make sure it can handle that. - asm.add(EC, Opnd::UImm(2)); - - // Get the second output we're going to reuse. - let out2 = asm.add(EC, Opnd::UImm(3)); - - // Pad another instruction. - asm.add(EC, Opnd::UImm(4)); - - // Reuse both the previously captured outputs. - asm.add(out1, out2); - - // Now get a third output to make sure that the pool has registers to - // allocate now that the previous ones have been returned. - let out3 = asm.add(EC, Opnd::UImm(5)); - asm.add(out3, Opnd::UImm(6)); - - // Here we're going to allocate the registers. - let result = asm.alloc_regs(Assembler::get_scratch_regs()); - - // Now we're going to verify that the out field has been appropriately - // updated for each of the instructions that needs it. - let regs = Assembler::get_scratch_regs(); - assert_eq!(result.insns[0].out, Opnd::Reg(regs[0])); - assert_eq!(result.insns[2].out, Opnd::Reg(regs[1])); - assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); - } - - fn setup_asm(num_regs: usize) -> (Assembler, CodeBlock, Vec) { - let mut regs = Assembler::get_scratch_regs(); - - return ( - Assembler::new(), - CodeBlock::new_dummy(1024), - regs.drain(0..num_regs).collect() - ); - } - - // Test full codegen pipeline - #[test] - fn test_compile() - { - let (mut asm, mut cb, regs) = setup_asm(1); - - let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); - asm.add(out, Opnd::UImm(2)); - - asm.compile(&mut cb); - } - - // Test memory-to-memory move - #[test] - fn test_mov_mem2mem() - { - let (mut asm, mut cb, regs) = setup_asm(1); - - asm.comment("check that comments work too"); - asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); - - asm.compile_with_regs(&mut cb, regs); - } - - // Test load of register into new register - #[test] - fn test_load_reg() - { - let (mut asm, mut cb, regs) = setup_asm(1); - - let out = 
asm.load(SP); - asm.mov(Opnd::mem(64, SP, 0), out); - - asm.compile_with_regs(&mut cb, regs); - } - - // Multiple registers needed and register reuse - #[test] - fn test_reuse_reg() - { - let (mut asm, mut cb, regs) = setup_asm(2); - - let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); - let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); - let v2 = asm.add(v0, Opnd::UImm(1)); - asm.add(v0, v2); - - asm.compile_with_regs(&mut cb, regs); - } - - // 64-bit values can't be written directly to memory, - // need to be split into one or more register movs first - #[test] - fn test_store_u64() - { - let (mut asm, mut cb, regs) = setup_asm(1); - asm.store(Opnd::mem(64, SP, 0), u64::MAX.into()); - asm.compile_with_regs(&mut cb, regs); - } - - // Use instruction output as base register for memory operand - #[test] - fn test_base_insn_out() - { - let (mut asm, mut cb, regs) = setup_asm(1); - - // Load the pointer into a register - let ptr_reg = asm.load(Opnd::const_ptr(0 as *const u8)); - let counter_opnd = Opnd::mem(64, ptr_reg, 0); - - // Increment and store the updated value - asm.incr_counter(counter_opnd, 1.into() ); - - asm.compile_with_regs(&mut cb, regs); - } - - #[test] - fn test_c_call() - { - extern "sysv64" fn dummy_c_fun(v0: usize, v1: usize) - { - } - - let (mut asm, mut cb, regs) = setup_asm(2); - - asm.ccall( - dummy_c_fun as *const u8, - vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] - ); - - asm.compile_with_regs(&mut cb, regs); - } - - #[test] - fn test_lea_ret() - { - let (mut asm, mut cb, regs) = setup_asm(1); - - let addr = asm.lea(Opnd::mem(64, SP, 0)); - asm.cret(addr); - - asm.compile_with_regs(&mut cb, regs); - } - - #[test] - fn test_jcc_label() - { - let (mut asm, mut cb, regs) = setup_asm(1); - - let label = asm.new_label("foo"); - asm.cmp(EC, EC); - asm.je(label); - asm.write_label(label); - - asm.compile_with_regs(&mut cb, regs); - } -} diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index a83fe4f69ebc1f..0841c9ffa5859c 100644 
--- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -1,3 +1,3 @@ pub mod x86_64; - -pub mod ir; \ No newline at end of file +pub mod ir; +mod tests; \ No newline at end of file diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs new file mode 100644 index 00000000000000..45b8fdfb8ad0ae --- /dev/null +++ b/yjit/src/backend/tests.rs @@ -0,0 +1,221 @@ +#![cfg(test)] + +use crate::asm::{CodeBlock}; +use crate::virtualmem::{CodePtr}; +use crate::backend::ir::*; +use crate::cruby::*; +use crate::core::*; +use InsnOpnd::*; + +// Test that this function type checks +fn gen_dup( + ctx: &mut Context, + asm: &mut Assembler, +) { + let dup_val = ctx.ir_stack_pop(0); + let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); + + let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); + asm.mov(loc0, dup_val); +} + +fn guard_object_is_heap( + asm: &mut Assembler, + object_opnd: Opnd, + ctx: &mut Context, + side_exit: CodePtr, +) { + asm.comment("guard object is heap"); + + // Test that the object is not an immediate + asm.test(object_opnd.clone(), Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(Target::CodePtr(side_exit)); + + // Test that the object is not false or nil + asm.cmp(object_opnd.clone(), Opnd::UImm(Qnil.into())); + asm.jbe(Target::CodePtr(side_exit)); +} + +#[test] +fn test_add() { + let mut asm = Assembler::new(); + let out = asm.add(SP, Opnd::UImm(1)); + asm.add(out, Opnd::UImm(2)); +} + +#[test] +fn test_split_loads() { + let mut asm = Assembler::new(); + + let regs = Assembler::get_alloc_regs(); + + asm.add( + Opnd::mem(64, Opnd::Reg(regs[0]), 0), + Opnd::mem(64, Opnd::Reg(regs[1]), 0) + ); + + let result = asm.split_loads(); + assert_eq!(result.insns.len(), 2); + assert_eq!(result.insns[0].op, Op::Load); +} + +#[test] +fn test_alloc_regs() { + let mut asm = Assembler::new(); + + // Get the first output that we're going to reuse later. 
+ let out1 = asm.add(EC, Opnd::UImm(1)); + + // Pad some instructions in to make sure it can handle that. + asm.add(EC, Opnd::UImm(2)); + + // Get the second output we're going to reuse. + let out2 = asm.add(EC, Opnd::UImm(3)); + + // Pad another instruction. + asm.add(EC, Opnd::UImm(4)); + + // Reuse both the previously captured outputs. + asm.add(out1, out2); + + // Now get a third output to make sure that the pool has registers to + // allocate now that the previous ones have been returned. + let out3 = asm.add(EC, Opnd::UImm(5)); + asm.add(out3, Opnd::UImm(6)); + + // Here we're going to allocate the registers. + let result = asm.alloc_regs(Assembler::get_alloc_regs()); + + // Now we're going to verify that the out field has been appropriately + // updated for each of the instructions that needs it. + let regs = Assembler::get_alloc_regs(); + assert_eq!(result.insns[0].out, Opnd::Reg(regs[0])); + assert_eq!(result.insns[2].out, Opnd::Reg(regs[1])); + assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); +} + +fn setup_asm() -> (Assembler, CodeBlock) { + return ( + Assembler::new(), + CodeBlock::new_dummy(1024) + ); +} + +// Test full codegen pipeline +#[test] +fn test_compile() +{ + let (mut asm, mut cb) = setup_asm(); + let regs = Assembler::get_alloc_regs(); + + let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); + asm.add(out, Opnd::UImm(2)); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test memory-to-memory move +#[test] +fn test_mov_mem2mem() +{ + let (mut asm, mut cb) = setup_asm(); + + asm.comment("check that comments work too"); + asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test load of register into new register +#[test] +fn test_load_reg() +{ + let (mut asm, mut cb) = setup_asm(); + + let out = asm.load(SP); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Multiple registers needed and register reuse +#[test] +fn test_reuse_reg() +{ + let (mut 
asm, mut cb) = setup_asm(); + + let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); + let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); + let v2 = asm.add(v0, Opnd::UImm(1)); + asm.add(v0, v2); + + asm.compile_with_num_regs(&mut cb, 2); +} + +// 64-bit values can't be written directly to memory, +// need to be split into one or more register movs first +#[test] +fn test_store_u64() +{ + let (mut asm, mut cb) = setup_asm(); + asm.store(Opnd::mem(64, SP, 0), u64::MAX.into()); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Use instruction output as base register for memory operand +#[test] +fn test_base_insn_out() +{ + let (mut asm, mut cb) = setup_asm(); + + // Load the pointer into a register + let ptr_reg = asm.load(Opnd::const_ptr(0 as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, 1.into() ); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_c_call() +{ + extern "sysv64" fn dummy_c_fun(v0: usize, v1: usize) + { + } + + let (mut asm, mut cb) = setup_asm(); + + asm.ccall( + dummy_c_fun as *const u8, + vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] + ); + + asm.compile_with_num_regs(&mut cb, 2); +} + +#[test] +fn test_lea_ret() +{ + let (mut asm, mut cb) = setup_asm(); + + let addr = asm.lea(Opnd::mem(64, SP, 0)); + asm.cret(addr); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_jcc_label() +{ + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("foo"); + asm.cmp(EC, EC); + asm.je(label); + asm.write_label(label); + + asm.compile_with_num_regs(&mut cb, 1); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 0c23781e2009c9..4d2437837049e7 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -67,7 +67,7 @@ impl From for X86Opnd { impl Assembler { /// Get the list of registers from which we can allocate on this platform - pub fn get_scratch_regs() -> Vec + 
pub fn get_alloc_regs() -> Vec { vec![ RAX_REG, From abea8c8983aef411f7e6a2afbfbc25e31b2f5486 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 16:16:33 -0400 Subject: [PATCH 333/546] Add stores to one of the tests --- yjit/src/backend/ir.rs | 2 ++ yjit/src/backend/tests.rs | 8 ++++++-- yjit/src/backend/x86_64/mod.rs | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 4f1aafef998acb..f1146f152e6109 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -86,6 +86,7 @@ pub enum Op // Low-level conditional jump instructions Jbe, Je, + Jz, Jnz, // Push and pop registers to/from the C stack @@ -707,6 +708,7 @@ macro_rules! def_push_2_opnd_no_out { def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); def_push_jcc!(je, Op::Je); def_push_jcc!(jbe, Op::Jbe); +def_push_jcc!(jz, Op::Jz); def_push_jcc!(jnz, Op::Jnz); def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 45b8fdfb8ad0ae..afb4c42fa89cfe 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -146,8 +146,12 @@ fn test_reuse_reg() let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); - let v2 = asm.add(v0, Opnd::UImm(1)); - asm.add(v0, v2); + + let v2 = asm.add(v0, Opnd::UImm(1)); // Reuse v1 register + let v3 = asm.add(v0, v2); + + asm.store(Opnd::mem(64, SP, 0), v2); + asm.store(Opnd::mem(64, SP, 8), v3); asm.compile_with_num_regs(&mut cb, 2); } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4d2437837049e7..7dcf41600d1f28 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -187,7 +187,10 @@ impl Assembler Op::JmpOpnd => jmp_rm(cb, insn.opnds[0].into()), + // Conditional jump to a label Op::Je => je_label(cb, insn.target.unwrap().unwrap_label_idx()), + Op::Jz => jz_label(cb, 
insn.target.unwrap().unwrap_label_idx()), + Op::Jnz => jnz_label(cb, insn.target.unwrap().unwrap_label_idx()), // Atomically increment a counter at a given memory location Op::IncrCounter => { From 40ac79ada89953f5ed9e36bdb5b3d0d3ae8d8f3e Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 16:17:15 -0400 Subject: [PATCH 334/546] Add bitwise and to x86 backend --- yjit/src/backend/x86_64/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 7dcf41600d1f28..e4f8745583d934 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -134,6 +134,10 @@ impl Assembler add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, + Op::And => { + and(cb, insn.opnds[0].into(), insn.opnds[1].into()) + }, + Op::Store => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // This assumes only load instructions can contain references to GC'd Value operands From b8fc9909bfa18677d5ad2cb2535ca1f0537216e5 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 15 Jun 2022 16:30:40 -0400 Subject: [PATCH 335/546] Get rid of temporary context methods --- yjit/src/backend/ir.rs | 26 -------------------------- yjit/src/backend/tests.rs | 4 ++-- yjit/src/codegen.rs | 18 +++++++++--------- 3 files changed, 11 insertions(+), 37 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index f1146f152e6109..a441303d64cb98 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -725,29 +725,3 @@ def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); def_push_2_opnd_no_out!(incr_counter, Op::IncrCounter); - -// NOTE: these methods are temporary and will likely move -// to context.rs later -// They are just wrappers to convert from X86Opnd into the IR Opnd type -impl Context -{ - pub fn ir_sp_opnd(&mut self, idx: isize) -> Opnd { - self.sp_opnd(idx).into() 
- } - - pub fn ir_stack_opnd(&mut self, idx: i32) -> Opnd { - self.stack_opnd(idx).into() - } - - pub fn ir_stack_pop(&mut self, n: usize) -> Opnd { - self.stack_pop(n).into() - } - - pub fn ir_stack_push(&mut self, val_type: Type) -> Opnd { - self.stack_push(val_type).into() - } - - pub fn ir_stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd { - self.stack_push_mapping((mapping, temp_type)).into() - } -} diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index afb4c42fa89cfe..7b2f3574558937 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -12,10 +12,10 @@ fn gen_dup( ctx: &mut Context, asm: &mut Assembler, ) { - let dup_val = ctx.ir_stack_pop(0); + let dup_val = ctx.stack_pop(0); let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); - let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); + let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); asm.mov(loc0, dup_val); } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index c1ed715f0f0f9c..b58caa0984cd31 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -277,7 +277,7 @@ fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { /// which could invalidate memory operands fn gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { if ctx.get_sp_offset() != 0 { - let stack_pointer = ctx.ir_sp_opnd(0); + let stack_pointer = ctx.sp_opnd(0); let sp_addr = asm.lea(stack_pointer); asm.mov(SP, sp_addr); let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); @@ -897,10 +897,10 @@ fn gen_dup( _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let dup_val = ctx.ir_stack_pop(0); + let dup_val = ctx.stack_pop(0); let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); - let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type)); + let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); asm.mov(loc0, dup_val); KeepCompiling @@ -924,16 +924,16 @@ fn gen_dupn( return CantCompile; } - let opnd1: Opnd = 
ctx.ir_stack_opnd(1); - let opnd0: Opnd = ctx.ir_stack_opnd(0); + let opnd1: Opnd = ctx.stack_opnd(1); + let opnd0: Opnd = ctx.stack_opnd(0); let mapping1 = ctx.get_opnd_mapping(StackOpnd(1)); let mapping0 = ctx.get_opnd_mapping(StackOpnd(0)); - let dst1: Opnd = ctx.ir_stack_push_mapping(mapping1); + let dst1: Opnd = ctx.stack_push_mapping(mapping1); asm.mov(dst1, opnd1); - let dst0: Opnd = ctx.ir_stack_push_mapping(mapping0); + let dst0: Opnd = ctx.stack_push_mapping(mapping0); asm.mov(dst0, opnd0); KeepCompiling @@ -957,8 +957,8 @@ fn stack_swap( offset0: u16, offset1: u16, ) { - let stack0_mem = ctx.ir_stack_opnd(offset0 as i32); - let stack1_mem = ctx.ir_stack_opnd(offset1 as i32); + let stack0_mem = ctx.stack_opnd(offset0 as i32); + let stack1_mem = ctx.stack_opnd(offset1 as i32); let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0)); let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1)); From 4932a6ef755ae4cc473666c8757f7c51ac8c2902 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 16 Jun 2022 11:20:54 -0400 Subject: [PATCH 336/546] Fix small bug in x86_split --- yjit/src/backend/tests.rs | 19 +++++++++++++++++++ yjit/src/backend/x86_64/mod.rs | 10 ++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 7b2f3574558937..902d9eeebcab5b 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -223,3 +223,22 @@ fn test_jcc_label() asm.compile_with_num_regs(&mut cb, 1); } + +#[test] +fn test_jcc_ptr() +{ + let (mut asm, mut cb) = setup_asm(); + + // FIXME + /* + let side_exit = Target::CodePtr((5 as *mut u8).into()); + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + asm.test( + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + not_mask, + ); + asm.jnz(side_exit); + */ + + asm.compile_with_num_regs(&mut cb, 1); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 
e4f8745583d934..819dad1209099c 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -88,7 +88,9 @@ impl Assembler Opnd::InsnOut(out_idx) => { if live_ranges[out_idx] > index { let opnd0 = asm.load(opnds[0]); - asm.push_insn(op, vec![opnd0, opnds[1]], None); + let mut new_opnds = vec![opnd0]; + new_opnds.extend_from_slice(&opnds[1..]); + asm.push_insn(op, new_opnds, None); return; } }, @@ -96,7 +98,9 @@ impl Assembler // We have to load memory and register operands to avoid corrupting them Opnd::Mem(_) | Opnd::Reg(_) => { let opnd0 = asm.load(opnds[0]); - asm.push_insn(op, vec![opnd0, opnds[1]], None); + let mut new_opnds = vec![opnd0]; + new_opnds.extend_from_slice(&opnds[1..]); + asm.push_insn(op, new_opnds, None); return; }, @@ -113,6 +117,8 @@ impl Assembler /// Emit platform-specific machine code pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Vec { + //dbg!(&self.insns); + // List of GC offsets let mut gc_offsets: Vec = Vec::new(); From 084d4bb19228eed3e89663a2d24ab3accaa1a4ee Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 16 Jun 2022 15:25:27 -0400 Subject: [PATCH 337/546] Implement X86Reg::sub_reg() method --- yjit/src/asm/x86_64/mod.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 399c2e8c7e821d..ca2a2f6e1fa7e4 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -88,6 +88,25 @@ pub enum X86Opnd IPRel(i32) } +impl X86Reg { + fn sub_reg(&self, num_bits: u8) -> Self { + assert!( + num_bits == 8 || + num_bits == 16 || + num_bits == 32 || + num_bits == 64 + ); + + assert!(num_bits <= self.num_bits); + + Self { + num_bits, + reg_type: self.reg_type, + reg_no: self.reg_no + } + } +} + impl X86Opnd { fn rex_needed(&self) -> bool { match self { From 67de662c4419d4d115f40f8d3ed0c295b581dcf0 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 16 Jun 2022 15:34:17 -0400 Subject: [PATCH 
338/546] Add Opnd.rm_num_bits() method --- yjit/src/backend/ir.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index a441303d64cb98..785ea7a9aabd12 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -175,17 +175,27 @@ impl Opnd } } - /// Constant pointer operand + /// Constructor for constant pointer operand pub fn const_ptr(ptr: *const u8) -> Self { Opnd::UImm(ptr as u64) } + /// Unwrap a register operand pub fn unwrap_reg(&self) -> Reg { match self { Opnd::Reg(reg) => *reg, _ => unreachable!("trying to unwrap {:?} into reg", self) } } + + /// Get the size in bits for register/memory operands + pub fn rm_num_bits(&self) -> u8 { + match self { + Opnd::Reg(reg) => reg.num_bits, + Opnd::Mem(mem) => mem.num_bits, + _ => unreachable!() + } + } } impl From for Opnd { From eb4c7b4ea55c5a4c593bea4bba9aa1e9093b3447 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 16 Jun 2022 14:28:51 -0400 Subject: [PATCH 339/546] AND/ANDS for A64 (https://github.com/Shopify/ruby/pull/300) --- yjit/src/asm/arm64/inst/bitmask_imm.rs | 250 +++++++++++++++++++++++++ yjit/src/asm/arm64/inst/logical_imm.rs | 97 ++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + 3 files changed, 349 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/bitmask_imm.rs create mode 100644 yjit/src/asm/arm64/inst/logical_imm.rs diff --git a/yjit/src/asm/arm64/inst/bitmask_imm.rs b/yjit/src/asm/arm64/inst/bitmask_imm.rs new file mode 100644 index 00000000000000..7e5a21c7b4dce5 --- /dev/null +++ b/yjit/src/asm/arm64/inst/bitmask_imm.rs @@ -0,0 +1,250 @@ +/// Immediates used by the logical immediate instructions are not actually the +/// immediate value, but instead are encoded into a 13-bit wide mask of 3 +/// elements. This allows many more values to be represented than 13 bits would +/// normally allow, at the expense of not being able to represent every possible +/// value. 
+/// +/// In order for a number to be encodeable in this form, the binary +/// representation must consist of a single set of contiguous 1s. That pattern +/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or +/// 32 times (rotated or not). +/// +/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid. +/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot +/// be replicated across 64 bits. +/// +/// Some more examples to illustrate the idea of replication: +/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a +/// single set of 1s which can be replicated across all of the bits 32 times. +/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 8 times (rotated by 4 bits). +/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 4 times (rotated by 12 bits). +/// +/// To encode the values, there are 3 elements: +/// * n = 1 if the pattern is 64-bits wide, 0 otherwise +/// * imms = the size of the pattern, a 0, and then one less than the number of +/// sequential 1s +/// * immr = the number of right rotations to apply to the pattern to get the +/// target value +/// +pub struct BitmaskImmediate { + n: u8, + imms: u8, + immr: u8 +} + +impl TryFrom for BitmaskImmediate { + type Error = (); + + /// Attempt to convert a u64 into a BitmaskImm. + fn try_from(value: u64) -> Result { + /// Is this number's binary representation all 1s? + fn is_mask(imm: u64) -> bool { + if imm == u64::MAX { true } else { ((imm + 1) & imm) == 0 } + } + + /// Is this number's binary representation one or more 1s followed by one or + /// more 0s? 
+ fn is_shifted_mask(imm: u64) -> bool { + is_mask((imm - 1) | imm) + } + + let mut imm = value; + let mut size = 64; + + // First, determine the element size. + loop { + size >>= 1; + let mask = (1 << size) - 1; + + if (imm & mask) != ((imm >> size) & mask) { + size <<= 1; + break; + } + + if size <= 2 { + break; + } + } + + // Second, determine the rotation to make the pattern be aligned such + // that all of the least significant bits are 1. + let trailing_ones: u32; + let left_rotations: u32; + + let mask = u64::MAX >> (64 - size); + imm &= mask; + + if is_shifted_mask(imm) { + left_rotations = imm.trailing_zeros(); + assert!(left_rotations < 64); + trailing_ones = (imm >> left_rotations).trailing_ones(); + } else { + imm |= !mask; + if !is_shifted_mask(!imm) { + return Err(()); + } + + let leading_ones = imm.leading_ones(); + left_rotations = 64 - leading_ones; + trailing_ones = leading_ones + imm.trailing_ones() - (64 - size); + } + + // immr is the number of right rotations it takes to get from the + // matching unrotated pattern to the target value. + let immr = (size - left_rotations) & (size - 1); + assert!(size > left_rotations); + + // imms is encoded as the size of the pattern, a 0, and then one less + // than the number of sequential 1s. The table below shows how this is + // encoded. (Note that the way it works out, it's impossible for every x + // in a row to be 1 at the same time). + // +-------------+--------------+--------------+ + // | imms | element size | number of 1s | + // +-------------+--------------+--------------+ + // | 1 1 1 1 0 x | 2 bits | 1 | + // | 1 1 1 0 x x | 4 bits | 1-3 | + // | 1 1 0 x x x | 8 bits | 1-7 | + // | 1 0 x x x x | 16 bits | 1-15 | + // | 0 x x x x x | 32 bits | 1-31 | + // | x x x x x x | 64 bits | 1-63 | + // +-------------+--------------+--------------+ + let imms = (!(size - 1) << 1) | (trailing_ones - 1); + + // n is 1 if the element size is 64-bits, and 0 otherwise. 
+ let n = ((imms >> 6) & 1) ^ 1; + + Ok(BitmaskImmediate { + n: n as u8, + imms: (imms & 0x3f) as u8, + immr: (immr & 0x3f) as u8 + }) + } +} + +impl From for u32 { + /// Encode a bitmask immediate into a 32-bit value. + fn from(bitmask: BitmaskImmediate) -> Self { + 0 + | (((bitmask.n as u32) & 1) << 12) + | (bitmask.immr << 6) as u32 + | bitmask.imms as u32 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_failures() { + vec![5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| { + assert!(BitmaskImmediate::try_from(imm).is_err()); + }); + } + + #[test] + fn test_size_2_minimum() { + let bitmask = BitmaskImmediate::try_from(0x5555555555555555); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 }))); + } + + #[test] + fn test_size_2_maximum() { + let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 }))); + } + + #[test] + fn test_size_4_minimum() { + let bitmask = BitmaskImmediate::try_from(0x1111111111111111); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 }))); + } + + #[test] + fn test_size_4_rotated() { + let bitmask = BitmaskImmediate::try_from(0x6666666666666666); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 }))); + } + + #[test] + fn test_size_4_maximum() { + let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 }))); + } + + #[test] + fn test_size_8_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0101010101010101); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 }))); + } + + #[test] + fn test_size_8_rotated() { + let bitmask = BitmaskImmediate::try_from(0x1818181818181818); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 }))); + } + + #[test] 
+ fn test_size_8_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 }))); + } + + #[test] + fn test_size_16_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0001000100010001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 }))); + } + + #[test] + fn test_size_16_rotated() { + let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 }))); + } + + #[test] + fn test_size_16_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 }))); + } + + #[test] + fn test_size_32_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000100000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_32_rotated() { + let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 }))); + } + + #[test] + fn test_size_32_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 }))); + } + + #[test] + fn test_size_64_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000000000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_64_rotated() { + let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 }))); + } + + #[test] + fn test_size_64_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 
0b111110 }))); + } +} diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs new file mode 100644 index 00000000000000..63a4556d852e6c --- /dev/null +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -0,0 +1,97 @@ +use super::bitmask_imm::BitmaskImmediate; +use super::sf::Sf; + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 bitwise immediate instruction that can be +/// encoded. +/// +/// AND/ANDS (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 0 | +/// | sf opc.. N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to test. + imm: BitmaskImmediate, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl LogicalImm { + /// AND (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en + pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm +const FAMILY: u32 = 0b1001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalImm) -> Self { + let imm: u32 = inst.imm.into(); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | (imm << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LogicalImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0x92400820, result); + } + + #[test] + fn test_ands() { + let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf2400820, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index c96e9328ff04f0..83cdd26d1dbcb9 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -1,9 +1,11 @@ mod atomic; +mod bitmask_imm; mod branch; mod call; mod data_imm; mod data_reg; mod load; +mod logical_imm; mod mov; mod sf; mod store; From 57e64f70c0af7a19b4cf68462ea2467286f4e9cb Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 11:50:10 -0400 Subject: [PATCH 340/546] Make sure allocated reg size in bits matches insn out size --- yjit/src/asm/x86_64/mod.rs | 2 +- yjit/src/backend/ir.rs | 88 +++++++++++++++++++++++++--------- yjit/src/backend/tests.rs | 3 -- yjit/src/backend/x86_64/mod.rs | 18 +++++-- 4 files changed, 80 insertions(+), 31 deletions(-) diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index ca2a2f6e1fa7e4..9869b79e233849 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -89,7 +89,7 @@ pub enum X86Opnd } impl X86Reg { - fn sub_reg(&self, num_bits: u8) -> Self { + pub fn sub_reg(&self, num_bits: u8) -> Self { assert!( num_bits == 8 || num_bits == 16 || diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 785ea7a9aabd12..6789720238a1f8 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -100,6 +100,8 @@ pub enum Op CRet, // Atomically increment a counter + // Input: memory operand, increment value + // Produces no output IncrCounter, // Trigger a debugger breakpoint 
@@ -134,19 +136,17 @@ pub enum Opnd { None, // For insns with no output - // NOTE: for now Context directly returns memory operands, - // but eventually we'd like to have Stack and Local operand types - //Stack(u16), // Value on the temp stack (idx) - //Local(u16), // Local variable (idx, do we need depth too?) + // Immediate Ruby value, may be GC'd, movable + Value(VALUE), - Value(VALUE), // Immediate Ruby value, may be GC'd, movable - InsnOut(usize), // Output of a preceding instruction in this block + // Output of a preceding instruction in this block + InsnOut{ idx: usize, num_bits: u8 }, // Low-level operands, for lowering Imm(i64), // Raw signed immediate UImm(u64), // Raw unsigned immediate - Mem(Mem), // Memory location (num_bits, base_ptr, const_offset) - Reg(Reg), // Machine register (num_bits, idx) + Mem(Mem), // Memory location + Reg(Reg), // Machine register } impl Opnd @@ -163,7 +163,8 @@ impl Opnd }) }, - Opnd::InsnOut(idx) => { + Opnd::InsnOut{idx, num_bits } => { + assert!(num_bits == 64); Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp: disp, @@ -180,6 +181,13 @@ impl Opnd Opnd::UImm(ptr as u64) } + pub fn is_some(&self) -> bool { + match *self { + Opnd::None => false, + _ => true, + } + } + /// Unwrap a register operand pub fn unwrap_reg(&self) -> Reg { match self { @@ -190,9 +198,10 @@ impl Opnd /// Get the size in bits for register/memory operands pub fn rm_num_bits(&self) -> u8 { - match self { + match *self { Opnd::Reg(reg) => reg.num_bits, Opnd::Mem(mem) => mem.num_bits, + Opnd::InsnOut{ num_bits, .. } => num_bits, _ => unreachable!() } } @@ -294,13 +303,15 @@ impl Assembler /// Append an instruction to the list pub(super) fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd { + // Index of this instruction + let insn_idx = self.insns.len(); + // If we find any InsnOut from previous instructions, we're going to // update the live range of the previous instruction to point to this // one. 
- let insn_idx = self.insns.len(); for opnd in &opnds { match opnd { - Opnd::InsnOut(idx) => { + Opnd::InsnOut{ idx, .. } => { self.live_ranges[*idx] = insn_idx; } Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { @@ -310,11 +321,36 @@ impl Assembler } } + let mut out_num_bits: u8 = 0; + + for opnd in &opnds { + match *opnd { + Opnd::InsnOut{ num_bits, .. } | + Opnd::Mem(Mem { num_bits, .. }) | + Opnd::Reg(Reg { num_bits, .. }) => { + if out_num_bits == 0 { + out_num_bits = num_bits + } + else if out_num_bits != num_bits { + panic!("operands of incompatible sizes"); + } + } + _ => {} + } + } + + if out_num_bits == 0 { + out_num_bits = 64; + } + + // Operand for the output of this instruction + let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; + let insn = Insn { op: op, text: None, opnds: opnds, - out: Opnd::None, + out: out_opnd, target: target, pos: None }; @@ -323,7 +359,7 @@ impl Assembler self.live_ranges.push(insn_idx); // Return an operand for the output of this instruction - Opnd::InsnOut(insn_idx) + out_opnd } /// Add a comment at the current position @@ -385,8 +421,8 @@ impl Assembler // Map an operand to the next set of instructions by correcting previous // InsnOut indices. fn map_opnd(opnd: Opnd, indices: &mut Vec) -> Opnd { - if let Opnd::InsnOut(index) = opnd { - Opnd::InsnOut(indices[index]) + if let Opnd::InsnOut{ idx, num_bits } = opnd { + Opnd::InsnOut{ idx: indices[idx], num_bits } } else { opnd } @@ -503,7 +539,7 @@ impl Assembler // Allocate a specific register fn take_reg(pool: &mut u32, regs: &Vec, reg: &Reg) -> Reg { - let reg_index = regs.iter().position(|elem| elem == reg).unwrap(); + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no).unwrap(); assert_eq!(*pool & (1 << reg_index), 0); *pool |= 1 << reg_index; return regs[reg_index]; @@ -513,7 +549,7 @@ impl Assembler // returned as it is no longer used by the instruction that previously // held it. 
fn dealloc_reg(pool: &mut u32, regs: &Vec, reg: &Reg) { - let reg_index = regs.iter().position(|elem| elem == reg).unwrap(); + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no).unwrap(); *pool &= !(1 << reg_index); } @@ -525,7 +561,8 @@ impl Assembler // allocated register to the pool. for opnd in &opnds { match opnd { - Opnd::InsnOut(idx) | Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { + Opnd::InsnOut{idx, .. } | + Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { // Since we have an InsnOut, we know it spans more that one // instruction. let start_index = *idx; @@ -568,7 +605,7 @@ impl Assembler // e.g. out = add(reg0, reg1) // reg0 = add(reg0, reg1) if opnds.len() > 0 { - if let Opnd::InsnOut(idx) = opnds[0] { + if let Opnd::InsnOut{idx, ..} = opnds[0] { if live_ranges[idx] == index { if let Opnd::Reg(reg) = asm.insns[idx].out { out_reg = Opnd::Reg(take_reg(&mut pool, ®s, ®)) @@ -584,9 +621,9 @@ impl Assembler } // Replace InsnOut operands by their corresponding register - let reg_opnds = opnds.into_iter().map(|opnd| + let reg_opnds: Vec = opnds.into_iter().map(|opnd| match opnd { - Opnd::InsnOut(idx) => asm.insns[idx].out, + Opnd::InsnOut{idx, ..} => asm.insns[idx].out, Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { let out_reg = asm.insns[idx].out.unwrap_reg(); Opnd::Mem(Mem { @@ -603,7 +640,12 @@ impl Assembler // Set the output register for this instruction let num_insns = asm.insns.len(); - asm.insns[num_insns - 1].out = out_reg; + let mut new_insn = &mut asm.insns[num_insns - 1]; + if let Opnd::Reg(reg) = out_reg { + let num_out_bits = new_insn.out.rm_num_bits(); + out_reg = Opnd::Reg(reg.sub_reg(num_out_bits)) + } + new_insn.out = out_reg; }); assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 902d9eeebcab5b..747e7eb2b5d8ee 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ 
-229,8 +229,6 @@ fn test_jcc_ptr() { let (mut asm, mut cb) = setup_asm(); - // FIXME - /* let side_exit = Target::CodePtr((5 as *mut u8).into()); let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); asm.test( @@ -238,7 +236,6 @@ fn test_jcc_ptr() not_mask, ); asm.jnz(side_exit); - */ asm.compile_with_num_regs(&mut cb, 1); } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 819dad1209099c..bca1eda8552126 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -38,7 +38,7 @@ impl From for X86Opnd { //Value(VALUE), // Immediate Ruby value, may be GC'd, movable //InsnOut(usize), // Output of a preceding instruction in this block - Opnd::InsnOut(idx) => panic!("InsnOut operand made it past register allocation"), + Opnd::InsnOut{..} => panic!("InsnOut operand made it past register allocation"), Opnd::None => X86Opnd::None, @@ -85,8 +85,8 @@ impl Assembler Op::Add | Op::Sub | Op::And | Op::Not => { match opnds[0] { // Instruction output whose live range spans beyond this instruction - Opnd::InsnOut(out_idx) => { - if live_ranges[out_idx] > index { + Opnd::InsnOut{idx, ..} => { + if live_ranges[idx] > index { let opnd0 = asm.load(opnds[0]); let mut new_opnds = vec![opnd0]; new_opnds.extend_from_slice(&opnds[1..]); @@ -144,6 +144,10 @@ impl Assembler and(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, + Op::Not => { + not(cb, insn.opnds[0].into()) + }, + Op::Store => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // This assumes only load instructions can contain references to GC'd Value operands @@ -200,7 +204,13 @@ impl Assembler // Conditional jump to a label Op::Je => je_label(cb, insn.target.unwrap().unwrap_label_idx()), Op::Jz => jz_label(cb, insn.target.unwrap().unwrap_label_idx()), - Op::Jnz => jnz_label(cb, insn.target.unwrap().unwrap_label_idx()), + Op::Jnz => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), + Target::Label(label_idx) => 
jnz_label(cb, label_idx), + _ => unreachable!() + } + } // Atomically increment a counter at a given memory location Op::IncrCounter => { From 27dd43bbc52eb2040d46370fb0170d4d420223e1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 17 Jun 2022 17:26:13 -0400 Subject: [PATCH 341/546] TST, CMP, AND/ANDS with registers (https://github.com/Shopify/ruby/pull/301) * Add TST instruction and AND/ANDS entrypoints for immediates * TST/AND/ANDS for registers * CMP instruction --- yjit/src/asm/arm64/inst/data_imm.rs | 13 +++ yjit/src/asm/arm64/inst/data_reg.rs | 13 +++ yjit/src/asm/arm64/inst/logical_imm.rs | 13 +++ yjit/src/asm/arm64/inst/logical_reg.rs | 125 ++++++++++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 126 +++++++++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/logical_reg.rs diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs index 0d0a6ff3254437..950cf3421efd55 100644 --- a/yjit/src/asm/arm64/inst/data_imm.rs +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -80,6 +80,12 @@ impl DataImm { } } + /// CMP (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en + pub fn cmp(rn: u8, imm12: u16, num_bits: u8) -> Self { + Self::subs(31, rn, imm12, num_bits) + } + /// SUB (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en pub fn sub(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { @@ -156,6 +162,13 @@ mod tests { assert_eq!(0xb1001c20, result); } + #[test] + fn test_cmp() { + let inst = DataImm::cmp(0, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xf1001c1f, result); + } + #[test] fn test_sub() { let inst = DataImm::sub(0, 1, 7, 64); diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs index 8635ab804b4646..40f026d1fd96a9 100644 --- 
a/yjit/src/asm/arm64/inst/data_reg.rs +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -86,6 +86,12 @@ impl DataReg { } } + /// CMP (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en + pub fn cmp(rn: u8, rm: u8, num_bits: u8) -> Self { + Self::subs(31, rn, rm, num_bits) + } + /// SUB (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { @@ -165,6 +171,13 @@ mod tests { assert_eq!(0xab020020, result); } + #[test] + fn test_cmp() { + let inst = DataReg::cmp(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xeb01001f, result); + } + #[test] fn test_sub() { let inst = DataReg::sub(0, 1, 2, 64); diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index 63a4556d852e6c..88de8ba4a197b9 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -49,6 +49,12 @@ impl LogicalImm { pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } } + + /// TST (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en + pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self::ands(31, rn, imm, num_bits) + } } /// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm @@ -94,4 +100,11 @@ mod tests { let result: u32 = inst.into(); assert_eq!(0xf2400820, result); } + + #[test] + fn test_tst() { + let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf240083f, result); + } } diff --git 
a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs new file mode 100644 index 00000000000000..929d80b1a7001b --- /dev/null +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -0,0 +1,125 @@ +use super::sf::Sf; + +/// The type of shift to perform on the second operand register. +enum Shift { + LSL = 0b00, // logical shift left (unsigned) + LSR = 0b01, // logical shift right (unsigned) + ASR = 0b10, // arithmetic shift right (signed) + ROR = 0b11 // rotate right (unsigned) +} + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 logical register instruction that can be +/// encoded. +/// +/// AND/ANDS (shifted register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 0 | +/// | sf opc.. shift rm.............. imm6............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalReg { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The amount to shift the second operand register. + imm6: u8, + + /// The register number of the second operand register. + rm: u8, + + /// The type of shift to perform on the second operand register. + shift: Shift, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl LogicalReg { + /// AND (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en + pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } + + /// TST (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en + pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd: 31, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +const FAMILY: u32 = 0b0101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalReg) -> Self { + let imm6 = (inst.imm6 as u32) & ((1 << 6) - 1); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | ((inst.shift as u32) << 22) + | ((inst.rm as u32) << 16) + | (imm6 << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LogicalReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalReg::and(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0x8a020020, result); + } + + #[test] + fn test_ands() { + let inst = LogicalReg::ands(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xea020020, result); + } + + #[test] + fn test_tst() { + let inst = LogicalReg::tst(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xea01001f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 83cdd26d1dbcb9..7d05f286047c9d 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -6,6 +6,7 @@ mod data_imm; mod data_reg; mod load; mod logical_imm; +mod logical_reg; mod mov; mod sf; mod store; @@ -18,6 +19,8 @@ use call::Call; use data_imm::DataImm; use data_reg::DataReg; use load::Load; +use logical_imm::LogicalImm; +use logical_reg::LogicalReg; use mov::Mov; use store::Store; @@ -85,6 +88,50 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// AND - and rn and rm, put the result in rd, don't update flags +pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::and(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to and instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ANDS - and rn and rm, put the result in rd, update flags +pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::ands(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to ands instruction."), + }; + + cb.write_bytes(&bytes); +} + /// BL - branch with link (offset is number of instructions to jump) pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { let bytes: [u8; 4] = match imm26 { @@ -109,6 +156,28 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// CMP - compare rn and rm, update flags +pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::cmp(rn.reg_no, imm12 as u16, rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to cmp instruction."), + }; + + cb.write_bytes(&bytes); +} + /// LDADDAL - atomic add with acquire and release semantics pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rs, rt, rn) { @@ -241,6 +310,23 @@ pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// TST - test the bits of a register against a mask, then update flags +pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rn.num_bits == rm.num_bits, "All operands must be of the same size."); + + LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to tst instruction."), + }; + + cb.write_bytes(&bytes); +} + #[cfg(test)] mod tests { use super::*; @@ -295,6 +381,26 @@ mod tests { check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); } + #[test] + fn test_and_register() { + check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); + } + + #[test] + fn test_and_immediate() { + check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_ands_register() { + check_bytes("200002ea", |cb| ands(cb, X0, X1, X2)); + } + + #[test] + fn test_ands_immediate() { + check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7))); + } + #[test] fn test_bl() { check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); @@ -305,6 +411,16 @@ mod tests { check_bytes("80021fd6", |cb| br(cb, X20)); } + #[test] + fn 
test_cmp_register() { + check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); + } + + #[test] + fn test_cmp_immediate() { + check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14))); + } + #[test] fn test_ldaddal() { check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); @@ -359,4 +475,14 @@ mod tests { fn test_subs_immediate() { check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); } + + #[test] + fn test_tst_register() { + check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); + } + + #[test] + fn test_tst_immediate() { + check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7))); + } } From c5ae52630f9a362579608f27cfbd7a1f08c4d77f Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 13:02:20 -0400 Subject: [PATCH 342/546] Port gen_putself, log what can't be compiled in --yjit-dump-insns --- yjit/src/codegen.rs | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b58caa0984cd31..63e8bbcf590687 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -24,17 +24,6 @@ use std::slice; pub use crate::virtualmem::CodePtr; -// Callee-saved registers -//pub const REG_CFP: X86Opnd = R13; -//pub const REG_EC: X86Opnd = R12; -//pub const REG_SP: X86Opnd = RBX; - -// Scratch registers used by YJIT -//pub const REG0: X86Opnd = RAX; -//pub const REG0_32: X86Opnd = EAX; -//pub const REG0_8: X86Opnd = AL; -//pub const REG1: X86Opnd = RCX; - // A block that can be invalidated needs space to write a jump. // We'll reserve a minimum size for any block that could // be invalidated. 
In this case the JMP takes 5 bytes, but @@ -810,6 +799,10 @@ pub fn gen_single_block( // If we can't compile this instruction // exit to the interpreter and stop compiling if status == CantCompile { + if get_option!(dump_insns) { + println!("can't compile {}", insn_name(opcode)); + } + let mut block = jit.block.borrow_mut(); // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE, @@ -1017,24 +1010,24 @@ fn gen_putobject( KeepCompiling } -/* fn gen_putself( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - // Load self from CFP - let cf_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG_CFP, RUBY_OFFSET_CFP_SELF); - mov(cb, REG0, cf_opnd); // Write it on the stack - let stack_top: X86Opnd = ctx.stack_push_self(); - mov(cb, stack_top, REG0); + let stack_top = ctx.stack_push_self(); + asm.mov( + stack_top, + Opnd::mem((8 * SIZEOF_VALUE) as u8, CFP, RUBY_OFFSET_CFP_SELF) + ); KeepCompiling } +/* fn gen_putspecialobject( jit: &mut JITState, ctx: &mut Context, @@ -5967,7 +5960,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_putobject => Some(gen_putobject), YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), YARVINSN_putobject_INT2FIX_1_ => Some(gen_putobject_int2fix), - //YARVINSN_putself => Some(gen_putself), + YARVINSN_putself => Some(gen_putself), //YARVINSN_putspecialobject => Some(gen_putspecialobject), //YARVINSN_setn => Some(gen_setn), //YARVINSN_topn => Some(gen_topn), @@ -6192,7 +6185,7 @@ impl CodegenGlobals { inline_cb: cb, outlined_cb: ocb, leave_exit_code: leave_exit_code, - stub_exit_code: /*stub_exit_code*/CodePtr::from(1 as *mut u8), + stub_exit_code: stub_exit_code, outline_full_cfunc_return_pos: /*cfunc_exit_code*/CodePtr::from(1 as *mut u8), global_inval_patches: Vec::new(), inline_frozen_bytes: 0, From 4c7d7080d2fdd07d84de63231618a090fa5addad Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 
13:13:46 -0400 Subject: [PATCH 343/546] Port over gen_putspecialobject --- yjit/src/codegen.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 63e8bbcf590687..ba080113f479e3 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1027,19 +1027,18 @@ fn gen_putself( KeepCompiling } -/* fn gen_putspecialobject( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let object_type = jit_get_arg(jit, 0); if object_type == VALUE(VM_SPECIAL_OBJECT_VMCORE.as_usize()) { - let stack_top: X86Opnd = ctx.stack_push(Type::UnknownHeap); - jit_mov_gc_ptr(jit, cb, REG0, unsafe { rb_mRubyVMFrozenCore }); - mov(cb, stack_top, REG0); + let stack_top = ctx.stack_push(Type::UnknownHeap); + let frozen_core = unsafe { rb_mRubyVMFrozenCore }; + asm.mov(stack_top, frozen_core.into()); KeepCompiling } else { // TODO: implement for VM_SPECIAL_OBJECT_CBASE and @@ -1048,6 +1047,7 @@ fn gen_putspecialobject( } } +/* // set Nth stack entry to stack top fn gen_setn( jit: &mut JITState, @@ -5961,7 +5961,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), YARVINSN_putobject_INT2FIX_1_ => Some(gen_putobject_int2fix), YARVINSN_putself => Some(gen_putself), - //YARVINSN_putspecialobject => Some(gen_putspecialobject), + YARVINSN_putspecialobject => Some(gen_putspecialobject), //YARVINSN_setn => Some(gen_setn), //YARVINSN_topn => Some(gen_topn), //YARVINSN_adjuststack => Some(gen_adjuststack), From b89d878ea61d2883838fdd466c6f432134ae1860 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 14:50:29 -0400 Subject: [PATCH 344/546] Port getlocal_WC0 --- yjit/src/codegen.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ba080113f479e3..c3b4b848411231 100644 --- 
a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1466,11 +1466,12 @@ fn gen_expandarray( KeepCompiling } +*/ fn gen_getlocal_wc0( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Compute the offset from BP to the local @@ -1479,14 +1480,14 @@ fn gen_getlocal_wc0( let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx); // Load environment pointer EP (level 0) from CFP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // Load the local from the EP - mov(cb, REG0, mem_opnd(64, REG0, offs)); + let local_opnd = Opnd::mem(64, ep_opnd, offs); // Write the local at SP let stack_top = ctx.stack_push_local(local_idx.as_usize()); - mov(cb, stack_top, REG0); + asm.mov(stack_top, local_opnd); KeepCompiling } @@ -1518,21 +1519,24 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { } // Get EP at level from CFP -fn gen_get_ep(cb: &mut CodeBlock, reg: X86Opnd, level: u32) { - // Load environment pointer EP from CFP - let ep_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP); - mov(cb, reg, ep_opnd); +fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { + // Load environment pointer EP from CFP into a register + let ep_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP); + let mut ep_opnd = asm.load(ep_opnd); for _ in (0..level).rev() { // Get the previous EP from the current EP // See GET_PREV_EP(ep) macro // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) let offs = (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32); - mov(cb, reg, mem_opnd(64, reg, offs)); - and(cb, reg, imm_opnd(!0x03)); + ep_opnd = asm.load(Opnd::mem(64, ep_opnd, offs)); + ep_opnd = asm.and(ep_opnd, Opnd::Imm(!0x03)); } + + ep_opnd } +/* fn gen_getlocal_generic( ctx: &mut Context, cb: &mut CodeBlock, @@ -5966,10 +5970,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_topn => Some(gen_topn), //YARVINSN_adjuststack => Some(gen_adjuststack), - /* - YARVINSN_getlocal 
=> Some(gen_getlocal), + //YARVINSN_getlocal => Some(gen_getlocal), YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), - YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), + //YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), + /* YARVINSN_setlocal => Some(gen_setlocal), YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), From 00ad14f8c92c7b3e305c015b9d4352f1b0c30d5a Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 14:55:16 -0400 Subject: [PATCH 345/546] Port gen_full_cfunc_return --- yjit/src/codegen.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index c3b4b848411231..a1af23c9749369 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -509,33 +509,32 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) { // Landing code for when c_return tracing is enabled. See full_cfunc_return(). fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { - let cb = ocb.unwrap(); - let code_ptr = cb.get_write_ptr(); - - todo!(); + let ocb = ocb.unwrap(); + let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); - /* // This chunk of code expect REG_EC to be filled properly and // RAX to contain the return value of the C method. // Call full_cfunc_return() - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], RAX); - call_ptr(cb, REG0, rb_full_cfunc_return as *const u8); + asm.ccall( + rb_full_cfunc_return as *const u8, + vec![EC, C_RET_OPND] + ); // Count the exit gen_counter_incr!(asm, traced_cfunc_return); // Return to the interpreter - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.cpop(SP); + asm.cpop(EC); + asm.cpop(CFP); - mov(cb, RAX, uimm_opnd(Qundef.into())); - ret(cb); + asm.cret(Qundef.into()); + + asm.compile(ocb); return code_ptr; - */ } /// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc. 
@@ -6179,7 +6178,7 @@ impl CodegenGlobals { let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb); // Generate full exit code for C func - //let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); + let cfunc_exit_code = gen_full_cfunc_return(&mut ocb); // Mark all code memory as executable cb.mark_all_executable(); @@ -6190,7 +6189,7 @@ impl CodegenGlobals { outlined_cb: ocb, leave_exit_code: leave_exit_code, stub_exit_code: stub_exit_code, - outline_full_cfunc_return_pos: /*cfunc_exit_code*/CodePtr::from(1 as *mut u8), + outline_full_cfunc_return_pos: cfunc_exit_code, global_inval_patches: Vec::new(), inline_frozen_bytes: 0, method_codegen_table: HashMap::new(), From d0204e51e270a1dacdfa3ae775892840b0e7b192 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 15:12:22 -0400 Subject: [PATCH 346/546] Port guard_two_fixnums --- yjit/src/backend/ir.rs | 12 +++++++++++- yjit/src/backend/x86_64/mod.rs | 10 ++++++++++ yjit/src/codegen.rs | 28 +++++++++++++++------------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 6789720238a1f8..437cc24286d320 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -80,7 +80,10 @@ pub enum Op // Compare two operands Cmp, - // Unconditional jump which takes an address operand + // Unconditional jump to a branch target + Jmp, + + // Unconditional jump which takes a reg/mem address operand JmpOpnd, // Low-level conditional jump instructions @@ -252,6 +255,12 @@ impl Target } } +impl From for Target { + fn from(code_ptr: CodePtr) -> Self { + Target::CodePtr(code_ptr) + } +} + /// YJIT IR instruction #[derive(Clone, Debug)] pub struct Insn @@ -758,6 +767,7 @@ macro_rules! 
def_push_2_opnd_no_out { } def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); +def_push_jcc!(jmp, Op::Jmp); def_push_jcc!(je, Op::Je); def_push_jcc!(jbe, Op::Jbe); def_push_jcc!(jz, Op::Jz); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index bca1eda8552126..894ae279bd8a1c 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -202,8 +202,18 @@ impl Assembler Op::JmpOpnd => jmp_rm(cb, insn.opnds[0].into()), // Conditional jump to a label + Op::Jmp => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr), + Target::Label(label_idx) => jmp_label(cb, label_idx), + _ => unreachable!() + } + } + Op::Je => je_label(cb, insn.target.unwrap().unwrap_label_idx()), + Op::Jz => jz_label(cb, insn.target.unwrap().unwrap_label_idx()), + Op::Jnz => { match insn.target.unwrap() { Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a1af23c9749369..61a2bc627b2a37 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2312,27 +2312,28 @@ fn gen_concatstrings( KeepCompiling } +*/ -fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) { +fn guard_two_fixnums(ctx: &mut Context, asm: &mut Assembler, side_exit: CodePtr) { // Get the stack operand types let arg1_type = ctx.get_opnd_type(StackOpnd(0)); let arg0_type = ctx.get_opnd_type(StackOpnd(1)); if arg0_type.is_heap() || arg1_type.is_heap() { - add_comment(cb, "arg is heap object"); - jmp_ptr(cb, side_exit); + asm.comment("arg is heap object"); + asm.jmp(side_exit.into()); return; } if arg0_type != Type::Fixnum && arg0_type.is_specific() { - add_comment(cb, "arg0 not fixnum"); - jmp_ptr(cb, side_exit); + asm.comment("arg0 not fixnum"); + asm.jmp(side_exit.into()); return; } if arg1_type != Type::Fixnum && arg1_type.is_specific() { - add_comment(cb, "arg1 not fixnum"); - jmp_ptr(cb, side_exit); + asm.comment("arg1 not fixnum"); + asm.jmp(side_exit.into()); 
return; } @@ -2347,14 +2348,14 @@ fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) // If not fixnums, fall back if arg0_type != Type::Fixnum { - add_comment(cb, "guard arg0 fixnum"); - test(cb, arg0, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, side_exit); + asm.comment("guard arg0 fixnum"); + asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.jz(side_exit.into()); } if arg1_type != Type::Fixnum { - add_comment(cb, "guard arg1 fixnum"); - test(cb, arg1, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, side_exit); + asm.comment("guard arg1 fixnum"); + asm.test(arg1, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.jz(side_exit.into()); } // Set stack types in context @@ -2362,6 +2363,7 @@ fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum); } +/* // Conditional move operation used by comparison operators type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> (); From 8bb7421d8e222fdae6b51049993efc46cf494f15 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 15:34:14 -0400 Subject: [PATCH 347/546] Port topn, adjuststack, most of opt_plus --- yjit/src/codegen.rs | 47 +++++++++++++++++++++++---------------------- yjit/src/cruby.rs | 9 ++++++++- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 61a2bc627b2a37..879323dfa199a8 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1067,23 +1067,21 @@ fn gen_setn( KeepCompiling } +*/ // get nth stack value, then push it fn gen_topn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; - - let top_n_val = ctx.stack_opnd(n.try_into().unwrap()); - let mapping = ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap())); + let nval = jit_get_arg(jit, 0); + let 
top_n_val = ctx.stack_opnd(nval.into()); + let mapping = ctx.get_opnd_mapping(StackOpnd(nval.into())); let loc0 = ctx.stack_push_mapping(mapping); - mov(cb, REG0, top_n_val); - mov(cb, loc0, REG0); + asm.mov(loc0, top_n_val); KeepCompiling } @@ -1092,7 +1090,7 @@ fn gen_topn( fn gen_adjuststack( jit: &mut JITState, ctx: &mut Context, - _cb: &mut CodeBlock, + _cb: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let nval: VALUE = jit_get_arg(jit, 0); @@ -1102,10 +1100,11 @@ fn gen_adjuststack( KeepCompiling } +/* fn gen_opt_plus( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { @@ -1126,28 +1125,30 @@ fn gen_opt_plus( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Add arg0 + arg1 and test for overflow - mov(cb, REG0, arg0); - sub(cb, REG0, imm_opnd(1)); - add(cb, REG0, arg1); - jo_ptr(cb, side_exit); + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(side_exit); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.mov(dst, out_val); KeepCompiling } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + todo!(); + //gen_opt_send_without_block(jit, ctx, cb, ocb) } } +*/ +/* // new array initialized from top N values fn gen_newarray( jit: &mut JITState, @@ -5968,17 +5969,17 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_putself => Some(gen_putself), YARVINSN_putspecialobject => Some(gen_putspecialobject), //YARVINSN_setn => Some(gen_setn), - //YARVINSN_topn => Some(gen_topn), - //YARVINSN_adjuststack => Some(gen_adjuststack), + YARVINSN_topn => Some(gen_topn), + YARVINSN_adjuststack => Some(gen_adjuststack), //YARVINSN_getlocal => Some(gen_getlocal), 
YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), //YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), + //YARVINSN_setlocal => Some(gen_setlocal), + //YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), + //YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), + //YARVINSN_opt_plus => Some(gen_opt_plus), /* - YARVINSN_setlocal => Some(gen_setlocal), - YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), - YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), - YARVINSN_opt_plus => Some(gen_opt_plus), YARVINSN_opt_minus => Some(gen_opt_minus), YARVINSN_opt_and => Some(gen_opt_and), YARVINSN_opt_or => Some(gen_opt_or), diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 919557817218a1..8543b6d971edc6 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -596,7 +596,14 @@ impl From for i32 { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; assert!(uimm <= (i32::MAX as usize)); - uimm as i32 + uimm.try_into().unwrap() + } +} + +impl From for u16 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + uimm.try_into().unwrap() } } From 24db233fc70799642aad09be9170da61332ff010 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 15:50:42 -0400 Subject: [PATCH 348/546] Add jo insn and test for jo --- yjit/src/backend/ir.rs | 2 ++ yjit/src/backend/tests.rs | 19 +++++++++++++++++++ yjit/src/backend/x86_64/mod.rs | 28 ++++++++++++++++++++++++++-- yjit/src/codegen.rs | 1 - 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 437cc24286d320..bacbbd541d3c56 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -91,6 +91,7 @@ pub enum Op Je, Jz, Jnz, + Jo, // Push and pop registers to/from the C stack CPush, @@ -772,6 +773,7 @@ def_push_jcc!(je, Op::Je); def_push_jcc!(jbe, Op::Jbe); def_push_jcc!(jz, Op::Jz); def_push_jcc!(jnz, Op::Jnz); +def_push_jcc!(jo, Op::Jo); def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); diff 
--git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 747e7eb2b5d8ee..a8ae1bc97aeb4e 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -239,3 +239,22 @@ fn test_jcc_ptr() asm.compile_with_num_regs(&mut cb, 1); } + +#[test] +fn test_jo() +{ + let (mut asm, mut cb) = setup_asm(); + + let side_exit = Target::CodePtr((5 as *mut u8).into()); + + let arg1 = Opnd::mem(64, SP, 0); + let arg0 = Opnd::mem(64, SP, 8); + + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(side_exit); + + asm.mov(Opnd::mem(64, SP, 0), out_val); + + asm.compile_with_num_regs(&mut cb, 1); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 894ae279bd8a1c..93e3e3f458bad1 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -140,6 +140,10 @@ impl Assembler add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, + Op::Sub => { + sub(cb, insn.opnds[0].into(), insn.opnds[1].into()) + }, + Op::And => { and(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, @@ -210,9 +214,21 @@ impl Assembler } } - Op::Je => je_label(cb, insn.target.unwrap().unwrap_label_idx()), + Op::Je => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr), + Target::Label(label_idx) => je_label(cb, label_idx), + _ => unreachable!() + } + } - Op::Jz => jz_label(cb, insn.target.unwrap().unwrap_label_idx()), + Op::Jz => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), + Target::Label(label_idx) => jz_label(cb, label_idx), + _ => unreachable!() + } + } Op::Jnz => { match insn.target.unwrap() { @@ -222,6 +238,14 @@ impl Assembler } } + Op::Jo => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr), + Target::Label(label_idx) => jo_label(cb, label_idx), + _ => unreachable!() + } + } + // Atomically increment a counter at a given memory location Op::IncrCounter => { 
assert!(matches!(insn.opnds[0], Opnd::Mem(_))); diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 879323dfa199a8..b97bb01b1b1dbe 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1095,7 +1095,6 @@ fn gen_adjuststack( ) -> CodegenStatus { let nval: VALUE = jit_get_arg(jit, 0); let VALUE(n) = nval; - ctx.stack_pop(n); KeepCompiling } From 4254174ca76d8e64db29fbcbcfc99a81e2d50211 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 20 Jun 2022 16:06:39 -0400 Subject: [PATCH 349/546] Port over setn --- yjit/src/codegen.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b97bb01b1b1dbe..291851c02e8aa4 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1046,28 +1046,27 @@ fn gen_putspecialobject( } } -/* // set Nth stack entry to stack top fn gen_setn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; + let n: VALUE = jit_get_arg(jit, 0); - let top_val: X86Opnd = ctx.stack_pop(0); - let dst_opnd: X86Opnd = ctx.stack_opnd(n.try_into().unwrap()); - mov(cb, REG0, top_val); - mov(cb, dst_opnd, REG0); + let top_val = ctx.stack_pop(0); + let dst_opnd = ctx.stack_opnd(n.into()); + asm.mov( + dst_opnd, + top_val + ); let mapping = ctx.get_opnd_mapping(StackOpnd(0)); - ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping); + ctx.set_opnd_mapping(StackOpnd(n.into()), mapping); KeepCompiling } -*/ // get nth stack value, then push it fn gen_topn( @@ -5967,7 +5966,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_putobject_INT2FIX_1_ => Some(gen_putobject_int2fix), YARVINSN_putself => Some(gen_putself), YARVINSN_putspecialobject => Some(gen_putspecialobject), - //YARVINSN_setn => Some(gen_setn), + YARVINSN_setn => Some(gen_setn), YARVINSN_topn => Some(gen_topn), YARVINSN_adjuststack => 
Some(gen_adjuststack), From 2eba6aef724f20162bd650d535be876aa4a19964 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 21 Jun 2022 11:05:20 -0400 Subject: [PATCH 350/546] Port over get_branch_target() --- yjit/src/backend/arm64/mod.rs | 16 +++++++++++++++- yjit/src/backend/tests.rs | 7 +++++-- yjit/src/core.rs | 24 ++++++++++++------------ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index be67e2384d1617..4e4c553c9d1ede 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -34,7 +34,7 @@ impl From for A64Opnd { impl Assembler { /// Get the list of registers from which we can allocate on this platform - pub fn get_scratch_regs() -> Vec + pub fn get_alloc_regs() -> Vec { vec![ X12_REG, @@ -45,6 +45,11 @@ impl Assembler /// Split platform-specific instructions fn arm64_split(mut self) -> Assembler { + // The transformations done here are meant to make our lives simpler in later + // stages of the compilation pipeline. + // Here we may want to make sure that all instructions (except load and store) + // have no memory operands. + todo!(); } @@ -52,6 +57,15 @@ impl Assembler /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec { + // NOTE: dear Kevin, + // for arm, you may want to reserve 1 or 2 caller-save registers + // to use as scracth registers (during the last phase of the codegen) + // These registers will not be allocated to anything by the register + // allocator, they're just useful because arm is slightly trickier + // than x86 to generate code for. + // For example, if you want to jump far away, you may want to store + // the jump target address in a register first. 
+ todo!(); } diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index a8ae1bc97aeb4e..3a0f14e1f40028 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -192,12 +192,15 @@ fn test_c_call() let (mut asm, mut cb) = setup_asm(); - asm.ccall( + let ret_val = asm.ccall( dummy_c_fun as *const u8, vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] ); - asm.compile_with_num_regs(&mut cb, 2); + // Make sure that the call's return value is usable + asm.mov(Opnd::mem(64, SP, 0), ret_val); + + asm.compile_with_num_regs(&mut cb, 1); } #[test] diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 10ef9c5151499b..a2659b55fdbb21 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1768,29 +1768,29 @@ fn get_branch_target( // This means the branch stub owns its own reference to the branch let branch_ptr: *const RefCell = BranchRef::into_raw(branchref.clone()); + let mut asm = Assembler::new(); - - - - todo!("stub codegen with new assembler"); - - /* // Call branch_stub_hit(branch_idx, target_idx, ec) - mov(ocb, C_ARG_REGS[2], REG_EC); - mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64)); - mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch_ptr as *const u8)); - call_ptr(ocb, REG0, branch_stub_hit as *mut u8); + let jump_addr = asm.ccall( + branch_stub_hit as *mut u8, + vec![ + EC, + Opnd::UImm(target_idx as u64), + Opnd::const_ptr(branch_ptr as *const u8) + ] + ); // Jump to the address returned by the // branch_stub_hit call - jmp_rm(ocb, RAX); + asm.jmp_opnd(jump_addr); + + asm.compile(ocb); if ocb.has_dropped_bytes() { None // No space } else { Some(stub_addr) } - */ } pub fn gen_branch( From 4c0a440b1828fd1cc1dba24ae1d0a384e98859aa Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 21 Jun 2022 11:30:08 -0400 Subject: [PATCH 351/546] Port over duphash and newarray --- yjit/src/codegen.rs | 64 ++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/yjit/src/codegen.rs 
b/yjit/src/codegen.rs index 291851c02e8aa4..c99fef5b0c4af1 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -513,7 +513,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { let code_ptr = ocb.get_write_ptr(); let mut asm = Assembler::new(); - // This chunk of code expect REG_EC to be filled properly and + // This chunk of code expects REG_EC to be filled properly and // RAX to contain the return value of the C method. // Call full_cfunc_return() @@ -759,14 +759,16 @@ pub fn gen_single_block( // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { - todo!("record_boundary_patch_point"); - /* + // FIXME: is this sound with the new assembler? + // Generate an exit to this instruction and record it - let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap()); + let exit_pos = gen_outlined_exit(jit.pc, &ctx, ocb); record_global_inval_patch(cb, exit_pos); jit.record_boundary_patch_point = false; - */ + + + } // In debug mode, verify our existing assumption @@ -1146,36 +1148,40 @@ fn gen_opt_plus( } */ -/* // new array initialized from top N values fn gen_newarray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let n = jit_get_arg(jit, 0).as_u32(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let offset_magnitude = SIZEOF_VALUE as u32 * n; - let values_ptr = ctx.sp_opnd(-(offset_magnitude as isize)); + let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + let values_ptr = asm.lea(values_opnd); // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], imm_opnd(n.into())); - lea(cb, C_ARG_REGS[2], values_ptr); - call_ptr(cb, REG0, rb_ec_ary_new_from_values as *const u8); + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const 
u8, + vec![ + EC, + Opnd::UImm(n.into()), + values_ptr + ] + ); ctx.stack_pop(n.as_usize()); let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_ary); KeepCompiling } - +/* // dup array fn gen_duparray( jit: &mut JITState, @@ -1197,46 +1203,30 @@ fn gen_duparray( KeepCompiling } - - - - -/* -let mut asm = Assembler::new(); - -//asm.ccall(rb_ary_resurrect as *const u8, vec![ary]); - -asm.compile(cb); */ - - - - - - // dup hash fn gen_duphash( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let hash = jit_get_arg(jit, 0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_hash_resurrect(VALUE hash); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash); - call_ptr(cb, REG0, rb_hash_resurrect as *const u8); + let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]); let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, hash); KeepCompiling } +/* // call to_a on the array on the stack fn gen_splatarray( jit: &mut JITState, @@ -5982,8 +5972,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_and => Some(gen_opt_and), YARVINSN_opt_or => Some(gen_opt_or), YARVINSN_newhash => Some(gen_newhash), + */ YARVINSN_duphash => Some(gen_duphash), YARVINSN_newarray => Some(gen_newarray), + /* YARVINSN_duparray => Some(gen_duparray), YARVINSN_checktype => Some(gen_checktype), YARVINSN_opt_lt => Some(gen_opt_lt), From f1b188143b0255cef498ce4fb7a331daca64e063 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 21 Jun 2022 17:01:26 -0400 Subject: [PATCH 352/546] Fix backend transform bug, add test --- yjit/src/backend/ir.rs | 20 +++++++++++++------- yjit/src/backend/tests.rs | 11 +++++++++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/yjit/src/backend/ir.rs 
b/yjit/src/backend/ir.rs index bacbbd541d3c56..66a498fb30e3a0 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -324,7 +324,7 @@ impl Assembler Opnd::InsnOut{ idx, .. } => { self.live_ranges[*idx] = insn_idx; } - Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { self.live_ranges[*idx] = insn_idx; } _ => {} @@ -424,17 +424,21 @@ impl Assembler label_names: self.label_names, }; - // indices maps from the old instruction index to the new instruction + // Indices maps from the old instruction index to the new instruction // index. let mut indices: Vec = Vec::default(); // Map an operand to the next set of instructions by correcting previous // InsnOut indices. fn map_opnd(opnd: Opnd, indices: &mut Vec) -> Opnd { - if let Opnd::InsnOut{ idx, num_bits } = opnd { - Opnd::InsnOut{ idx: indices[idx], num_bits } - } else { - opnd + match opnd { + Opnd::InsnOut{ idx, num_bits } => { + Opnd::InsnOut{ idx: indices[idx], num_bits } + } + Opnd::Mem(Mem{ base: MemBase::InsnOut(idx), disp, num_bits, }) => { + Opnd::Mem(Mem{ base:MemBase::InsnOut(indices[idx]), disp, num_bits }) + } + _ => opnd } } @@ -531,6 +535,8 @@ impl Assembler /// instruction. This is our implementation of the linear scan algorithm. pub(super) fn alloc_regs(mut self, regs: Vec) -> Assembler { + //dbg!(&self); + // First, create the pool of registers. 
let mut pool: u32 = 0; @@ -585,7 +591,7 @@ impl Assembler if let Opnd::Reg(reg) = asm.insns[start_index].out { dealloc_reg(&mut pool, ®s, ®); } else { - unreachable!("no register allocated for insn"); + unreachable!("no register allocated for insn {:?}", op); } } } diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 3a0f14e1f40028..6545d0151751f9 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -173,12 +173,19 @@ fn test_base_insn_out() { let (mut asm, mut cb) = setup_asm(); + // Forced register to be reused + // This also causes the insn sequence to change length + asm.mov( + Opnd::mem(64, SP, 8), + Opnd::mem(64, SP, 0) + ); + // Load the pointer into a register - let ptr_reg = asm.load(Opnd::const_ptr(0 as *const u8)); + let ptr_reg = asm.load(Opnd::const_ptr(4351776248 as *const u8)); let counter_opnd = Opnd::mem(64, ptr_reg, 0); // Increment and store the updated value - asm.incr_counter(counter_opnd, 1.into() ); + asm.incr_counter(counter_opnd, 1.into()); asm.compile_with_num_regs(&mut cb, 1); } From 0a96a3918970f1852e1bd691c826c904bccd60b8 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 21 Jun 2022 18:15:10 -0400 Subject: [PATCH 353/546] Delete dbg!() calls --- yjit/src/backend/ir.rs | 1 - yjit/src/backend/x86_64/mod.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 66a498fb30e3a0..96b314e9cbac07 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -391,7 +391,6 @@ impl Assembler pub fn new_label(&mut self, name: &str) -> Target { let label_idx = self.label_names.len(); - dbg!(label_idx); self.label_names.push(name.to_string()); Target::Label(label_idx) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 93e3e3f458bad1..f4e0d4f53aab16 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -272,7 +272,6 @@ impl Assembler // Create label instances in the code block for (idx, name) in 
asm.label_names.iter().enumerate() { - dbg!("creating label, idx={}", idx); let label_idx = cb.new_label(name.to_string()); assert!(label_idx == idx); } From d9163280782086b57119abc9478580a6b3efd2c3 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 21 Jun 2022 19:41:06 -0400 Subject: [PATCH 354/546] Conscise IR disassembly (https://github.com/Shopify/ruby/pull/302) The output from `dbg!` was too verbose. For `test_jo` the output went from 37 lines to 5 lines. The added index helps parsing InsnOut indicies. Samples: ``` test backend::tests::test_jo ... [src/backend/ir.rs:589] &self = Assembler 000 Load(Mem64[Reg(3) + 8]) -> Out64(0) 001 Sub(Out64(0), 1_i64) -> Out64(1) 002 Load(Out64(1)) -> Out64(2) 003 Add(Out64(2), Mem64[Reg(3)]) -> Out64(3) 004 Jo() target=CodePtr(CodePtr(0x5)) -> Out64(4) 005 Mov(Mem64[Reg(3)], Out64(3)) -> Out64(5) test backend::tests::test_reuse_reg ... [src/backend/ir.rs:589] &self = Assembler 000 Load(Mem64[Reg(3)]) -> Out64(0) 001 Add(Out64(0), 1_u64) -> Out64(1) 002 Load(Mem64[Reg(3) + 8]) -> Out64(2) 003 Add(Out64(2), 1_u64) -> Out64(3) 004 Add(Out64(1), 1_u64) -> Out64(4) 005 Add(Out64(1), Out64(4)) -> Out64(5) 006 Store(Mem64[Reg(3)], Out64(4)) -> Out64(6) 007 Store(Mem64[Reg(3) + 8], Out64(5)) -> Out64(7) ``` --- yjit/src/backend/ir.rs | 71 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 96b314e9cbac07..4e8ed0b8a42d90 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -121,7 +121,7 @@ pub enum MemBase } // Memory location -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Mem { // Base register number or instruction index @@ -134,8 +134,20 @@ pub struct Mem pub(super) num_bits: u8, } +impl fmt::Debug for Mem { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Mem{}[{:?}", self.num_bits, self.base)?; + if self.disp != 0 { + let sign = if 
self.disp > 0 { '+' } else { '-' }; + write!(fmt, " {sign} {}", self.disp)?; + } + + write!(fmt, "]") + } +} + /// Operand to an IR instruction -#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq)] pub enum Opnd { None, // For insns with no output @@ -153,6 +165,22 @@ pub enum Opnd Reg(Reg), // Machine register } +impl fmt::Debug for Opnd { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + use Opnd::*; + match self { + Self::None => write!(fmt, "None"), + Value(val) => write!(fmt, "Value({val:?})"), + InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"), + Imm(signed) => write!(fmt, "{signed:x}_i64"), + UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"), + // Say Mem and Reg only once + Mem(mem) => write!(fmt, "{mem:?}"), + Reg(reg) => write!(fmt, "{reg:?}"), + } + } +} + impl Opnd { /// Convenience constructor for memory operands @@ -263,7 +291,7 @@ impl From for Target { } /// YJIT IR instruction -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct Insn { // Opcode for the instruction @@ -286,6 +314,35 @@ pub struct Insn pub(super) pos: Option, } +impl fmt::Debug for Insn { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{:?}(", self.op)?; + + // Print list of operands + let mut opnd_iter = self.opnds.iter(); + if let Some(first_opnd) = opnd_iter.next() { + write!(fmt, "{first_opnd:?}")?; + } + for opnd in opnd_iter { + write!(fmt, ", {opnd:?}")?; + } + write!(fmt, ")")?; + + // Print text, target, and pos if they are present + if let Some(text) = &self.text { + write!(fmt, " {text:?}")? 
+ } + if let Some(target) = self.target { + write!(fmt, " target={target:?}")?; + } + if let Some(pos) = self.pos { + write!(fmt, " pos={pos:?}")?; + } + + write!(fmt, " -> {:?}", self.out) + } +} + /// Object into which we assemble instructions to be /// optimized and lowered pub struct Assembler @@ -687,7 +744,13 @@ impl Assembler impl fmt::Debug for Assembler { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - fmt.debug_list().entries(self.insns.iter()).finish() + write!(fmt, "Assembler\n")?; + + for (idx, insn) in self.insns.iter().enumerate() { + write!(fmt, " {idx:03} {insn:?}\n")?; + } + + Ok(()) } } From b272c57f27628ab114206c777d5b274713d31079 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 22 Jun 2022 12:27:59 -0400 Subject: [PATCH 355/546] LSL, LSR, B.cond (https://github.com/Shopify/ruby/pull/303) * LSL and LSR * B.cond * Move A64 files around to make more sense * offset -> byte_offset for bcond --- yjit/src/asm/arm64/README.md | 6 +- .../asm/arm64/{inst => arg}/bitmask_imm.rs | 0 yjit/src/asm/arm64/arg/condition.rs | 20 + yjit/src/asm/arm64/arg/mod.rs | 10 + yjit/src/asm/arm64/{inst => arg}/sf.rs | 0 yjit/src/asm/arm64/inst/branch_cond.rs | 73 +++ yjit/src/asm/arm64/inst/data_imm.rs | 2 +- yjit/src/asm/arm64/inst/data_reg.rs | 2 +- yjit/src/asm/arm64/inst/logical_imm.rs | 3 +- yjit/src/asm/arm64/inst/logical_reg.rs | 2 +- yjit/src/asm/arm64/inst/mod.rs | 494 +--------------- yjit/src/asm/arm64/inst/mov.rs | 2 +- yjit/src/asm/arm64/inst/shift_imm.rs | 147 +++++ yjit/src/asm/arm64/mod.rs | 527 ++++++++++++++++++ 14 files changed, 801 insertions(+), 487 deletions(-) rename yjit/src/asm/arm64/{inst => arg}/bitmask_imm.rs (100%) create mode 100644 yjit/src/asm/arm64/arg/condition.rs create mode 100644 yjit/src/asm/arm64/arg/mod.rs rename yjit/src/asm/arm64/{inst => arg}/sf.rs (100%) create mode 100644 yjit/src/asm/arm64/inst/branch_cond.rs create mode 100644 yjit/src/asm/arm64/inst/shift_imm.rs diff --git a/yjit/src/asm/arm64/README.md 
b/yjit/src/asm/arm64/README.md index 3d0ec57d340080..edae5773e8abb3 100644 --- a/yjit/src/asm/arm64/README.md +++ b/yjit/src/asm/arm64/README.md @@ -4,11 +4,9 @@ This module is responsible for encoding YJIT operands into an appropriate Arm64 ## Architecture -Every instruction in the Arm64 instruction set is 32 bits wide and is represented in little-endian order. Because they're all going to the same size, we represent each instruction by a struct that implements `From for u32`, which contains the mechanism for encoding each instruction. +Every instruction in the Arm64 instruction set is 32 bits wide and is represented in little-endian order. Because they're all going to the same size, we represent each instruction by a struct that implements `From for u32`, which contains the mechanism for encoding each instruction. The encoding for each instruction is shown in the documentation for the struct that ends up being created. -Generally each set of instructions falls under a certain family (like data processing -- register). These instructions are encoded similarly, so we group them into their own submodules. The encoding for each type is shown in the documentation for the struct that ends up being created. - -In general each set of bytes inside of the struct has either a direct value (usually a `u8`/`u16`) or some kind of `enum` that can be converted directly into a `u32`. +In general each set of bytes inside of the struct has either a direct value (usually a `u8`/`u16`) or some kind of `enum` that can be converted directly into a `u32`. For more complicated pieces of encoding (e.g., bitmask immediates) a corresponding module under the `arg` namespace is available. 
## Helpful links diff --git a/yjit/src/asm/arm64/inst/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs similarity index 100% rename from yjit/src/asm/arm64/inst/bitmask_imm.rs rename to yjit/src/asm/arm64/arg/bitmask_imm.rs diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs new file mode 100644 index 00000000000000..db269726d77abc --- /dev/null +++ b/yjit/src/asm/arm64/arg/condition.rs @@ -0,0 +1,20 @@ +/// Various instructions in A64 can have condition codes attached. This enum +/// includes all of the various kinds of conditions along with their respective +/// encodings. +pub enum Condition { + EQ = 0b0000, // equal to + NE = 0b0001, // not equal to + CS = 0b0010, // carry set (alias for HS) + CC = 0b0011, // carry clear (alias for LO) + MI = 0b0100, // minus, negative + PL = 0b0101, // positive or zero + VS = 0b0110, // signed overflow + VC = 0b0111, // no signed overflow + HI = 0b1000, // greater than (unsigned) + LS = 0b1001, // less than or equal to (unsigned) + GE = 0b1010, // greater than or equal to (signed) + LT = 0b1011, // less than (signed) + GT = 0b1100, // greater than (signed) + LE = 0b1101, // less than or equal to (signed) + AL = 0b1110, // always +} diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs new file mode 100644 index 00000000000000..0d2f1ac28a462e --- /dev/null +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -0,0 +1,10 @@ +// This module contains various A64 instruction arguments and the logic +// necessary to encode them. 
+ +mod bitmask_imm; +mod condition; +mod sf; + +pub use bitmask_imm::BitmaskImmediate; +pub use condition::Condition; +pub use sf::Sf; diff --git a/yjit/src/asm/arm64/inst/sf.rs b/yjit/src/asm/arm64/arg/sf.rs similarity index 100% rename from yjit/src/asm/arm64/inst/sf.rs rename to yjit/src/asm/arm64/arg/sf.rs diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs new file mode 100644 index 00000000000000..21fdda5d3f6e07 --- /dev/null +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -0,0 +1,73 @@ +use super::super::arg::Condition; + +/// The struct that represents an A64 conditional branch instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 1 0 0 0 | +/// | imm19........................................................... cond....... | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct BranchCond { + /// The kind of condition to check before branching. + cond: Condition, + + /// The instruction offset from this instruction to branch to. + imm19: i32 +} + +impl BranchCond { + /// B.cond + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- + pub fn bcond(cond: Condition, byte_offset: i32) -> Self { + Self { cond, imm19: byte_offset >> 2 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: BranchCond) -> Self { + let imm19 = (inst.imm19 as u32) & ((1 << 19) - 1); + + 0 + | (1 << 30) + | (FAMILY << 26) + | (imm19 << 5) + | (inst.cond as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: BranchCond) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_b_eq() { + let result: u32 = BranchCond::bcond(Condition::EQ, 128).into(); + assert_eq!(0x54000400, result); + } + + #[test] + fn test_b_vs() { + let result: u32 = BranchCond::bcond(Condition::VS, 128).into(); + assert_eq!(0x54000406, result); + } + + #[test] + fn test_b_ne_neg() { + let result: u32 = BranchCond::bcond(Condition::NE, -128).into(); + assert_eq!(0x54fffc01, result); + } +} diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs index 950cf3421efd55..19e2bfa1992ef5 100644 --- a/yjit/src/asm/arm64/inst/data_imm.rs +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -1,4 +1,4 @@ -use super::sf::Sf; +use super::super::arg::Sf; /// The operation being performed by this instruction. enum Op { diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs index 40f026d1fd96a9..e2c2723fcffe1d 100644 --- a/yjit/src/asm/arm64/inst/data_reg.rs +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -1,4 +1,4 @@ -use super::sf::Sf; +use super::super::arg::Sf; /// The operation being performed by this instruction. enum Op { diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index 88de8ba4a197b9..cc2a16cbdc349a 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -1,5 +1,4 @@ -use super::bitmask_imm::BitmaskImmediate; -use super::sf::Sf; +use super::super::arg::{BitmaskImmediate, Sf}; // Which operation to perform. 
enum Opc { diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index 929d80b1a7001b..3feb3350ab81aa 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -1,4 +1,4 @@ -use super::sf::Sf; +use super::super::arg::Sf; /// The type of shift to perform on the second operand register. enum Shift { diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 7d05f286047c9d..2f0e7089996e45 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -1,6 +1,9 @@ +// This module contains various A64 instructions and the logic necessary to +// encode them into u32s. + mod atomic; -mod bitmask_imm; mod branch; +mod branch_cond; mod call; mod data_imm; mod data_reg; @@ -8,481 +11,18 @@ mod load; mod logical_imm; mod logical_reg; mod mov; -mod sf; +mod shift_imm; mod store; -use core::num; - -use atomic::Atomic; -use branch::Branch; -use call::Call; -use data_imm::DataImm; -use data_reg::DataReg; -use load::Load; -use logical_imm::LogicalImm; -use logical_reg::LogicalReg; -use mov::Mov; -use store::Store; - -use crate::asm::CodeBlock; -use super::opnd::*; - -/// Checks that a signed value fits within the specified number of bits. -const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { - let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; - let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; - - imm >= minimum && imm <= maximum -} - -/// Checks that an unsigned value fits within the specified number of bits. 
-const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { - let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; - - uimm <= maximum -} - -/// ADD - add rn and rm, put the result in rd, don't update flags -pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, - "All operands must be of the same size." - ); - - DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() - }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - - DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to add instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// ADDS - add rn and rm, put the result in rd, update flags -pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, - "All operands must be of the same size." 
- ); - - DataReg::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() - }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - - DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to adds instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// AND - and rn and rm, put the result in rd, don't update flags -pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, - "All operands must be of the same size." - ); - - LogicalReg::and(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() - }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { - assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - - LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to and instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// ANDS - and rn and rm, put the result in rd, update flags -pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, - "All operands must be of the same size." 
- ); - - LogicalReg::ands(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() - }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { - assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - - LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to ands instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// BL - branch with link (offset is number of instructions to jump) -pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { - let bytes: [u8; 4] = match imm26 { - A64Opnd::Imm(imm26) => { - assert!(imm_fits_bits(imm26, 26), "The immediate operand must be 26 bits or less."); - - Call::bl(imm26 as i32).into() - }, - _ => panic!("Invalid operand combination to bl instruction.") - }; - - cb.write_bytes(&bytes); -} - -/// BR - branch to a register -pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { - let bytes: [u8; 4] = match rn { - A64Opnd::Reg(rn) => Branch::br(rn.reg_no).into(), - _ => panic!("Invalid operand to br instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// CMP - compare rn and rm, update flags -pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rn, rm) { - (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rn.num_bits == rm.num_bits, - "All operands must be of the same size." 
- ); - - DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() - }, - (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - - DataImm::cmp(rn.reg_no, imm12 as u16, rn.num_bits).into() - }, - _ => panic!("Invalid operand combination to cmp instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// LDADDAL - atomic add with acquire and release semantics -pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { - let bytes: [u8; 4] = match (rs, rt, rn) { - (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { - assert!( - rs.num_bits == rt.num_bits && rt.num_bits == rn.num_bits, - "All operands must be of the same size." - ); - - Atomic::ldaddal(rs.reg_no, rt.reg_no, rn.reg_no, rs.num_bits).into() - }, - _ => panic!("Invalid operand combination to ldaddal instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// LDUR - load a memory address into a register -pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { - let bytes: [u8; 4] = match (rt, rn) { - (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { - assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); - - Load::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() - }, - _ => panic!("Invalid operands for LDUR") - }; - - cb.write_bytes(&bytes); -} - -/// MOVK - move a 16 bit immediate into a register, keep the other bits in place -pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { - let bytes: [u8; 4] = match (rd, imm16) { - (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { - assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); - - Mov::movk(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to movk instruction.") - }; - - cb.write_bytes(&bytes); -} - -/// MOVZ - move a 16 bit 
immediate into a register, zero the other bits -pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { - let bytes: [u8; 4] = match (rd, imm16) { - (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { - assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); - - Mov::movz(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to movz instruction.") - }; - - cb.write_bytes(&bytes); -} - -/// STUR - store a value in a register at a memory address -pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { - let bytes: [u8; 4] = match (rt, rn) { - (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { - assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); - - Store::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() - }, - _ => panic!("Invalid operand combination to stur instruction.") - }; - - cb.write_bytes(&bytes); -} - -/// SUB - subtract rm from rn, put the result in rd, don't update flags -pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, - "All operands must be of the same size." 
- ); - - DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() - }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - - DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to sub instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// SUBS - subtract rm from rn, put the result in rd, update flags -pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rd, rn, rm) { - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!( - rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, - "All operands must be of the same size." - ); - - DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() - }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - - DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() - }, - _ => panic!("Invalid operand combination to subs instruction."), - }; - - cb.write_bytes(&bytes); -} - -/// RET - unconditionally return to a location in a register, defaults to X30 -pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { - let bytes: [u8; 4] = match rn { - A64Opnd::None => Branch::ret(30).into(), - A64Opnd::Reg(reg) => Branch::ret(reg.reg_no).into(), - _ => panic!("Invalid operand to ret instruction.") - }; - - cb.write_bytes(&bytes); -} - -/// TST - test the bits of a register against a mask, then update flags -pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { - let bytes: [u8; 4] = match (rn, rm) { - (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { - assert!(rn.num_bits == rm.num_bits, "All operands must be of the same size."); 
- - LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into() - }, - (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { - LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into() - }, - _ => panic!("Invalid operand combination to tst instruction."), - }; - - cb.write_bytes(&bytes); -} - -#[cfg(test)] -mod tests { - use super::*; - - /// Check that the bytes for an instruction sequence match a hex string - fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { - let mut cb = super::CodeBlock::new_dummy(128); - run(&mut cb); - assert_eq!(format!("{:x}", cb), bytes); - } - - #[test] - fn test_imm_fits_bits() { - assert!(imm_fits_bits(i8::MAX.into(), 8)); - assert!(imm_fits_bits(i8::MIN.into(), 8)); - - assert!(imm_fits_bits(i16::MAX.into(), 16)); - assert!(imm_fits_bits(i16::MIN.into(), 16)); - - assert!(imm_fits_bits(i32::MAX.into(), 32)); - assert!(imm_fits_bits(i32::MIN.into(), 32)); - - assert!(imm_fits_bits(i64::MAX.into(), 64)); - assert!(imm_fits_bits(i64::MIN.into(), 64)); - } - - #[test] - fn test_uimm_fits_bits() { - assert!(uimm_fits_bits(u8::MAX.into(), 8)); - assert!(uimm_fits_bits(u16::MAX.into(), 16)); - assert!(uimm_fits_bits(u32::MAX.into(), 32)); - assert!(uimm_fits_bits(u64::MAX.into(), 64)); - } - - #[test] - fn test_add_register() { - check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); - } - - #[test] - fn test_add_immediate() { - check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7))); - } - - #[test] - fn test_adds_register() { - check_bytes("200002ab", |cb| adds(cb, X0, X1, X2)); - } - - #[test] - fn test_adds_immediate() { - check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); - } - - #[test] - fn test_and_register() { - check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); - } - - #[test] - fn test_and_immediate() { - check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7))); - } - - #[test] - fn test_ands_register() { - check_bytes("200002ea", |cb| ands(cb, X0, X1, X2)); - } - 
- #[test] - fn test_ands_immediate() { - check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7))); - } - - #[test] - fn test_bl() { - check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); - } - - #[test] - fn test_br() { - check_bytes("80021fd6", |cb| br(cb, X20)); - } - - #[test] - fn test_cmp_register() { - check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); - } - - #[test] - fn test_cmp_immediate() { - check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14))); - } - - #[test] - fn test_ldaddal() { - check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); - } - - #[test] - fn test_ldur() { - check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); - } - - #[test] - fn test_movk() { - check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); - } - - #[test] - fn test_movz() { - check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); - } - - #[test] - fn test_ret_none() { - check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); - } - - #[test] - fn test_ret_register() { - check_bytes("80025fd6", |cb| ret(cb, X20)); - } - - #[test] - fn test_stur() { - check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(X11, 128))); - } - - #[test] - fn test_sub_register() { - check_bytes("200002cb", |cb| sub(cb, X0, X1, X2)); - } - - #[test] - fn test_sub_immediate() { - check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7))); - } - - #[test] - fn test_subs_register() { - check_bytes("200002eb", |cb| subs(cb, X0, X1, X2)); - } - - #[test] - fn test_subs_immediate() { - check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); - } - - #[test] - fn test_tst_register() { - check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); - } - - #[test] - fn test_tst_immediate() { - check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7))); - } -} +pub use atomic::Atomic; +pub use branch::Branch; +pub use branch_cond::BranchCond; +pub use call::Call; +pub use data_imm::DataImm; +pub use 
data_reg::DataReg; +pub use load::Load; +pub use logical_imm::LogicalImm; +pub use logical_reg::LogicalReg; +pub use mov::Mov; +pub use shift_imm::ShiftImm; +pub use store::Store; diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs index 0d68ffd206ac8c..e7cb9215b0774f 100644 --- a/yjit/src/asm/arm64/inst/mov.rs +++ b/yjit/src/asm/arm64/inst/mov.rs @@ -1,4 +1,4 @@ -use super::sf::Sf; +use super::super::arg::Sf; /// Which operation is being performed. enum Op { diff --git a/yjit/src/asm/arm64/inst/shift_imm.rs b/yjit/src/asm/arm64/inst/shift_imm.rs new file mode 100644 index 00000000000000..3d2685a997b1a2 --- /dev/null +++ b/yjit/src/asm/arm64/inst/shift_imm.rs @@ -0,0 +1,147 @@ +use super::super::arg::Sf; + +/// The operation to perform for this instruction. +enum Opc { + /// Logical left shift + LSL, + + /// Logical shift right + LSR +} + +/// The struct that represents an A64 unsigned bitfield move instruction that +/// can be encoded. +/// +/// LSL (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 1 0 0 1 1 0 | +/// | sf N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct ShiftImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to shift by. + shift: u8, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl ShiftImm { + /// LSL (immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en + pub fn lsl(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + ShiftImm { rd, rn, shift, opc: Opc::LSL, sf: num_bits.into() } + } + + /// LSR (immediate) + /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en + pub fn lsr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + ShiftImm { rd, rn, shift, opc: Opc::LSR, sf: num_bits.into() } + } + + /// Returns a triplet of (n, immr, imms) encoded in u32s for this + /// instruction. This mirrors how they will be encoded in the actual bits. + fn bitmask(&self) -> (u32, u32, u32) { + match self.opc { + // The key insight is a little buried in the docs, but effectively: + // LSL <Wd>, <Wn>, #<shift> == UBFM <Wd>, <Wn>, #(-<shift> MOD 32), #(31-<shift>) + // LSL <Xd>, <Xn>, #<shift> == UBFM <Xd>, <Xn>, #(-<shift> MOD 64), #(63-<shift>) + Opc::LSL => { + let shift = -(self.shift as i16); + + match self.sf { + Sf::Sf32 => ( + 0, + (shift.rem_euclid(32) & 0x3f) as u32, + ((31 - self.shift) & 0x3f) as u32 + ), + Sf::Sf64 => ( + 1, + (shift.rem_euclid(64) & 0x3f) as u32, + ((63 - self.shift) & 0x3f) as u32 + ) + } + }, + // Similar to LSL: + // LSR <Wd>, <Wn>, #<shift> == UBFM <Wd>, <Wn>, #<shift>, #31 + // LSR <Xd>, <Xn>, #<shift> == UBFM <Xd>, <Xn>, #<shift>, #63 + Opc::LSR => { + match self.sf { + Sf::Sf32 => (0, (self.shift & 0x3f) as u32, 31), + Sf::Sf64 => (1, (self.shift & 0x3f) as u32, 63) + } + } + } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +const FAMILY: u32 = 0b10011; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: ShiftImm) -> Self { + let (n, immr, imms) = inst.bitmask(); + + 0 + | ((inst.sf as u32) << 31) + | (1 << 30) + | (FAMILY << 24) + | (n << 22) + | (immr << 16) + | (imms << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: ShiftImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_lsl_32() { + let inst = ShiftImm::lsl(0, 1, 7, 32); + let result: u32 = inst.into(); + assert_eq!(0x53196020, result); + } + + #[test] + fn test_lsl_64() { + let inst = ShiftImm::lsl(0, 1, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xd379e020, result); + } + + #[test] + fn test_lsr_32() { + let inst = ShiftImm::lsr(0, 1, 7, 32); + let result: u32 = inst.into(); + assert_eq!(0x53077c20, result); + } + + #[test] + fn test_lsr_64() { + let inst = ShiftImm::lsr(0, 1, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xd347fc20, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 85a472ddec19ff..24f349d589a323 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -1,4 +1,531 @@ #![allow(dead_code)] // For instructions and operands we're not currently using. +use crate::asm::CodeBlock; + +mod arg; mod inst; mod opnd; + +use arg::*; +use inst::*; +use opnd::*; + +/// Checks that a signed value fits within the specified number of bits. +const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { + let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; + let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; + + imm >= minimum && imm <= maximum +} + +/// Checks that an unsigned value fits within the specified number of bits. 
+const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { + let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; + + uimm <= maximum +} + +/// ADD - add rn and rm, put the result in rd, don't update flags +pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to add instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADDS - add rn and rm, put the result in rd, update flags +pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to adds instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// AND - and rn and rm, put the result in rd, don't update flags +pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::and(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to and instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ANDS - and rn and rm, put the result in rd, update flags +pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::ands(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to ands instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// B.cond - branch to target if condition is true +pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { + let bytes: [u8; 4] = match byte_offset { + A64Opnd::Imm(imm) => { + assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); + assert!(imm & 0b11 == 0, "The immediate operand must be aligned to a 2-bit boundary."); + + BranchCond::bcond(cond, imm as i32).into() + }, + _ => panic!("Invalid operand combination to bcond instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// BL - branch with link (offset is number of instructions to jump) +pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(imm_fits_bits(imm26, 26), "The immediate operand must be 26 bits or less."); + + Call::bl(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to bl instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// BR - branch to a register +pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::br(rn.reg_no).into(), + _ => panic!("Invalid operand to br instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// CMP - compare rn and rm, update flags +pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::cmp(rn.reg_no, imm12 as u16, rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to cmp instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDADDAL - atomic add with acquire and release semantics +pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rs, rt, rn) { + (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!( + rs.num_bits == rt.num_bits && rt.num_bits == rn.num_bits, + "All operands must be of the same size." + ); + + Atomic::ldaddal(rs.reg_no, rt.reg_no, rn.reg_no, rs.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldaddal instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDUR - load a memory address into a register +pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + + Load::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operands for LDUR") + }; + + cb.write_bytes(&bytes); +} + +/// LSL - logical shift left a register by an immediate +pub fn lsl(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => { + assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less"); + + ShiftImm::lsl(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into() + }, + _ => panic!("Invalid operands combination to lsl instruction") 
+ }; + + cb.write_bytes(&bytes); +} + +/// LSR - logical shift right a register by an immediate +pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => { + assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less"); + + ShiftImm::lsr(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into() + }, + _ => panic!("Invalid operands combination to lsr instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MOVK - move a 16 bit immediate into a register, keep the other bits in place +pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movk(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movk instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// MOVZ - move a 16 bit immediate into a register, zero the other bits +pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movz(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STUR - store a value in a register at a memory address +pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits 
or less"); + + Store::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to stur instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// SUB - subtract rm from rn, put the result in rd, don't update flags +pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to sub instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// SUBS - subtract rm from rn, put the result in rd, update flags +pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to subs instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// RET - unconditionally return to a location in a register, defaults to X30 +pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::None => Branch::ret(30).into(), + A64Opnd::Reg(reg) => Branch::ret(reg.reg_no).into(), + _ => panic!("Invalid operand to ret instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TST - test the bits of a register against a mask, then update flags +pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rn.num_bits == rm.num_bits, "All operands must be of the same size."); + + LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to tst instruction."), + }; + + cb.write_bytes(&bytes); +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Check that the bytes for an instruction sequence match a hex string + fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { + let mut cb = super::CodeBlock::new_dummy(128); + run(&mut cb); + assert_eq!(format!("{:x}", cb), bytes); + } + + #[test] + fn test_imm_fits_bits() { + assert!(imm_fits_bits(i8::MAX.into(), 8)); + assert!(imm_fits_bits(i8::MIN.into(), 8)); + + assert!(imm_fits_bits(i16::MAX.into(), 16)); + assert!(imm_fits_bits(i16::MIN.into(), 16)); + + 
assert!(imm_fits_bits(i32::MAX.into(), 32)); + assert!(imm_fits_bits(i32::MIN.into(), 32)); + + assert!(imm_fits_bits(i64::MAX.into(), 64)); + assert!(imm_fits_bits(i64::MIN.into(), 64)); + } + + #[test] + fn test_uimm_fits_bits() { + assert!(uimm_fits_bits(u8::MAX.into(), 8)); + assert!(uimm_fits_bits(u16::MAX.into(), 16)); + assert!(uimm_fits_bits(u32::MAX.into(), 32)); + assert!(uimm_fits_bits(u64::MAX.into(), 64)); + } + + #[test] + fn test_add_register() { + check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); + } + + #[test] + fn test_add_immediate() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_adds_register() { + check_bytes("200002ab", |cb| adds(cb, X0, X1, X2)); + } + + #[test] + fn test_adds_immediate() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_and_register() { + check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); + } + + #[test] + fn test_and_immediate() { + check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_ands_register() { + check_bytes("200002ea", |cb| ands(cb, X0, X1, X2)); + } + + #[test] + fn test_ands_immediate() { + check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_bcond() { + check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400))); + } + + #[test] + fn test_bl() { + check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); + } + + #[test] + fn test_br() { + check_bytes("80021fd6", |cb| br(cb, X20)); + } + + #[test] + fn test_cmp_register() { + check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); + } + + #[test] + fn test_cmp_immediate() { + check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_ldaddal() { + check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); + } + + #[test] + fn test_ldur() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); + } + + #[test] + 
fn test_lsl() { + check_bytes("6ac572d3", |cb| lsl(cb, X10, X11, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_lsr() { + check_bytes("6afd4ed3", |cb| lsr(cb, X10, X11, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_movk() { + check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_movz() { + check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_ret_none() { + check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); + } + + #[test] + fn test_ret_register() { + check_bytes("80025fd6", |cb| ret(cb, X20)); + } + + #[test] + fn test_stur() { + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(X11, 128))); + } + + #[test] + fn test_sub_register() { + check_bytes("200002cb", |cb| sub(cb, X0, X1, X2)); + } + + #[test] + fn test_sub_immediate() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_subs_register() { + check_bytes("200002eb", |cb| subs(cb, X0, X1, X2)); + } + + #[test] + fn test_subs_immediate() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_tst_register() { + check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); + } + + #[test] + fn test_tst_immediate() { + check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7))); + } +} From 7a9b581e0896d4aa7a037da90c837b830213c8e8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 5 Jul 2022 16:04:19 -0400 Subject: [PATCH 356/546] Arm64 progress (https://github.com/Shopify/ruby/pull/304) * Get initial wiring up * Split IncrCounter instruction * Breakpoints in Arm64 * Support for ORR * MOV instruction encodings * Implement JmpOpnd and CRet * Add ORN * Add MVN * PUSH, POP, CCALL for Arm64 * Some formatting and implement Op::Not for Arm64 * Consistent constants when working with the Arm64 SP * Allow OR-ing values into the memory buffer * Test lowering Arm64 ADD * Emit unconditional jumps consistently in Arm64 * Begin emitting 
conditional jumps for A64 * Back out some labelref changes * Remove label API that no longer exists * Use a trait for the label encoders * Encode nop * Add in nops so jumps are the same width no matter what on Arm64 * Op::Jbe for CodePtr * Pass src_addr and dst_addr instead of calculated offset to label refs * Even more jump work for Arm64 * Fix up jumps to use consistent assertions * Handle splitting Add, Sub, and Not insns for Arm64 * More Arm64 splits and various fixes * PR feedback for Arm64 support * Split up jumps and conditional jump logic --- yjit/src/asm/arm64/inst/breakpoint.rs | 55 ++++ yjit/src/asm/arm64/inst/call.rs | 51 ++- yjit/src/asm/arm64/inst/logical_imm.rs | 37 ++- yjit/src/asm/arm64/inst/logical_reg.rs | 80 ++++- yjit/src/asm/arm64/inst/mod.rs | 4 + yjit/src/asm/arm64/inst/nop.rs | 44 +++ yjit/src/asm/arm64/mod.rs | 198 +++++++++++- yjit/src/asm/arm64/opnd.rs | 48 ++- yjit/src/asm/mod.rs | 33 +- yjit/src/asm/x86_64/mod.rs | 101 +++--- yjit/src/backend/arm64/mod.rs | 410 +++++++++++++++++++++++-- yjit/src/backend/ir.rs | 7 + yjit/src/backend/mod.rs | 7 +- yjit/src/backend/x86_64/mod.rs | 3 + 14 files changed, 939 insertions(+), 139 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/breakpoint.rs create mode 100644 yjit/src/asm/arm64/inst/nop.rs diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs new file mode 100644 index 00000000000000..be4920ac7656ed --- /dev/null +++ b/yjit/src/asm/arm64/inst/breakpoint.rs @@ -0,0 +1,55 @@ +/// The struct that represents an A64 breakpoint instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 | +/// | imm16.................................................. 
| +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Breakpoint { + /// The value to be captured by ESR_ELx.ISS + imm16: u16 +} + +impl Breakpoint { + /// BRK + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction- + pub fn brk(imm16: u16) -> Self { + Self { imm16 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Breakpoint) -> Self { + let imm16 = inst.imm16 as u32; + + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 21) + | (imm16 << 5) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Breakpoint) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_brk() { + let result: u32 = Breakpoint::brk(7).into(); + assert_eq!(0xd42000e0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs index 6f23acf9f57645..8d65359f771261 100644 --- a/yjit/src/asm/arm64/inst/call.rs +++ b/yjit/src/asm/arm64/inst/call.rs @@ -1,22 +1,41 @@ -/// The struct that represents an A64 branch with link instruction that can be -/// encoded. +/// The operation to perform for this instruction. +enum Op { + /// Branch directly, with a hint that this is not a subroutine call or + /// return. + Branch = 0, + + /// Branch directly, with a hint that this is a subroutine call or return. + BranchWithLink = 1 +} + +/// The struct that represents an A64 branch with our without link instruction +/// that can be encoded. 
/// /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 0 0 1 0 1 | -/// | imm26.................................................................................... | +/// | 0 0 1 0 1 | +/// | op imm26.................................................................................... | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct Call { /// The PC-relative offset to jump to (which will be multiplied by 4). - imm26: i32 + imm26: i32, + + /// The operation to perform for this instruction. + op: Op } impl Call { + /// B + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- + pub fn b(imm26: i32) -> Self { + Self { imm26, op: Op::Branch } + } + /// BL /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en pub fn bl(imm26: i32) -> Self { - Self { imm26 } + Self { imm26, op: Op::BranchWithLink } } } @@ -29,7 +48,7 @@ impl From for u32 { let imm26 = (inst.imm26 as u32) & ((1 << 26) - 1); 0 - | (1 << 31) + | ((inst.op as u32) << 31) | (FAMILY << 26) | imm26 } @@ -64,4 +83,22 @@ mod tests { let result: u32 = Call::bl(-256).into(); assert_eq!(0x97ffff00, result); } + + #[test] + fn test_b() { + let result: u32 = Call::b(0).into(); + assert_eq!(0x14000000, result); + } + + #[test] + fn test_b_positive() { + let result: u32 = Call::b(256).into(); + assert_eq!(0x14000100, result); + } + + #[test] + fn test_b_negative() { + let result: u32 = Call::b(-256).into(); + assert_eq!(0x17ffff00, result); + } } diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index cc2a16cbdc349a..13865697f6cd42 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ 
b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -5,6 +5,9 @@ enum Opc { /// The AND operation. And = 0b00, + /// The ORR operation. + Orr = 0b01, + /// The ANDS operation. Ands = 0b11 } @@ -12,7 +15,7 @@ enum Opc { /// The struct that represents an A64 bitwise immediate instruction that can be /// encoded. /// -/// AND/ANDS (immediate) +/// AND/ORR/ANDS (immediate) /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | /// | 1 0 0 1 0 0 | @@ -37,19 +40,31 @@ pub struct LogicalImm { } impl LogicalImm { - /// AND (immediate) + /// AND (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } } - /// ANDS (immediate) + /// ANDS (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } } - /// TST (immediate) + /// MOV (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en + pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-- + pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST 
(bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { Self::ands(31, rn, imm, num_bits) @@ -100,6 +115,20 @@ mod tests { assert_eq!(0xf2400820, result); } + #[test] + fn test_mov() { + let inst = LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb200f3e0, result); + } + + #[test] + fn test_orr() { + let inst = LogicalImm::orr(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb2400820, result); + } + #[test] fn test_tst() { let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64); diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index 3feb3350ab81aa..5d7954c587a4c0 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -1,5 +1,14 @@ use super::super::arg::Sf; +/// Whether or not this is a NOT instruction. +enum N { + /// This is not a NOT instruction. + No = 0, + + /// This is a NOT instruction. + Yes = 1 +} + /// The type of shift to perform on the second operand register. enum Shift { LSL = 0b00, // logical shift left (unsigned) @@ -13,6 +22,9 @@ enum Opc { /// The AND operation. And = 0b00, + /// The ORR operation. + Orr = 0b01, + /// The ANDS operation. Ands = 0b11 } @@ -20,11 +32,11 @@ enum Opc { /// The struct that represents an A64 logical register instruction that can be /// encoded. /// -/// AND/ANDS (shifted register) +/// AND/ORR/ANDS (shifted register) /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 0 1 0 1 0 0 | -/// | sf opc.. shift rm.............. imm6............... rn.............. 
rd.............. | +/// | 0 1 0 1 0 | +/// | sf opc.. shift N rm.............. imm6............... rn.............. rd.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct LogicalReg { @@ -40,6 +52,9 @@ pub struct LogicalReg { /// The register number of the second operand register. rm: u8, + /// Whether or not this is a NOT instruction. + n: N, + /// The type of shift to perform on the second operand register. shift: Shift, @@ -54,19 +69,43 @@ impl LogicalReg { /// AND (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } } /// ANDS (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } + + /// MOV (register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en + pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// MVN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en + pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 
0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-- + pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- + pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } } /// TST (shifted register) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { - Self { rd: 31, rn, imm6: 0, rm, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } } } @@ -83,6 +122,7 @@ impl From for u32 { | ((inst.opc as u32) << 29) | (FAMILY << 25) | ((inst.shift as u32) << 22) + | ((inst.n as u32) << 21) | ((inst.rm as u32) << 16) | (imm6 << 10) | ((inst.rn as u32) << 5) @@ -116,6 +156,34 @@ mod tests { assert_eq!(0xea020020, result); } + #[test] + fn test_mov() { + let inst = LogicalReg::mov(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa0103e0, result); + } + + #[test] + fn test_mvn() { + let inst = LogicalReg::mvn(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa2103e0, result); + } + + #[test] + fn test_orn() { + let inst = LogicalReg::orn(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa220020, result); + } + + #[test] + fn test_orr() { + let inst = LogicalReg::orr(0, 1, 2, 64); + let result: u32 = 
inst.into(); + assert_eq!(0xaa020020, result); + } + #[test] fn test_tst() { let inst = LogicalReg::tst(0, 1, 64); diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 2f0e7089996e45..ae589ca564e83f 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -4,6 +4,7 @@ mod atomic; mod branch; mod branch_cond; +mod breakpoint; mod call; mod data_imm; mod data_reg; @@ -11,12 +12,14 @@ mod load; mod logical_imm; mod logical_reg; mod mov; +mod nop; mod shift_imm; mod store; pub use atomic::Atomic; pub use branch::Branch; pub use branch_cond::BranchCond; +pub use breakpoint::Breakpoint; pub use call::Call; pub use data_imm::DataImm; pub use data_reg::DataReg; @@ -24,5 +27,6 @@ pub use load::Load; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; pub use mov::Mov; +pub use nop::Nop; pub use shift_imm::ShiftImm; pub use store::Store; diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs new file mode 100644 index 00000000000000..a99f8d34b7a9e0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/nop.rs @@ -0,0 +1,44 @@ +/// The struct that represents an A64 nop instruction that can be encoded. +/// +/// NOP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Nop; + +impl Nop { + /// NOP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation- + pub fn nop() -> Self { + Self {} + } +} + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: Nop) -> Self { + 0b11010101000000110010000000011111 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Nop) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nop() { + let inst = Nop::nop(); + let result: u32 = inst.into(); + assert_eq!(0xd503201f, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 24f349d589a323..ced8b262c5b779 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -6,12 +6,15 @@ mod arg; mod inst; mod opnd; -use arg::*; use inst::*; -use opnd::*; + +// We're going to make these public to make using these things easier in the +// backend (so they don't have to have knowledge about the submodule). +pub use arg::*; +pub use opnd::*; /// Checks that a signed value fits within the specified number of bits. -const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { +pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; @@ -19,7 +22,7 @@ const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { } /// Checks that an unsigned value fits within the specified number of bits. -const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { +pub const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; uimm <= maximum @@ -115,12 +118,39 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// Whether or not the offset between two instructions fits into the branch with +/// or without link instruction. If it doesn't, then we have to load the value +/// into a register first. 
+pub const fn b_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 26) +} + +/// B - branch without link (offset is number of instructions to jump) +pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); + + Call::b(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to b instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset between two instructions fits into the b.cond +/// instruction. If it doesn't, then we have to load the value into a register +/// first, then use the b.cond instruction to skip past a direct jump. +pub const fn bcond_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 21) && (offset & 0b11 == 0) +} + /// B.cond - branch to target if condition is true pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { let bytes: [u8; 4] = match byte_offset { A64Opnd::Imm(imm) => { - assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); - assert!(imm & 0b11 == 0, "The immediate operand must be aligned to a 2-bit boundary."); + assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); BranchCond::bcond(cond, imm as i32).into() }, @@ -134,7 +164,7 @@ pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { let bytes: [u8; 4] = match imm26 { A64Opnd::Imm(imm26) => { - assert!(imm_fits_bits(imm26, 26), "The immediate operand must be 26 bits or less."); + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); Call::bl(imm26 as i32).into() }, @@ -154,6 +184,20 @@ pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// BRK - create a breakpoint +pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { + let bytes: [u8; 4] = match imm16 
{ + A64Opnd::None => Breakpoint::brk(0).into(), + A64Opnd::UImm(imm16) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + Breakpoint::brk(imm16 as u16).into() + }, + _ => panic!("Invalid operand combination to brk instruction.") + }; + + cb.write_bytes(&bytes); +} + /// CMP - compare rn and rm, update flags pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rn, rm) { @@ -196,6 +240,11 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { /// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + + Load::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() + }, (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); @@ -238,6 +287,23 @@ pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { cb.write_bytes(&bytes); } +/// MOV - move a value in a register to another register +pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { + LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mov instruction") + }; + + cb.write_bytes(&bytes); +} + /// MOVK - move a 16 bit immediate into a register, keep the other bits in place pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { @@ -266,6 
+332,63 @@ pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MVN - move a value in a register to another register, negating it +pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mvn(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mvn instruction") + }; + + cb.write_bytes(&bytes); +} + +/// NOP - no-operation, used for alignment purposes +pub fn nop(cb: &mut CodeBlock) { + let bytes: [u8; 4] = Nop::nop().into(); + + cb.write_bytes(&bytes); +} + +/// ORN - perform a bitwise OR of rn and NOT rm, put the result in rd, don't update flags +pub fn orn(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::orn(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orn instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// ORR - perform a bitwise OR of rn and rm, put the result in rd, don't update flags +pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::orr(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// STUR - store a value in a register at a memory address pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -434,6 +557,11 @@ mod tests { check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400))); } + #[test] + fn test_b() { + check_bytes("00040014", |cb| b(cb, A64Opnd::new_imm(1024))); + } + #[test] fn test_bl() { check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); @@ -444,6 +572,16 @@ mod tests { check_bytes("80021fd6", |cb| br(cb, X20)); } + #[test] + fn test_brk_none() { + check_bytes("000020d4", |cb| brk(cb, A64Opnd::None)); + } + + #[test] + fn test_brk_uimm() { + check_bytes("c00120d4", |cb| brk(cb, A64Opnd::new_uimm(14))); + } + #[test] fn test_cmp_register() { check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); @@ -460,8 +598,13 @@ mod tests { } #[test] - fn test_ldur() { - check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(X1, 123))); + fn test_ldur_memory() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); + } + + #[test] + fn test_ldur_register() { + check_bytes("200040f8", |cb| ldur(cb, X0, X1)); } #[test] @@ -474,6 +617,16 @@ mod tests { check_bytes("6afd4ed3", |cb| lsr(cb, X10, X11, A64Opnd::new_uimm(14))); } + #[test] + fn test_mov_registers() { + check_bytes("ea030baa", |cb| mov(cb, X10, X11)); + } + + #[test] + fn test_mov_immediate() { + check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555))); + } + #[test] fn test_movk() { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 
16)); @@ -484,6 +637,31 @@ mod tests { check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); } + #[test] + fn test_mvn() { + check_bytes("ea032baa", |cb| mvn(cb, X10, X11)); + } + + #[test] + fn test_nop() { + check_bytes("1f2003d5", |cb| nop(cb)); + } + + #[test] + fn test_orn() { + check_bytes("6a012caa", |cb| orn(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_register() { + check_bytes("6a010caa", |cb| orr(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_immediate() { + check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7))); + } + #[test] fn test_ret_none() { check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); @@ -496,7 +674,7 @@ mod tests { #[test] fn test_stur() { - check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(X11, 128))); + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); } #[test] diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index aa73d438fe7278..6c06d2db3c2254 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -11,6 +11,15 @@ pub struct A64Reg pub reg_no: u8, } +impl A64Reg { + pub fn sub_reg(&self, num_bits: u8) -> Self { + assert!(num_bits == 32 || num_bits == 64); + assert!(num_bits <= self.num_bits); + + Self { num_bits, reg_no: self.reg_no } + } +} + #[derive(Clone, Copy, Debug)] pub struct A64Mem { @@ -25,14 +34,10 @@ pub struct A64Mem } impl A64Mem { - pub fn new(reg: A64Opnd, disp: i32) -> Self { + pub fn new(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { match reg { A64Opnd::Reg(reg) => { - Self { - num_bits: reg.num_bits, - base_reg_no: reg.reg_no, - disp - } + Self { num_bits, base_reg_no: reg.reg_no, disp } }, _ => panic!("Expected register operand") } @@ -70,8 +75,8 @@ impl A64Opnd { } /// Creates a new memory operand. 
- pub fn new_mem(reg: A64Opnd, disp: i32) -> Self { - A64Opnd::Mem(A64Mem::new(reg, disp)) + pub fn new_mem(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { + A64Opnd::Mem(A64Mem::new(num_bits, reg, disp)) } /// Convenience function to check if this operand is a register. @@ -87,23 +92,32 @@ pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 }; pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 }; pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; +pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 }; +pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 }; +pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 }; +pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 }; +pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 }; pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 }; +pub const X24_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 24 }; +pub const X25_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 25 }; +pub const X26_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 26 }; + // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); pub const X2: A64Opnd = A64Opnd::Reg(X2_REG); pub const X3: A64Opnd = A64Opnd::Reg(X3_REG); -pub const X4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 4 }); -pub const X5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 5 }); +pub const X4: A64Opnd = A64Opnd::Reg(X4_REG); +pub const X5: A64Opnd = A64Opnd::Reg(X5_REG); pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 }); pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 }); pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 }); -pub const X9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 9 }); -pub const X10: A64Opnd = A64Opnd::Reg(A64Reg { 
num_bits: 64, reg_no: 10 }); -pub const X11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 11 }); +pub const X9: A64Opnd = A64Opnd::Reg(X9_REG); +pub const X10: A64Opnd = A64Opnd::Reg(X10_REG); +pub const X11: A64Opnd = A64Opnd::Reg(X11_REG); pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); pub const X14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 14 }); @@ -116,13 +130,14 @@ pub const X20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 20 }); pub const X21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 21 }); pub const X22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 22 }); pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); -pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); -pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); -pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); +pub const X24: A64Opnd = A64Opnd::Reg(X24_REG); +pub const X25: A64Opnd = A64Opnd::Reg(X25_REG); +pub const X26: A64Opnd = A64Opnd::Reg(X26_REG); pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); +pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); // 32-bit registers pub const W0: A64Reg = A64Reg { num_bits: 32, reg_no: 0 }; @@ -156,6 +171,7 @@ pub const W27: A64Reg = A64Reg { num_bits: 32, reg_no: 27 }; pub const W28: A64Reg = A64Reg { num_bits: 32, reg_no: 28 }; pub const W29: A64Reg = A64Reg { num_bits: 32, reg_no: 29 }; pub const W30: A64Reg = A64Reg { num_bits: 32, reg_no: 30 }; +pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 }; // C argument registers pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; diff 
--git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index b54fc362b4fac3..5723406aecf5a3 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -23,6 +23,14 @@ struct LabelRef { // Label which this refers to label_idx: usize, + + /// The number of bytes that this label reference takes up in the memory. + /// It's necessary to know this ahead of time so that when we come back to + /// patch it it takes the same amount of space. + num_bytes: usize, + + /// The object that knows how to encode the branch instruction. + encode: Box } /// Block of memory into which instructions can be assembled @@ -154,7 +162,7 @@ impl CodeBlock { self.get_ptr(self.write_pos) } - // Write a single byte at the current position + /// Write a single byte at the current position. pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); @@ -165,15 +173,15 @@ impl CodeBlock { } } - // Write multiple bytes starting from the current position - pub fn write_bytes(&mut self, bytes: &[u8]) { + /// Write multiple bytes starting from the current position. + fn write_bytes(&mut self, bytes: &[u8]) { for byte in bytes { self.write_byte(*byte); } } - // Write a signed integer over a given number of bits at the current position - pub fn write_int(&mut self, val: u64, num_bits: u32) { + /// Write an integer over the given number of bits at the current position. 
+ fn write_int(&mut self, val: u64, num_bits: u32) { assert!(num_bits > 0); assert!(num_bits % 8 == 0); @@ -219,14 +227,14 @@ impl CodeBlock { } // Add a label reference at the current write position - pub fn label_ref(&mut self, label_idx: usize) { + pub fn label_ref(&mut self, label_idx: usize, num_bytes: usize, encode: E) where E: FnOnce(&mut CodeBlock, i64, i64) { assert!(label_idx < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { - pos: self.write_pos, - label_idx, - }); + self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode: Box::new(encode) }); + + // Move past however many bytes the instruction takes up + self.write_pos += num_bytes; } // Link internal label references @@ -242,11 +250,8 @@ impl CodeBlock { let label_addr = self.label_addrs[label_idx]; assert!(label_addr < self.mem_size); - // Compute the offset from the reference's end to the label - let offset = (label_addr as i64) - ((ref_pos + 4) as i64); - self.set_pos(ref_pos); - self.write_int(offset as u64, 32); + (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); } self.write_pos = orig_pos; diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 9869b79e233849..a2a3b47f82799d 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -703,14 +703,10 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) { /// call - Call to label with 32-bit offset pub fn call_label(cb: &mut CodeBlock, label_idx: usize) { - // Write the opcode - cb.write_byte(0xE8); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int(0, 32); + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE8); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// call - Indirect call with an R/M operand @@ -801,55 +797,54 @@ pub fn int3(cb: &mut CodeBlock) { cb.write_byte(0xcc); } 
-// Encode a relative jump to a label (direct or conditional) +// Encode a conditional relative jump to a label // Note: this always encodes a 32-bit offset -fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) { - // Write the opcode - if op0 != 0xff { - cb.write_byte(op0); - } - - cb.write_byte(op1); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int( 0, 32); +fn write_jcc(cb: &mut CodeBlock, op: u8, label_idx: usize) { + cb.label_ref(label_idx, 6, move |cb, src_addr, dst_addr| { + cb.write_byte(0x0F); + cb.write_byte(op); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// jcc - relative jumps to a label -pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub 
fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); } -pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); } -pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); } -pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); } -pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); } -pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); } -pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); } +pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } +pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jb_label (cb: &mut CodeBlock, 
label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } +pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } +pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } +pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } +pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } +pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } +pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } +pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } +pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } +pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } +pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } +pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } +pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } +pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } +pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } +pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x81, label_idx); } +pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8b, label_idx); } +pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x89, label_idx); } +pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, 
label_idx); } +pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x80, label_idx); } +pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } +pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } +pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8B, label_idx); } +pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x88, label_idx); } +pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } + +pub fn jmp_label(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE9); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} /// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional) fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 4e4c553c9d1ede..061d21d19bab60 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -7,26 +7,51 @@ use crate::asm::arm64::*; use crate::codegen::{JITState}; use crate::cruby::*; use crate::backend::ir::*; +use crate::virtualmem::CodePtr; // Use the arm64 register type for this platform pub type Reg = A64Reg; // Callee-saved registers -pub const _CFP: Opnd = Opnd::Reg(X9); -pub const _EC: Opnd = Opnd::Reg(X10); -pub const _SP: Opnd = Opnd::Reg(X11); +pub const _CFP: Opnd = Opnd::Reg(X24_REG); +pub const _EC: Opnd = Opnd::Reg(X25_REG); +pub const _SP: Opnd = Opnd::Reg(X26_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(X0_REG), + Opnd::Reg(X1_REG), + Opnd::Reg(X2_REG), + Opnd::Reg(X3_REG), + Opnd::Reg(X4_REG), + Opnd::Reg(X5_REG) +]; // C return value register on this platform -pub const RET_REG: Reg = X0; +pub const C_RET_REG: Reg = X0_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); + +// 
These constants define the way we work with Arm64's stack pointer. The stack +// pointer always needs to be aligned to a 16-byte boundary. +pub const C_SP_REG: A64Opnd = X31; +pub const C_SP_STEP: A64Opnd = A64Opnd::UImm(16); /// Map Opnd to A64Opnd impl From for A64Opnd { fn from(opnd: Opnd) -> Self { match opnd { - Opnd::UImm(val) => uimm_opnd(val), - Opnd::Imm(val) => imm_opnd(val), + Opnd::UImm(value) => A64Opnd::new_uimm(value), + Opnd::Imm(value) => A64Opnd::new_imm(value), Opnd::Reg(reg) => A64Opnd::Reg(reg), - _ => panic!("unsupported arm64 operand type") + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { + A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { + panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") + }, + Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), + Opnd::None => panic!("attempted to lower an Opnd::None"), + Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), } } } @@ -43,39 +68,368 @@ impl Assembler } /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. fn arm64_split(mut self) -> Assembler { - // The transformations done here are meant to make our lives simpler in later - // stages of the compilation pipeline. - // Here we may want to make sure that all instructions (except load and store) - // have no memory operands. + self.forward_pass(|asm, index, op, opnds, target| { + match op { + Op::Add | Op::Sub => { + // Check if one of the operands is a register. If it is, + // then we'll make that the first operand. 
+ match (opnds[0], opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + let opnd0 = asm.load(opnds[0]); + let opnd1 = asm.load(opnds[1]); + asm.push_insn(op, vec![opnd0, opnd1], target); + }, + (mem_opnd @ Opnd::Mem(_), other_opnd) | + (other_opnd, mem_opnd @ Opnd::Mem(_)) => { + let opnd0 = asm.load(mem_opnd); + asm.push_insn(op, vec![opnd0, other_opnd], target); + }, + _ => { + asm.push_insn(op, opnds, target); + } + } + }, + Op::IncrCounter => { + // Every operand to the IncrCounter instruction need to be a + // register once it gets there. So here we're going to load + // anything that isn't a register first. + let new_opnds: Vec = opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Mem(_) | Opnd::Imm(_) | Opnd::UImm(_) => asm.load(opnd), + _ => opnd, + } + }).collect(); + + asm.incr_counter(new_opnds[0], new_opnds[1]); + }, + Op::Mov => { + // The value that is being moved must be either a register + // or an immediate that can be encoded as a bitmask + // immediate. Otherwise, we'll need to split the move into + // multiple instructions. + let value = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnds[1]), + Opnd::UImm(uimm) => { + if let Ok(encoded) = BitmaskImmediate::try_from(uimm) { + opnds[1] + } else { + asm.load(opnds[1]) + } + }, + _ => unreachable!() + }; + + /// If we're attempting to load into a memory operand, then + /// we'll switch over to the store instruction. Otherwise + /// we'll use the normal mov instruction. + match opnds[0] { + Opnd::Mem(_) => asm.store(opnds[0], value), + _ => asm.mov(opnds[0], value) + }; + }, + Op::Not => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnds[0] { + Opnd::Mem(_) => asm.load(opnds[0]), + _ => opnds[0] + }; + + asm.not(opnd0); + }, + Op::Store => { + // The value being stored must be in a register, so if it's + // not already one we'll load it first. 
+ let opnd1 = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + _ => asm.load(opnds[1]) + }; - todo!(); + asm.store(opnds[0], opnd1); + }, + _ => { + asm.push_insn(op, opnds, target); + } + }; + }) } /// Emit platform-specific machine code /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec { - // NOTE: dear Kevin, - // for arm, you may want to reserve 1 or 2 caller-save registers - // to use as scracth registers (during the last phase of the codegen) - // These registers will not be allocated to anything by the register - // allocator, they're just useful because arm is slightly trickier - // than x86 to generate code for. - // For example, if you want to jump far away, you may want to store - // the jump target address in a register first. - - todo!(); + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. + fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) { + match target { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + let offset = dst_addr - src_addr; + + // If the jump offset fits into the conditional jump as an + // immediate value and it's properly aligned, then we can + // use the b.cond instruction directly. Otherwise, we need + // to load the address into a register and use the branch + // register instruction. + if bcond_offset_fits_bits(offset) { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + } else { + // If the condition is met, then we'll skip past the + // next instruction, put the address in a register, and + // jump to it. + bcond(cb, condition, A64Opnd::new_imm(4)); + + // If the offset fits into a direct jump, then we'll use + // that and the number of instructions will be shorter. + // Otherwise we'll use the branch register instruction. 
+ if b_offset_fits_bits(offset) { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that performs the direct jump. + b(cb, A64Opnd::new_imm(4)); + + // Here we'll perform the direct jump to the target. + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that perform the direct jump. + b(cb, A64Opnd::new_imm(8)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + }); + }, + Target::FunPtr(_) => unreachable!() + }; + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec = Vec::new(); + + // For each instruction + for insn in &self.insns { + match insn.op { + Op::Comment => { + if cfg!(feature = "asm_comments") { + cb.add_comment(&insn.text.as_ref().unwrap()); + } + }, + Op::Label => { + cb.write_label(insn.target.unwrap().unwrap_label_idx()); + }, + Op::Add => { + add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Sub => { + sub(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::And => { + and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Not => { + mvn(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::Store => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. 
However in + // our IR we have the address first and the register second. + stur(cb, insn.opnds[1].into(), insn.opnds[0].into()); + }, + Op::Load => { + mov(cb, insn.out.into(), insn.opnds[0].into()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value being + // loaded is a heap object, we'll report that back out to + // the gc_offsets list. + if let Opnd::Value(val) = insn.opnds[0] { + if !val.special_const_p() { + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + } + } + }, + Op::Mov => { + mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Lea => { + ldur(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CPush => { + add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + mov(cb, A64Opnd::new_mem(64, C_SP_REG, 0), insn.opnds[0].into()); + }, + Op::CPop => { + mov(cb, insn.out.into(), A64Opnd::new_mem(64, C_SP_REG, 0)); + sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + }, + Op::CCall => { + // Temporary + assert!(insn.opnds.len() < C_ARG_REGS.len()); + + // For each operand + for (idx, opnd) in insn.opnds.iter().enumerate() { + mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); + } + + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; + + // The offset between the two instructions in bytes. Note + // that when we encode this into a bl instruction, we'll + // divide by 4 because it accepts the number of instructions + // to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the branch + // link instruction. Otherwise, we'll move the destination + // and return address into appropriate registers and use the + // branch register instruction. 
+ if b_offset_fits_bits(offset) { + bl(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X30, A64Opnd::new_uimm(src_addr as u64)); + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Op::CRet => { + // TODO: bias allocation towards return register + if insn.opnds[0] != Opnd::Reg(C_RET_REG) { + mov(cb, C_RET_OPND.into(), insn.opnds[0].into()); + } + + ret(cb, A64Opnd::None); + }, + Op::Cmp => { + cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Test => { + tst(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::JmpOpnd => { + br(cb, insn.opnds[0].into()); + }, + Op::Jmp => { + match insn.target.unwrap() { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = dst_ptr.into_i64(); + + // The offset between the two instructions in bytes. + // Note that when we encode this into a b + // instruction, we'll divide by 4 because it accepts + // the number of instructions to jump over. + let offset = dst_addr - src_addr; + + // If the offset is short enough, then we'll use the + // branch instruction. Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + if b_offset_fits_bits(offset) { + b(cb, A64Opnd::new_imm(offset / 4)); + } else { + mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + br(cb, X29); + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. 
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4)); + }); + }, + _ => unreachable!() + }; + }, + Op::Je => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jbe => { + emit_conditional_jump(cb, Condition::LS, insn.target.unwrap()); + }, + Op::Jz => { + emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + }, + Op::Jnz => { + emit_conditional_jump(cb, Condition::NE, insn.target.unwrap()); + }, + Op::Jo => { + emit_conditional_jump(cb, Condition::VS, insn.target.unwrap()); + }, + Op::IncrCounter => { + ldaddal(cb, insn.opnds[0].into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Breakpoint => { + brk(cb, A64Opnd::None); + } + }; + } + + gc_offsets } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec { - self - .arm64_split() - .split_loads() - .alloc_regs(regs) - .arm64_emit(jit, cb) + let mut asm = self.arm64_split().split_loads().alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.arm64_emit(cb); + cb.link_labels(); + + gc_offsets + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, vec![X3_REG]); + + let insns = cb.get_ptr(0).raw_ptr() as *const u32; + assert_eq!(0x8b010003, unsafe { *insns }); } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 4e8ed0b8a42d90..bdefe1c6bc15ef 100644 --- a/yjit/src/backend/ir.rs 
+++ b/yjit/src/backend/ir.rs @@ -276,6 +276,13 @@ pub enum Target impl Target { + pub fn unwrap_fun_ptr(&self) -> *const u8 { + match self { + Target::FunPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into fun ptr", self) + } + } + pub fn unwrap_label_idx(&self) -> usize { match self { Target::Label(idx) => *idx, diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 0841c9ffa5859c..47946950946438 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -1,3 +1,8 @@ +#[cfg(target_arch = "x86_64")] pub mod x86_64; + +#[cfg(target_arch = "aarch64")] +pub mod arm64; + pub mod ir; -mod tests; \ No newline at end of file +mod tests; diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index f4e0d4f53aab16..19b5096a26f884 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -186,6 +186,9 @@ impl Assembler for (idx, opnd) in insn.opnds.iter().enumerate() { mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); } + + let ptr = insn.target.unwrap().unwrap_fun_ptr(); + call_ptr(cb, RAX, ptr); }, Op::CRet => { From aab53e2868f7b1a28915f181e0875b990c07b8c9 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 6 Jul 2022 15:09:08 -0400 Subject: [PATCH 357/546] Add test for direct jump to a code pointer --- yjit/src/backend/tests.rs | 12 ++++++++++++ yjit/src/codegen.rs | 7 +++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 6545d0151751f9..9ec986f73453d8 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -250,6 +250,18 @@ fn test_jcc_ptr() asm.compile_with_num_regs(&mut cb, 1); } +/// Direct jump to a stub e.g. 
for deferred compilation +#[test] +fn test_jmp_ptr() +{ + let (mut asm, mut cb) = setup_asm(); + + let stub = Target::CodePtr((5 as *mut u8).into()); + asm.jmp(stub); + + asm.compile_with_num_regs(&mut cb, 0); +} + #[test] fn test_jo() { diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index c99fef5b0c4af1..656fc63c0c3867 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1100,6 +1100,7 @@ fn gen_adjuststack( KeepCompiling } + /* fn gen_opt_plus( jit: &mut JITState, @@ -1134,7 +1135,7 @@ fn gen_opt_plus( // Add arg0 + arg1 and test for overflow let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); let out_val = asm.add(arg0_untag, arg1); - asm.jo(side_exit); + asm.jo(side_exit.into()); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); @@ -1142,12 +1143,14 @@ fn gen_opt_plus( KeepCompiling } else { - todo!(); + todo!("opt_plus send path"); //gen_opt_send_without_block(jit, ctx, cb, ocb) } } */ + + // new array initialized from top N values fn gen_newarray( jit: &mut JITState, From 65019ed60c635b34337ea35978e931d09ab0181b Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 7 Jul 2022 10:49:59 -0400 Subject: [PATCH 358/546] Get codegen for deferred compilation working --- yjit/src/codegen.rs | 42 ++++++++++++++++++++---------- yjit/src/core.rs | 63 +++++++++++++++++++++++++++++++-------------- 2 files changed, 72 insertions(+), 33 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 656fc63c0c3867..6c8fd950a66569 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -34,9 +34,10 @@ pub use crate::virtualmem::CodePtr; /// Status returned by code generation functions #[derive(PartialEq, Debug)] enum CodegenStatus { - EndBlock, KeepCompiling, CantCompile, + EndBlock, + DeferCompilation, } /// Code generation function signature @@ -734,6 +735,9 @@ pub fn gen_single_block( // Create a backend assembler instance let mut asm = Assembler::new(); + // Codegen status for the last instruction compiled + 
let mut status = CantCompile; + // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { @@ -759,16 +763,12 @@ pub fn gen_single_block( // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { - // FIXME: is this sound with the new assembler? // Generate an exit to this instruction and record it let exit_pos = gen_outlined_exit(jit.pc, &ctx, ocb); record_global_inval_patch(cb, exit_pos); jit.record_boundary_patch_point = false; - - - } // In debug mode, verify our existing assumption @@ -777,7 +777,7 @@ pub fn gen_single_block( } // Lookup the codegen function for this instruction - let mut status = CantCompile; + status = CantCompile; if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) { // :count-placement: // Count bytecode instructions that execute in generated code. @@ -820,6 +820,11 @@ pub fn gen_single_block( break; } + // If we are deferring compilation for this instruction + if status == DeferCompilation { + break; + } + // For now, reset the chain depth after each instruction as only the // first instruction in the block can concern itself with the depth. ctx.reset_chain_depth(); @@ -850,10 +855,25 @@ pub fn gen_single_block( block.set_end_idx(insn_idx); } + // If we are deferring compilation for the current instruction + if status == DeferCompilation { + defer_compilation(&jit.block, insn_idx, &ctx, cb, ocb); + + // Mark the end position of the block + let mut block = jit.block.borrow_mut(); + block.set_end_addr(cb.get_write_ptr()); + } + + + // We currently can't handle cases where the request is for a block that // doesn't go to the next instruction. 
//assert!(!jit.record_boundary_patch_point); + + + + // If code for the block doesn't fit, fail if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() { return Err(()); @@ -1100,8 +1120,6 @@ fn gen_adjuststack( KeepCompiling } - -/* fn gen_opt_plus( jit: &mut JITState, ctx: &mut Context, @@ -1109,8 +1127,7 @@ fn gen_opt_plus( ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); - return EndBlock; + return DeferCompilation; } let comptime_a = jit_peek_at_stack(jit, ctx, 1); @@ -1147,9 +1164,6 @@ fn gen_opt_plus( //gen_opt_send_without_block(jit, ctx, cb, ocb) } } -*/ - - // new array initialized from top N values fn gen_newarray( @@ -5969,7 +5983,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_setlocal => Some(gen_setlocal), //YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), //YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), - //YARVINSN_opt_plus => Some(gen_opt_plus), + YARVINSN_opt_plus => Some(gen_opt_plus), /* YARVINSN_opt_minus => Some(gen_opt_minus), YARVINSN_opt_and => Some(gen_opt_and), diff --git a/yjit/src/core.rs b/yjit/src/core.rs index a2659b55fdbb21..1b9026024854e8 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -255,7 +255,7 @@ pub enum InsnOpnd { /// Code generation context /// Contains information we can use to specialize/optimize code /// There are a lot of context objects so we try to keep the size small. 
-#[derive(Copy, Clone, Default, Debug)] +#[derive(Copy, Clone, Default, PartialEq, Debug)] pub struct Context { // Number of values currently on the temporary stack stack_size: u16, @@ -301,7 +301,7 @@ pub enum BranchShape { // Branch code generation function signature type BranchGenFn = - fn(cb: &mut CodeBlock, target0: CodePtr, target1: Option, shape: BranchShape) -> (); + fn(cb: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape) -> (); /// Store info about an outgoing branch in a code segment /// Note: care must be taken to minimize the size of branch objects @@ -1511,12 +1511,18 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { // Rewrite the branch assert!(branch.dst_addrs[0].is_some()); cb.set_write_ptr(branch.start_addr.unwrap()); + + let mut asm = Assembler::new(); + (branch.gen_fn)( - cb, + &mut asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], branch.shape, ); + + asm.compile(cb); + branch.end_addr = Some(cb.get_write_ptr()); // The block may have shrunk after the branch is rewritten @@ -1542,7 +1548,7 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { } /// Create a new outgoing branch entry for a block -fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef { +fn make_branch_entry(block: &BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef { let branch = Branch { // Block this is attached to block: block.clone(), @@ -1591,6 +1597,10 @@ extern "sysv64" fn branch_stub_hit( /// Called by the generated code when a branch stub is executed /// Triggers compilation of branches and code patching fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 { + if get_option!(dump_insns) { + println!("branch_stub_hit"); + } + assert!(!branch_ptr.is_null()); //branch_ptr is actually: @@ -1770,13 +1780,13 @@ fn get_branch_target( let mut asm = Assembler::new(); - // Call branch_stub_hit(branch_idx, target_idx, ec) + // Call 
branch_stub_hit(branch_ptr, target_idx, ec) let jump_addr = asm.ccall( branch_stub_hit as *mut u8, vec![ - EC, + Opnd::const_ptr(branch_ptr as *const u8), Opnd::UImm(target_idx as u64), - Opnd::const_ptr(branch_ptr as *const u8) + EC, ] ); @@ -1804,7 +1814,7 @@ pub fn gen_branch( ctx1: Option<&Context>, gen_fn: BranchGenFn, ) { - let branchref = make_branch_entry(jit.get_block(), src_ctx, gen_fn); + let branchref = make_branch_entry(&jit.get_block(), src_ctx, gen_fn); // Get the branch targets or stubs let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb); @@ -1835,7 +1845,7 @@ pub fn gen_branch( } fn gen_jump_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, @@ -1845,13 +1855,12 @@ fn gen_jump_branch( } if shape == BranchShape::Default { - //jmp_ptr(cb, target0); - todo!("jmp_ptr with new assembler"); + asm.jmp(target0.into()); } } pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) { - let branchref = make_branch_entry(jit.get_block(), ctx, gen_jump_branch); + let branchref = make_branch_entry(&jit.get_block(), ctx, gen_jump_branch); let mut branch = branchref.borrow_mut(); branch.targets[0] = Some(target0); @@ -1869,10 +1878,25 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut branch.blocks[0] = Some(blockref.clone()); branch.shape = BranchShape::Default; + + + todo!("convert gen_direct_jump to using new asm"); + + + // TODO: could we use regenerate_branch logic here? + + /* // Call the branch generation function branch.start_addr = Some(cb.get_write_ptr()); gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); branch.end_addr = Some(cb.get_write_ptr()); + */ + + + + + + } else { // This None target address signals gen_block_series() to compile the // target block right after this one (fallthrough). 
@@ -1885,7 +1909,8 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut /// Create a stub to force the code up to this point to be executed pub fn defer_compilation( - jit: &JITState, + block: &BlockRef, + insn_idx: u32, cur_ctx: &Context, cb: &mut CodeBlock, ocb: &mut OutlinedCb, @@ -1901,14 +1926,12 @@ pub fn defer_compilation( } next_ctx.chain_depth += 1; - let block_rc = jit.get_block(); - let branch_rc = make_branch_entry(jit.get_block(), cur_ctx, gen_jump_branch); + let branch_rc = make_branch_entry(block, cur_ctx, gen_jump_branch); let mut branch = branch_rc.borrow_mut(); - let block = block_rc.borrow(); let blockid = BlockId { - iseq: block.blockid.iseq, - idx: jit.get_insn_idx(), + iseq: block.borrow().blockid.iseq, + idx: insn_idx, }; branch.target_ctxs[0] = next_ctx; branch.targets[0] = Some(blockid); @@ -1916,7 +1939,9 @@ pub fn defer_compilation( // Call the branch generation function branch.start_addr = Some(cb.get_write_ptr()); - gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + let mut asm = Assembler::new(); + gen_jump_branch(&mut asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + asm.compile(cb); branch.end_addr = Some(cb.get_write_ptr()); } From 580f26959eb31d523ac21d640e21ddbe70779512 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 7 Jul 2022 12:35:18 -0400 Subject: [PATCH 359/546] Get started on branchunless port --- yjit/src/backend/ir.rs | 6 ++++++ yjit/src/codegen.rs | 29 +++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index bdefe1c6bc15ef..2d68936db1acff 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -251,6 +251,12 @@ impl From for Opnd { } } +impl From for Opnd { + fn from(value: i64) -> Self { + Opnd::Imm(value.try_into().unwrap()) + } +} + impl From for Opnd { fn from(value: i32) -> Self { Opnd::Imm(value.try_into().unwrap()) diff 
--git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 6c8fd950a66569..06c7756c7a5490 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3256,19 +3256,20 @@ fn gen_branchif( EndBlock } +*/ fn gen_branchunless_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape, ) { match shape { - BranchShape::Next0 => jnz_ptr(cb, target1.unwrap()), - BranchShape::Next1 => jz_ptr(cb, target0), + BranchShape::Next0 => asm.jnz(target1.unwrap().into()), + BranchShape::Next1 => asm.jz(target0.into()), BranchShape::Default => { - jz_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + asm.jz(target0.into()); + asm.jmp(target1.unwrap().into()); } } } @@ -3276,7 +3277,7 @@ fn gen_branchunless_branch( fn gen_branchunless( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3284,14 +3285,15 @@ fn gen_branchunless( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Test if any bit (outside of the Qnil bit) is on // RUBY_Qfalse /* ...0000 0000 */ // RUBY_Qnil /* ...0000 1000 */ let val_opnd = ctx.stack_pop(1); - test(cb, val_opnd, imm_opnd(!Qnil.as_i64())); + let not_qnil = !Qnil.as_i64(); + asm.test(val_opnd, not_qnil.into()); // Get the branch target instruction offsets let next_idx = jit_next_insn_idx(jit) as i32; @@ -3305,6 +3307,13 @@ fn gen_branchunless( idx: jump_idx.try_into().unwrap(), }; + + + + // TODO: port gen_branch logic + todo!("complete branchunless implementation"); + + /* // Generate the branch instructions gen_branch( jit, @@ -3319,8 +3328,12 @@ fn gen_branchunless( ); EndBlock + */ + + } +/* fn gen_branchnil_branch( cb: &mut CodeBlock, target0: CodePtr, From ab2fa6ebddc13679452ad9f0fc3a7b630a1cbe3a Mon Sep 17 00:00:00 
2001 From: Maxime Chevalier-Boisvert Date: Thu, 7 Jul 2022 16:28:17 -0400 Subject: [PATCH 360/546] Add a backend test with a load of a GC'd VALUE --- yjit/src/backend/tests.rs | 15 +++++++++++++++ yjit/src/backend/x86_64/mod.rs | 1 + 2 files changed, 16 insertions(+) diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 9ec986f73453d8..d72f0ec0ac4698 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -138,6 +138,21 @@ fn test_load_reg() asm.compile_with_num_regs(&mut cb, 1); } +// Test load of a GC'd value +#[test] +fn test_load_value() +{ + let (mut asm, mut cb) = setup_asm(); + + let gcd_value = VALUE(0xFFFFFFFFFFFF00); + assert!(!gcd_value.special_const_p()); + + let out = asm.load(Opnd::Value(gcd_value)); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_num_regs(&mut cb, 1); +} + // Multiple registers needed and register reuse #[test] fn test_reuse_reg() diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 19b5096a26f884..7a26650549bd18 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -44,6 +44,7 @@ impl From for X86Opnd { Opnd::UImm(val) => uimm_opnd(val), Opnd::Imm(val) => imm_opnd(val), + Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64), // General-purpose register Opnd::Reg(reg) => X86Opnd::Reg(reg), From 0551115912fd6682187dd501275096fdb7570084 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 7 Jul 2022 16:39:39 -0400 Subject: [PATCH 361/546] Add #[must_use] annotations to asm instructions --- yjit/src/backend/ir.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 2d68936db1acff..c9e75df01aedde 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -804,6 +804,7 @@ macro_rules! 
def_push_1_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler { + #[must_use] pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd { self.push_insn($opcode, vec![opnd0], None) @@ -828,6 +829,7 @@ macro_rules! def_push_2_opnd { ($op_name:ident, $opcode:expr) => { impl Assembler { + #[must_use] pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { self.push_insn($opcode, vec![opnd0, opnd1], None) From 6773832ab9cad3c7bcb3b93ef85a4bcfc9b3a4e3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 8 Jul 2022 13:01:21 -0400 Subject: [PATCH 362/546] More Arm64 lowering/backend work (https://github.com/Shopify/ruby/pull/307) * More Arm64 lowering/backend work * We now have encoding support for the LDR instruction for loading a PC-relative memory location * You can now call add/adds/sub/subs with signed immediates, which switches appropriately based on sign * We can now load immediates into registers appropriately, attempting to keep the minimal number of instructions: * If it fits into 16 bits, we use just a single movz. * Else if it can be encoded into a bitmask immediate, we use a single mov. * Otherwise we use a movz, a movk, and then optionally another one or two movks. * Fixed a bunch of code to do with the Op::Load opcode. * We now handle GC-offsets properly for Op::Load by skipping around them with a jump instruction. (This will be made better by constant pools in the future.) * Op::Lea is doing what it's supposed to do now. * Fixed a bug in the backend tests to do with not using the result of an Op::Add.
* Fix the remaining tests for Arm64 * Move split loads logic into each backend --- yjit/src/asm/arm64/inst/load_literal.rs | 89 ++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/mod.rs | 131 +++++++++++++++--- yjit/src/asm/arm64/opnd.rs | 1 + yjit/src/asm/mod.rs | 2 +- yjit/src/backend/arm64/mod.rs | 172 +++++++++++++++++++----- yjit/src/backend/ir.rs | 65 --------- yjit/src/backend/tests.rs | 27 +--- yjit/src/backend/x86_64/mod.rs | 105 +++++++++++---- 9 files changed, 431 insertions(+), 163 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/load_literal.rs diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs new file mode 100644 index 00000000000000..a49130c3eb0562 --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -0,0 +1,89 @@ +/// The size of the operands being operated on. +enum Opc { + Size32 = 0b00, + Size64 = 0b01, +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From for Opc { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Opc::Size64, + 32 => Opc::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load literal instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 0 0 0 | +/// | opc.. imm19........................................................... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadLiteral { + /// The number of the register to load the value into. + rt: u8, + + /// The PC-relative number of instructions to load the value from. 
+ imm19: i32, + + /// The size of the operands being operated on. + opc: Opc +} + +impl LoadLiteral { + /// LDR (load literal) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en + pub fn ldr(rt: u8, imm19: i32, num_bits: u8) -> Self { + Self { rt, imm19, opc: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadLiteral) -> Self { + let imm19 = (inst.imm19 as u32) & ((1 << 19) - 1); + + 0 + | ((inst.opc as u32) << 30) + | (1 << 28) + | (FAMILY << 25) + | (imm19 << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LoadLiteral) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr_positive() { + let inst = LoadLiteral::ldr(0, 5, 64); + let result: u32 = inst.into(); + assert_eq!(0x580000a0, result); + } + + #[test] + fn test_ldr_negative() { + let inst = LoadLiteral::ldr(0, -5, 64); + let result: u32 = inst.into(); + assert_eq!(0x58ffff60, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index ae589ca564e83f..f402f6765a92b4 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -9,6 +9,7 @@ mod call; mod data_imm; mod data_reg; mod load; +mod load_literal; mod logical_imm; mod logical_reg; mod mov; @@ -24,6 +25,7 @@ pub use call::Call; pub use data_imm::DataImm; pub use data_reg::DataReg; pub use load::Load; +pub use load_literal::LoadLiteral; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; pub use mov::Mov; diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index ced8b262c5b779..2dc5aa938817ad 100644 --- 
a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -39,11 +39,21 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::add(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + if imm12 < 0 { + DataImm::sub(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + } else { + DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + } }, _ => panic!("Invalid operand combination to add instruction."), }; @@ -68,6 +78,16 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + if imm12 < 0 { + DataImm::subs(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + } else { + DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + } + }, _ => panic!("Invalid operand combination to adds instruction."), }; @@ -237,6 +257,18 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); 
} +/// LDR - load a PC-relative memory address into a register +pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + LoadLiteral::ldr(rt.reg_no, rn, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -415,11 +447,21 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::sub(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + + if imm12 < 0 { + DataImm::add(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + } else { + DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + } }, _ => panic!("Invalid operand combination to sub instruction."), }; @@ -438,11 +480,21 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() }, - (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { assert!(rd.num_bits == rn.num_bits, "rd 
and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); + assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less."); + + DataImm::subs(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + if imm12 < 0 { + DataImm::adds(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + } else { + DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + } }, _ => panic!("Invalid operand combination to subs instruction."), }; @@ -513,25 +565,45 @@ mod tests { } #[test] - fn test_add_register() { + fn test_add_reg() { check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); } #[test] - fn test_add_immediate() { + fn test_add_uimm() { check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7))); } #[test] - fn test_adds_register() { + fn test_add_imm_positive() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_add_imm_negative() { + check_bytes("201c00d1", |cb| add(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_adds_reg() { check_bytes("200002ab", |cb| adds(cb, X0, X1, X2)); } #[test] - fn test_adds_immediate() { + fn test_adds_uimm() { check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); } + #[test] + fn test_adds_imm_positive() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_adds_imm_negatve() { + check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7))); + } + #[test] fn test_and_register() { check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); @@ -597,6 +669,11 @@ mod tests { check_bytes("8b01eaf8", |cb| ldaddal(cb, 
X10, X11, X12)); } + #[test] + fn test_ldr() { + check_bytes("40010058", |cb| ldr(cb, X0, 10)); + } + #[test] fn test_ldur_memory() { check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); @@ -678,22 +755,42 @@ mod tests { } #[test] - fn test_sub_register() { + fn test_sub_reg() { check_bytes("200002cb", |cb| sub(cb, X0, X1, X2)); } #[test] - fn test_sub_immediate() { + fn test_sub_uimm() { check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7))); } #[test] - fn test_subs_register() { + fn test_sub_imm_positive() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_sub_imm_negative() { + check_bytes("201c0091", |cb| sub(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_subs_reg() { check_bytes("200002eb", |cb| subs(cb, X0, X1, X2)); } #[test] - fn test_subs_immediate() { + fn test_subs_imm_positive() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_subs_imm_negative() { + check_bytes("201c00b1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_subs_uimm() { check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); } diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index 6c06d2db3c2254..1738f0985c3811 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -175,3 +175,4 @@ pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 }; // C argument registers pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; +pub const C_ARG_REGREGS: [A64Reg; 4] = [X0_REG, X1_REG, X2_REG, X3_REG]; diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 5723406aecf5a3..126c9a8548bd6d 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -174,7 +174,7 @@ impl CodeBlock { } /// Write multiple bytes starting from the current position. 
- fn write_bytes(&mut self, bytes: &[u8]) { + pub fn write_bytes(&mut self, bytes: &[u8]) { for byte in bytes { self.write_byte(*byte); } diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 061d21d19bab60..7e6a187f8face3 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -61,10 +61,7 @@ impl Assembler /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec { - vec![ - X12_REG, - X13_REG - ] + vec![C_RET_REG, X12_REG] } /// Split platform-specific instructions @@ -75,8 +72,21 @@ impl Assembler fn arm64_split(mut self) -> Assembler { self.forward_pass(|asm, index, op, opnds, target| { + // Load all Value operands into registers that aren't already a part + // of Load instructions. + let opnds = match op { + Op::Load => opnds, + _ => opnds.into_iter().map(|opnd| { + if let Opnd::Value(_) = opnd { + asm.load(opnd) + } else { + opnd + } + }).collect() + }; + match op { - Op::Add | Op::Sub => { + Op::Add | Op::And | Op::Sub => { // Check if one of the operands is a register. If it is, // then we'll make that the first operand. match (opnds[0], opnds[1]) { @@ -95,6 +105,28 @@ impl Assembler } } }, + Op::CCall => { + assert!(opnds.len() < C_ARG_REGS.len()); + + // For each of the operands we're going to first load them + // into a register and then move them into the correct + // argument register. + for (idx, opnd) in opnds.into_iter().enumerate() { + let value = asm.load(opnd); + asm.mov(Opnd::Reg(C_ARG_REGREGS[idx]), value); + } + + // Now we push the CCall without any arguments so that it + // just performs the call. + asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]); + }, + Op::CRet => { + if opnds[0] != Opnd::Reg(C_RET_REG) { + let value = asm.load(opnds[0]); + asm.mov(C_RET_OPND, value); + } + asm.cret(C_RET_OPND); + }, Op::IncrCounter => { // Every operand to the IncrCounter instruction need to be a // register once it gets there. 
So here we're going to load @@ -154,6 +186,16 @@ impl Assembler asm.store(opnds[0], opnd1); }, + Op::Test => { + // The value being tested must be in a register, so if it's + // not already one we'll load it first. + let opnd0 = match opnds[0] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], + _ => asm.load(opnds[0]) + }; + + asm.test(opnd0, opnds[1]); + }, _ => { asm.push_insn(op, opnds, target); } @@ -165,6 +207,45 @@ impl Assembler /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec { + /// Emit the required instructions to load the given value into the + /// given register. Our goal here is to use as few instructions as + /// possible to get this value into the register. + fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) { + let mut current = value; + + if current <= 0xffff { + // If the value fits into a single movz + // instruction, then we'll use that. + movz(cb, rd, A64Opnd::new_uimm(current), 0); + } else if BitmaskImmediate::try_from(current).is_ok() { + // Otherwise, if the immediate can be encoded + // with the special bitmask immediate encoding, + // we'll use that. + mov(cb, rd, A64Opnd::new_uimm(current)); + } else { + // Finally we'll fall back to encoding the value + // using movz for the first 16 bits and movk for + // each subsequent set of 16 bits as long we + // they are necessary. + movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); + + // (We're sure this is necessary since we + // checked if it only fit into movz above). + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); + } + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); + } + } + } + /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. 
fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) { @@ -203,7 +284,7 @@ impl Assembler // wasn't met, in which case we'll jump past the // next instruction that perform the direct jump. b(cb, A64Opnd::new_imm(8)); - mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + emit_load_value(cb, X29, dst_addr as u64); br(cb, X29); } } @@ -257,25 +338,57 @@ impl Assembler stur(cb, insn.opnds[1].into(), insn.opnds[0].into()); }, Op::Load => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - - // This assumes only load instructions can contain - // references to GC'd Value operands. If the value being - // loaded is a heap object, we'll report that back out to - // the gc_offsets list. - if let Opnd::Value(val) = insn.opnds[0] { - if !val.special_const_p() { - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); - gc_offsets.push(ptr_offset); + match insn.opnds[0] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => { + mov(cb, insn.out.into(), insn.opnds[0].into()); + }, + Opnd::UImm(uimm) => { + emit_load_value(cb, insn.out.into(), uimm); + }, + Opnd::Imm(imm) => { + emit_load_value(cb, insn.out.into(), imm as u64); + }, + Opnd::Mem(_) => { + ldur(cb, insn.out.into(), insn.opnds[0].into()); + }, + Opnd::Value(value) => { + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. 
+ ldr(cb, insn.out.into(), 1); + b(cb, A64Opnd::new_uimm((SIZEOF_VALUE as u64) / 4)); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + if !value.special_const_p() { + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + } + }, + Opnd::None => { + unreachable!("Attempted to load from None operand"); } - } + }; }, Op::Mov => { mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); }, Op::Lea => { - ldur(cb, insn.out.into(), insn.opnds[0].into()); + let opnd: A64Opnd = insn.opnds[0].into(); + + match opnd { + A64Opnd::Mem(mem) => { + add( + cb, + insn.out.into(), + A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), + A64Opnd::new_imm(mem.disp.into()) + ); + }, + _ => { + panic!("Op::Lea only accepts Opnd::Mem operands."); + } + }; }, Op::CPush => { add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); @@ -286,14 +399,6 @@ impl Assembler sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); }, Op::CCall => { - // Temporary - assert!(insn.opnds.len() < C_ARG_REGS.len()); - - // For each operand - for (idx, opnd) in insn.opnds.iter().enumerate() { - mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); - } - let src_addr = cb.get_write_ptr().into_i64() + 4; let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; @@ -310,17 +415,12 @@ impl Assembler if b_offset_fits_bits(offset) { bl(cb, A64Opnd::new_imm(offset / 4)); } else { - mov(cb, X30, A64Opnd::new_uimm(src_addr as u64)); - mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + emit_load_value(cb, X30, src_addr as u64); + emit_load_value(cb, X29, dst_addr as u64); br(cb, X29); } }, Op::CRet => { - // TODO: bias allocation towards return register - if insn.opnds[0] != Opnd::Reg(C_RET_REG) { - mov(cb, C_RET_OPND.into(), insn.opnds[0].into()); - } - ret(cb, A64Opnd::None); }, Op::Cmp => { @@ -351,7 +451,7 @@ impl Assembler if b_offset_fits_bits(offset) { b(cb, A64Opnd::new_imm(offset / 4)); } else { - mov(cb, X29, A64Opnd::new_uimm(dst_addr as u64)); + emit_load_value(cb, X29, 
dst_addr as u64); br(cb, X29); } }, @@ -398,7 +498,7 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec { - let mut asm = self.arm64_split().split_loads().alloc_regs(regs); + let mut asm = self.arm64_split().alloc_regs(regs); // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index c9e75df01aedde..cd88ec560b4bda 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -534,71 +534,6 @@ impl Assembler asm } - /// Transforms the instructions by splitting instructions that cannot be - /// represented in the final architecture into multiple instructions that - /// can. - pub(super) fn split_loads(self) -> Assembler - { - // Load operands that are GC values into a register - fn load_gc_opnds(op: Op, opnds: Vec, asm: &mut Assembler) -> Vec - { - if op == Op::Load || op == Op::Mov { - return opnds; - } - - fn map_opnd(opnd: Opnd, asm: &mut Assembler) -> Opnd { - if let Opnd::Value(val) = opnd { - // If this is a heap object, load it into a register - if !val.special_const_p() { - asm.load(opnd); - } - } - - opnd - } - - opnds.into_iter().map(|opnd| map_opnd(opnd, asm)).collect() - } - - self.forward_pass(|asm, _, op, opnds, target| { - // Load heap object operands into registers because most - // instructions can't directly work with 64-bit constants - let opnds = load_gc_opnds(op, opnds, asm); - - match op { - // Check for Add, Sub, And, Mov, with two memory operands. - // Load one operand into memory. 
- Op::Add | Op::Sub | Op::And | Op::Mov => { - match opnds.as_slice() { - [Opnd::Mem(_), Opnd::Mem(_)] => { - // We load opnd1 because for mov, opnd0 is the output - let opnd1 = asm.load(opnds[1]); - asm.push_insn(op, vec![opnds[0], opnd1], None); - }, - - [Opnd::Mem(_), Opnd::UImm(val)] => { - if uimm_num_bits(*val) > 32 { - let opnd1 = asm.load(opnds[1]); - asm.push_insn(op, vec![opnds[0], opnd1], None); - } - else - { - asm.push_insn(op, opnds, target); - } - }, - - _ => { - asm.push_insn(op, opnds, target); - } - } - }, - _ => { - asm.push_insn(op, opnds, target); - } - }; - }) - } - /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index d72f0ec0ac4698..27f799fc316532 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -43,22 +43,6 @@ fn test_add() { asm.add(out, Opnd::UImm(2)); } -#[test] -fn test_split_loads() { - let mut asm = Assembler::new(); - - let regs = Assembler::get_alloc_regs(); - - asm.add( - Opnd::mem(64, Opnd::Reg(regs[0]), 0), - Opnd::mem(64, Opnd::Reg(regs[1]), 0) - ); - - let result = asm.split_loads(); - assert_eq!(result.insns.len(), 2); - assert_eq!(result.insns[0].op, Op::Load); -} - #[test] fn test_alloc_regs() { let mut asm = Assembler::new(); @@ -109,7 +93,8 @@ fn test_compile() let regs = Assembler::get_alloc_regs(); let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); - asm.add(out, Opnd::UImm(2)); + let out2 = asm.add(out, Opnd::UImm(2)); + asm.store(Opnd::mem(64, SP, 0), out2); asm.compile_with_num_regs(&mut cb, 1); } @@ -162,7 +147,7 @@ fn test_reuse_reg() let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); - let v2 = asm.add(v0, Opnd::UImm(1)); // Reuse v1 register + let v2 = asm.add(v1, Opnd::UImm(1)); // Reuse v1 
register let v3 = asm.add(v0, v2); asm.store(Opnd::mem(64, SP, 0), v2); @@ -202,7 +187,7 @@ fn test_base_insn_out() // Increment and store the updated value asm.incr_counter(counter_opnd, 1.into()); - asm.compile_with_num_regs(&mut cb, 1); + asm.compile_with_num_regs(&mut cb, 2); } #[test] @@ -262,7 +247,7 @@ fn test_jcc_ptr() ); asm.jnz(side_exit); - asm.compile_with_num_regs(&mut cb, 1); + asm.compile_with_num_regs(&mut cb, 2); } /// Direct jump to a stub e.g. for deferred compilation @@ -293,5 +278,5 @@ fn test_jo() asm.mov(Opnd::mem(64, SP, 0), out_val); - asm.compile_with_num_regs(&mut cb, 1); + asm.compile_with_num_regs(&mut cb, 2); } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 7a26650549bd18..4fd30e7144d6f0 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -2,7 +2,7 @@ #![allow(unused_variables)] #![allow(unused_imports)] -use crate::asm::{CodeBlock}; +use crate::asm::{uimm_num_bits, CodeBlock}; use crate::asm::x86_64::*; use crate::codegen::{JITState}; use crate::cruby::*; @@ -82,36 +82,97 @@ impl Assembler let live_ranges: Vec = std::mem::take(&mut self.live_ranges); self.forward_pass(|asm, index, op, opnds, target| { + // Load heap object operands into registers because most + // instructions can't directly work with 64-bit constants + let opnds = match op { + Op::Load | Op::Mov => opnds, + _ => opnds.into_iter().map(|opnd| { + if let Opnd::Value(value) = opnd { + if !value.special_const_p() { + asm.load(opnd) + } else { + opnd + } + } else { + opnd + } + }).collect() + }; + match op { - Op::Add | Op::Sub | Op::And | Op::Not => { - match opnds[0] { + Op::Add | Op::Sub | Op::And => { + let (opnd0, opnd1) = match (opnds[0], opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + (asm.load(opnds[0]), asm.load(opnds[1])) + }, + (Opnd::Mem(_), Opnd::UImm(value)) => { + if uimm_num_bits(value) > 32 { + (asm.load(opnds[0]), asm.load(opnds[1])) + } else { + (asm.load(opnds[0]), opnds[1]) + } + }, 
// Instruction output whose live range spans beyond this instruction - Opnd::InsnOut{idx, ..} => { + (Opnd::InsnOut { idx, .. }, _) => { if live_ranges[idx] > index { - let opnd0 = asm.load(opnds[0]); - let mut new_opnds = vec![opnd0]; - new_opnds.extend_from_slice(&opnds[1..]); - asm.push_insn(op, new_opnds, None); - return; + (asm.load(opnds[0]), opnds[1]) + } else { + (opnds[0], opnds[1]) } }, - // We have to load memory and register operands to avoid corrupting them - Opnd::Mem(_) | Opnd::Reg(_) => { - let opnd0 = asm.load(opnds[0]); - let mut new_opnds = vec![opnd0]; - new_opnds.extend_from_slice(&opnds[1..]); - asm.push_insn(op, new_opnds, None); - return; + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + (asm.load(opnds[0]), opnds[1]) }, + _ => (opnds[0], opnds[1]) + }; - _ => {} + asm.push_insn(op, vec![opnd0, opnd1], target); + }, + Op::Mov => { + match (opnds[0], opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + // We load opnd1 because for mov, opnd0 is the output + let opnd1 = asm.load(opnds[1]); + asm.mov(opnds[0], opnd1); + }, + (Opnd::Mem(_), Opnd::UImm(value)) => { + if uimm_num_bits(value) > 32 { + let opnd1 = asm.load(opnds[1]); + asm.mov(opnds[0], opnd1); + } else { + asm.mov(opnds[0], opnds[1]); + } + }, + _ => { + asm.mov(opnds[0], opnds[1]); + } } }, - _ => {} + Op::Not => { + let opnd0 = match opnds[0] { + // If we have an instruction output whose live range + // spans beyond this instruction, we have to load it. + Opnd::InsnOut { idx, .. } => { + if live_ranges[idx] > index { + asm.load(opnds[0]) + } else { + opnds[0] + } + }, + // We have to load memory and register operands to avoid + // corrupting them. + Opnd::Mem(_) | Opnd::Reg(_) => asm.load(opnds[0]), + // Otherwise we can just reuse the existing operand. 
+ _ => opnds[0] + }; + + asm.not(opnd0); + }, + _ => { + asm.push_insn(op, opnds, target); + } }; - - asm.push_insn(op, opnds, target); }) } @@ -270,9 +331,7 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec { - let mut asm = self.x86_split(); - let mut asm = asm.split_loads(); - let mut asm = asm.alloc_regs(regs); + let mut asm = self.x86_split().alloc_regs(regs); // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { From e1f3f038e93d5b36ed6e6a15feac478bf3cfe1fa Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 8 Jul 2022 16:35:51 -0400 Subject: [PATCH 363/546] Fix jumps (https://github.com/Shopify/ruby/pull/309) * Jumps for A64 should be in # of instructions * More splitting for Arm64 https://github.com/Shopify/ruby/pull/309 --- yjit/src/backend/arm64/mod.rs | 56 +++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 7e6a187f8face3..0fee18c068fe96 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -140,6 +140,14 @@ impl Assembler asm.incr_counter(new_opnds[0], new_opnds[1]); }, + Op::JmpOpnd => { + if let Opnd::Mem(_) = opnds[0] { + let opnd0 = asm.load(opnds[0]); + asm.jmp_opnd(opnd0); + } else { + asm.jmp_opnd(opnds[0]); + } + }, Op::Mov => { // The value that is being moved must be either a register // or an immediate that can be encoded as a bitmask @@ -150,7 +158,15 @@ impl Assembler Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnds[1]), Opnd::UImm(uimm) => { if let Ok(encoded) = BitmaskImmediate::try_from(uimm) { - opnds[1] + if let Opnd::Mem(_) = opnds[0] { + // If the first operand is a memory operand, + // we're going to transform this into a + // store instruction, so we'll need to load + // this anyway. 
+ asm.load(opnds[1]) + } else { + opnds[1] + } } else { asm.load(opnds[1]) } @@ -158,9 +174,9 @@ impl Assembler _ => unreachable!() }; - /// If we're attempting to load into a memory operand, then - /// we'll switch over to the store instruction. Otherwise - /// we'll use the normal mov instruction. + // If we're attempting to load into a memory operand, then + // we'll switch over to the store instruction. Otherwise + // we'll use the normal mov instruction. match opnds[0] { Opnd::Mem(_) => asm.store(opnds[0], value), _ => asm.mov(opnds[0], value) @@ -207,6 +223,26 @@ impl Assembler /// Returns a list of GC offsets pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec { + /// Determine how many instructions it will take to represent moving + /// this value into a register. Note that the return value of this + /// function must correspond to how many instructions are used to + /// represent this load in the emit_load_value function. + fn emit_load_size(value: u64) -> u8 { + if BitmaskImmediate::try_from(value).is_ok() { + return 1; + } + + if value < (1 << 16) { + 1 + } else if value < (1 << 32) { + 2 + } else if value < (1 << 48) { + 3 + } else { + 4 + } + } + /// Emit the required instructions to load the given value into the /// given register. Our goal here is to use as few instructions as /// possible to get this value into the register. @@ -275,7 +311,7 @@ impl Assembler // If we get to this instruction, then the condition // wasn't met, in which case we'll jump past the // next instruction that performs the direct jump. - b(cb, A64Opnd::new_imm(4)); + b(cb, A64Opnd::new_imm(1)); // Here we'll perform the direct jump to the target. b(cb, A64Opnd::new_imm(offset / 4)); @@ -283,8 +319,10 @@ impl Assembler // If we get to this instruction, then the condition // wasn't met, in which case we'll jump past the // next instruction that perform the direct jump. 
- b(cb, A64Opnd::new_imm(8)); - emit_load_value(cb, X29, dst_addr as u64); + let value = dst_addr as u64; + + b(cb, A64Opnd::new_imm(emit_load_size(value).into())); + emit_load_value(cb, X29, value); br(cb, X29); } } @@ -392,10 +430,10 @@ impl Assembler }, Op::CPush => { add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); - mov(cb, A64Opnd::new_mem(64, C_SP_REG, 0), insn.opnds[0].into()); + stur(cb, insn.opnds[0].into(), A64Opnd::new_mem(64, C_SP_REG, 0)); }, Op::CPop => { - mov(cb, insn.out.into(), A64Opnd::new_mem(64, C_SP_REG, 0)); + ldur(cb, insn.opnds[0].into(), A64Opnd::new_mem(64, C_SP_REG, 0)); sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); }, Op::CCall => { From 8864691bde2560ef440c4a8dac16b2c661faa228 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 11 Jul 2022 17:51:58 -0400 Subject: [PATCH 364/546] Better label refs (https://github.com/Shopify/ruby/pull/310) Previously we were using a `Box<dyn FnOnce>` to support patching the code when jumping to labels. We needed to do this because some of the closures that were being used to patch needed to capture local variables (on both X86 and ARM it was the type of condition for the conditional jumps). To get around that, we can instead use const generics since the condition codes are always known at compile-time. This means that the closures go from polymorphic to monomorphic, which means they can be represented as an `fn` instead of a `Box<dyn FnOnce>`, which means they can fall back to a plain function pointer. This simplifies the storage of the `LabelRef` structs and should hopefully be a better default going forward.
--- yjit/src/asm/arm64/arg/condition.rs | 34 ++++++------- yjit/src/asm/arm64/inst/branch_cond.rs | 4 +- yjit/src/asm/arm64/mod.rs | 2 +- yjit/src/asm/mod.rs | 6 +-- yjit/src/asm/x86_64/mod.rs | 66 +++++++++++++------------- yjit/src/backend/arm64/mod.rs | 20 ++++---- 6 files changed, 67 insertions(+), 65 deletions(-) diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs index db269726d77abc..e791e4b0783c44 100644 --- a/yjit/src/asm/arm64/arg/condition.rs +++ b/yjit/src/asm/arm64/arg/condition.rs @@ -1,20 +1,22 @@ /// Various instructions in A64 can have condition codes attached. This enum /// includes all of the various kinds of conditions along with their respective /// encodings. -pub enum Condition { - EQ = 0b0000, // equal to - NE = 0b0001, // not equal to - CS = 0b0010, // carry set (alias for HS) - CC = 0b0011, // carry clear (alias for LO) - MI = 0b0100, // minus, negative - PL = 0b0101, // positive or zero - VS = 0b0110, // signed overflow - VC = 0b0111, // no signed overflow - HI = 0b1000, // greater than (unsigned) - LS = 0b1001, // less than or equal to (unsigned) - GE = 0b1010, // greater than or equal to (signed) - LT = 0b1011, // less than (signed) - GT = 0b1100, // greater than (signed) - LE = 0b1101, // less than or equal to (signed) - AL = 0b1110, // always +pub struct Condition; + +impl Condition { + pub const EQ: u8 = 0b0000; // equal to + pub const NE: u8 = 0b0001; // not equal to + pub const CS: u8 = 0b0010; // carry set (alias for HS) + pub const CC: u8 = 0b0011; // carry clear (alias for LO) + pub const MI: u8 = 0b0100; // minus, negative + pub const PL: u8 = 0b0101; // positive or zero + pub const VS: u8 = 0b0110; // signed overflow + pub const VC: u8 = 0b0111; // no signed overflow + pub const HI: u8 = 0b1000; // greater than (unsigned) + pub const LS: u8 = 0b1001; // less than or equal to (unsigned) + pub const GE: u8 = 0b1010; // greater than or equal to (signed) + pub const LT: u8 = 0b1011; // less than 
(signed) + pub const GT: u8 = 0b1100; // greater than (signed) + pub const LE: u8 = 0b1101; // less than or equal to (signed) + pub const AL: u8 = 0b1110; // always } diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs index 21fdda5d3f6e07..33cc9c364918e7 100644 --- a/yjit/src/asm/arm64/inst/branch_cond.rs +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -11,7 +11,7 @@ use super::super::arg::Condition; /// pub struct BranchCond { /// The kind of condition to check before branching. - cond: Condition, + cond: u8, /// The instruction offset from this instruction to branch to. imm19: i32 @@ -20,7 +20,7 @@ pub struct BranchCond { impl BranchCond { /// B.cond /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- - pub fn bcond(cond: Condition, byte_offset: i32) -> Self { + pub fn bcond(cond: u8, byte_offset: i32) -> Self { Self { cond, imm19: byte_offset >> 2 } } } diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 2dc5aa938817ad..3b5f1ff0226c80 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -167,7 +167,7 @@ pub const fn bcond_offset_fits_bits(offset: i64) -> bool { } /// B.cond - branch to target if condition is true -pub fn bcond(cb: &mut CodeBlock, cond: Condition, byte_offset: A64Opnd) { +pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) { let bytes: [u8; 4] = match byte_offset { A64Opnd::Imm(imm) => { assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 126c9a8548bd6d..2fc75083e450d7 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -30,7 +30,7 @@ struct LabelRef { num_bytes: usize, /// The object that knows how to encode the branch instruction. 
- encode: Box + encode: fn(&mut CodeBlock, i64, i64) } /// Block of memory into which instructions can be assembled @@ -227,11 +227,11 @@ impl CodeBlock { } // Add a label reference at the current write position - pub fn label_ref(&mut self, label_idx: usize, num_bytes: usize, encode: E) where E: FnOnce(&mut CodeBlock, i64, i64) { + pub fn label_ref(&mut self, label_idx: usize, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) { assert!(label_idx < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode: Box::new(encode) }); + self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode }); // Move past however many bytes the instruction takes up self.write_pos += num_bytes; diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index a2a3b47f82799d..d23279f2770d61 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -799,45 +799,45 @@ pub fn int3(cb: &mut CodeBlock) { // Encode a conditional relative jump to a label // Note: this always encodes a 32-bit offset -fn write_jcc(cb: &mut CodeBlock, op: u8, label_idx: usize) { - cb.label_ref(label_idx, 6, move |cb, src_addr, dst_addr| { +fn write_jcc(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 6, |cb, src_addr, dst_addr| { cb.write_byte(0x0F); - cb.write_byte(op); + cb.write_byte(OP); cb.write_int((dst_addr - src_addr) as u64, 32); }); } /// jcc - relative jumps to a label -pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } -pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } -pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } -pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } -pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } -pub fn je_label (cb: &mut 
CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } -pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } -pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } -pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } -pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } -pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x86, label_idx); } -pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x82, label_idx); } -pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } -pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x87, label_idx); } -pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x83, label_idx); } -pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } -pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8E, label_idx); } -pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8C, label_idx); } -pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8D, label_idx); } -pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8F, label_idx); } -pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x81, label_idx); } -pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8b, label_idx); } -pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x89, label_idx); } -pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x85, label_idx); } -pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x80, label_idx); } -pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, label_idx); } -pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8A, 
label_idx); } -pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x8B, label_idx); } -pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x88, label_idx); } -pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x84, label_idx); } +pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); } +pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); } +pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); } +pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); } +pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); } +pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); } +pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); } +pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); } +pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); } +pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); } +pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); } +pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); } +pub 
fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); } +pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); } +pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x81>(cb, label_idx); } +pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8b>(cb, label_idx); } +pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x89>(cb, label_idx); } +pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); } +pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x80>(cb, label_idx); } +pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); } +pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); } +pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8B>(cb, label_idx); } +pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x88>(cb, label_idx); } +pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); } pub fn jmp_label(cb: &mut CodeBlock, label_idx: usize) { cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 0fee18c068fe96..f6429dbceaedb1 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -284,7 +284,7 @@ impl Assembler /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. - fn emit_conditional_jump(cb: &mut CodeBlock, condition: Condition, target: Target) { + fn emit_conditional_jump(cb: &mut CodeBlock, target: Target) { match target { Target::CodePtr(dst_ptr) => { let src_addr = cb.get_write_ptr().into_i64() + 4; @@ -297,12 +297,12 @@ impl Assembler // to load the address into a register and use the branch // register instruction. 
if bcond_offset_fits_bits(offset) { - bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - src_addr)); } else { // If the condition is met, then we'll skip past the // next instruction, put the address in a register, and // jump to it. - bcond(cb, condition, A64Opnd::new_imm(4)); + bcond(cb, CONDITION, A64Opnd::new_imm(4)); // If the offset fits into a direct jump, then we'll use // that and the number of instructions will be shorter. @@ -333,7 +333,7 @@ impl Assembler // offset. We're going to assume we can fit into a single // b.cond instruction. It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - bcond(cb, condition, A64Opnd::new_imm(dst_addr - src_addr)); + bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - src_addr)); }); }, Target::FunPtr(_) => unreachable!() @@ -395,7 +395,7 @@ impl Assembler // being loaded is a heap object, we'll report that // back out to the gc_offsets list. ldr(cb, insn.out.into(), 1); - b(cb, A64Opnd::new_uimm((SIZEOF_VALUE as u64) / 4)); + b(cb, A64Opnd::new_imm((SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); if !value.special_const_p() { @@ -507,19 +507,19 @@ impl Assembler }; }, Op::Je => { - emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + emit_conditional_jump::<{Condition::EQ}>(cb, insn.target.unwrap()); }, Op::Jbe => { - emit_conditional_jump(cb, Condition::LS, insn.target.unwrap()); + emit_conditional_jump::<{Condition::LS}>(cb, insn.target.unwrap()); }, Op::Jz => { - emit_conditional_jump(cb, Condition::EQ, insn.target.unwrap()); + emit_conditional_jump::<{Condition::EQ}>(cb, insn.target.unwrap()); }, Op::Jnz => { - emit_conditional_jump(cb, Condition::NE, insn.target.unwrap()); + emit_conditional_jump::<{Condition::NE}>(cb, insn.target.unwrap()); }, Op::Jo => { - emit_conditional_jump(cb, Condition::VS, insn.target.unwrap()); + emit_conditional_jump::<{Condition::VS}>(cb, insn.target.unwrap()); }, 
Op::IncrCounter => { ldaddal(cb, insn.opnds[0].into(), insn.opnds[0].into(), insn.opnds[1].into()); From 38c2fb8a90172bcaabc153016f0a43b92a921d9c Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 12 Jul 2022 13:40:59 -0400 Subject: [PATCH 365/546] Port YJIT New Backend Temp Checks to Cirrus (https://github.com/Shopify/ruby/pull/312) Co-authored-by: Jean Boussier --- .cirrus.yml | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/.cirrus.yml b/.cirrus.yml index ec8036297c64b0..a2c2cf84aa052b 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -62,3 +62,62 @@ task: make_test-tool_script: make test-tool make_test-all_script: make test-all make_test-spec_script: make test-spec + + +# The following is to test YJIT on ARM64 CPUs available on Cirrus CI +yjit_task: + name: Arm64 Graviton2 / $CC YJIT New Backend Temp Checks + auto_cancellation: $CIRRUS_BRANCH != 'master' + skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}')" + arm_container: + # We use the arm64 images at https://github.com/ruby/ruby-ci-image/pkgs/container/ruby-ci-image . + image: ghcr.io/ruby/ruby-ci-image:$CC + # Define the used cpu core in each matrix task. We can use total 16 cpu + # cores in entire matrix. [cpu] = [total cpu: 16] / [number of tasks] + cpu: 8 + # We can request maximum 4 GB per cpu. + # [memory per task] = [memory per cpu: 4 GB] * [cpu] + memory: 32G + env: + CIRRUS_CLONE_DEPTH: 50 + optflags: '-O1' + debugflags: '-ggdb3' + RUBY_PREFIX: /tmp/ruby-prefix + RUBY_DEBUG: ci rgengc + RUBY_TESTOPTS: >- + -q + --color=always + --tty=no + matrix: + CC: clang-12 + CC: gcc-11 + id_script: id + set_env_script: + # Set `GNUMAKEFLAGS`, because the flags are GNU make specific. Note using + # the `make` environment variable used in compilers.yml causes some rubygems + # tests to fail. 
+ # https://github.com/rubygems/rubygems/issues/4921 + - echo "GNUMAKEFLAGS=-s -j$((1 + $CIRRUS_CPU))" >> $CIRRUS_ENV + print_env_script: + - echo "GNUMAKEFLAGS=$GNUMAKEFLAGS" + # Arm containers are executed in AWS's EKS, and it's not yet supporting IPv6 + # See https://github.com/aws/containers-roadmap/issues/835 + disable_ipv6_script: sudo ./tool/disable_ipv6.sh + install_rust_script: + - sudo apt-get update -y + - sudo apt-get install -y curl + - "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y" + autogen_script: ./autogen.sh + configure_script: >- + source $HOME/.cargo/env && ./configure -C + --enable-debug-env + --disable-install-doc + --with-ext=-test-/cxxanyargs,+ + --prefix="$RUBY_PREFIX" + --enable-yjit=dev + make_miniruby_script: source $HOME/.cargo/env && make -j miniruby + make_bindgen_script: source $HOME/.cargo/env && make -j yjit-bindgen + boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 + # output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 + bootstrap_tests_script: RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. 
-I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" bootstraptest/test_yjit_new_backend.rb + # full_build_script: make -j From b45b29fdbf43ea6043c1df041372842a07f5b3a8 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 12 Jul 2022 14:59:31 -0400 Subject: [PATCH 366/546] Port gen_getlocal() --- yjit/src/codegen.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 06c7756c7a5490..e78463ce5e6c08 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1541,23 +1541,24 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { ep_opnd } -/* fn gen_getlocal_generic( ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, local_idx: u32, level: u32, ) -> CodegenStatus { - gen_get_ep(cb, REG0, level); + // Load environment pointer EP (level 0) from CFP + let ep_opnd = gen_get_ep(asm, level); // Load the local from the block // val = *(vm_get_ep(GET_EP(), level) - idx); let offs = -(SIZEOF_VALUE as i32 * local_idx as i32); - mov(cb, REG0, mem_opnd(64, REG0, offs)); + let local_opnd = Opnd::mem(64, ep_opnd, offs); // Write the local at SP let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, stack_top, REG0); + + asm.mov(stack_top, local_opnd); KeepCompiling } @@ -1565,24 +1566,25 @@ fn gen_getlocal_generic( fn gen_getlocal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0); let level = jit_get_arg(jit, 1); - gen_getlocal_generic(ctx, cb, idx.as_u32(), level.as_u32()) + gen_getlocal_generic(ctx, asm, idx.as_u32(), level.as_u32()) } fn gen_getlocal_wc1( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0); - gen_getlocal_generic(ctx, cb, idx.as_u32(), 1) + gen_getlocal_generic(ctx, asm, idx.as_u32(), 1) } +/* fn 
gen_setlocal_wc0( jit: &mut JITState, ctx: &mut Context, From 86606e01ee984cd9aebdcc68c0b5025604fb5184 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 20 Jul 2022 15:00:42 -0400 Subject: [PATCH 367/546] Port over setlocal_wc0 --- yjit/src/backend/ir.rs | 8 +++++++- yjit/src/codegen.rs | 23 +++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index cd88ec560b4bda..5758d72d4347da 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -253,7 +253,7 @@ impl From for Opnd { impl From for Opnd { fn from(value: i64) -> Self { - Opnd::Imm(value.try_into().unwrap()) + Opnd::Imm(value) } } @@ -263,6 +263,12 @@ impl From for Opnd { } } +impl From for Opnd { + fn from(value: u32) -> Self { + Opnd::UImm(value as u64) + } +} + impl From for Opnd { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e78463ce5e6c08..1b05dc36c6f069 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1584,11 +1584,10 @@ fn gen_getlocal_wc1( gen_getlocal_generic(ctx, asm, idx.as_u32(), 1) } -/* fn gen_setlocal_wc0( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { /* @@ -1609,25 +1608,25 @@ fn gen_setlocal_wc0( let value_type = ctx.get_opnd_type(StackOpnd(0)); // Load environment pointer EP (level 0) from CFP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers // only affect heap objects being written. If we know an immediate value is being written we // can skip this check. 
if !value_type.is_imm() { // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG0, + ep_opnd, SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED as i64)); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); + asm.jnz(side_exit.into()); } // Set the type of the local variable in the context @@ -1635,15 +1634,15 @@ fn gen_setlocal_wc0( // Pop the value to write from the stack let stack_top = ctx.stack_pop(1); - mov(cb, REG1, stack_top); // Write the value at the environment pointer let offs: i32 = -8 * slot_idx; - mov(cb, mem_opnd(64, REG0, offs), REG1); + asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); KeepCompiling } +/* fn gen_setlocal_generic( jit: &mut JITState, ctx: &mut Context, @@ -5992,11 +5991,11 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_topn => Some(gen_topn), YARVINSN_adjuststack => Some(gen_adjuststack), - //YARVINSN_getlocal => Some(gen_getlocal), + YARVINSN_getlocal => Some(gen_getlocal), YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), - //YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), + YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), //YARVINSN_setlocal => Some(gen_setlocal), - //YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), + YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), //YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), YARVINSN_opt_plus => Some(gen_opt_plus), /* From 6e5382780f4c6cdb7eef945898448639071969fc Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 12 Jul 2022 15:28:54 -0400 Subject: [PATCH 368/546] Port over putstring --- yjit/src/codegen.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1b05dc36c6f069..a54137fa978ced 100644 --- 
a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1754,28 +1754,31 @@ fn gen_newhash( KeepCompiling } +*/ fn gen_putstring( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let put_val = jit_get_arg(jit, 0); // Save the PC and SP because the callee will allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - mov(cb, C_ARG_REGS[0], REG_EC); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val); - call_ptr(cb, REG0, rb_ec_str_resurrect as *const u8); + let str_opnd = asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into()] + ); let stack_top = ctx.stack_push(Type::CString); - mov(cb, stack_top, RAX); + asm.mov(stack_top, str_opnd); KeepCompiling } +/* // Push Qtrue or Qfalse depending on whether the given keyword was supplied by // the caller fn gen_checkkeyword( @@ -6018,7 +6021,9 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), + */ YARVINSN_putstring => Some(gen_putstring), + /* YARVINSN_expandarray => Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), YARVINSN_checkkeyword => Some(gen_checkkeyword), From 8d743e965e6bf95ea1649839fc1fe2429564c2d9 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 12 Jul 2022 17:04:09 -0400 Subject: [PATCH 369/546] Fix compile errors on arm on the CI (https://github.com/Shopify/ruby/pull/313) * Fix compile errors on arm on the CI * Fix typo --- yjit/src/backend/tests.rs | 7 +++++-- yjit/src/core.rs | 18 ++++++++-------- yjit/src/cruby.rs | 2 +- yjit/src/utils.rs | 43 +++++++++++++++++++++++++++++---------- 4 files changed, 48 insertions(+), 22 deletions(-) diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 27f799fc316532..d386d31d73d411 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -5,6 
+5,7 @@ use crate::virtualmem::{CodePtr}; use crate::backend::ir::*; use crate::cruby::*; use crate::core::*; +use crate::utils::c_callable; use InsnOpnd::*; // Test that this function type checks @@ -193,8 +194,10 @@ fn test_base_insn_out() #[test] fn test_c_call() { - extern "sysv64" fn dummy_c_fun(v0: usize, v1: usize) - { + c_callable! { + fn dummy_c_fun(v0: usize, v1: usize) + { + } } let (mut asm, mut cb) = setup_asm(); diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 1b9026024854e8..6c7044c8433a5f 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1584,14 +1584,16 @@ fn make_branch_entry(block: &BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) - /// Generated code calls this function with the SysV calling convention. /// See [get_branch_target]. -extern "sysv64" fn branch_stub_hit( - branch_ptr: *const c_void, - target_idx: u32, - ec: EcPtr, -) -> *const u8 { - with_vm_lock(src_loc!(), || { - branch_stub_hit_body(branch_ptr, target_idx, ec) - }) +c_callable! { + fn branch_stub_hit( + branch_ptr: *const c_void, + target_idx: u32, + ec: EcPtr, + ) -> *const u8 { + with_vm_lock(src_loc!(), || { + branch_stub_hit_body(branch_ptr, target_idx, ec) + }) + } } /// Called by the generated code when a branch stub is executed diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 8543b6d971edc6..1c31b8c1494c69 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -610,7 +610,7 @@ impl From for u16 { /// Produce a Ruby string from a Rust string slice #[cfg(feature = "asm_comments")] pub fn rust_str_to_ruby(str: &str) -> VALUE { - unsafe { rb_utf8_str_new(str.as_ptr() as *const i8, str.len() as i64) } + unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) } } /// Produce a Ruby symbol from a Rust string slice diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index ade573b8da9881..dd89413090a0de 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -151,6 +151,19 @@ yjit_print_iseq(const rb_iseq_t *iseq) } */ +#[cfg(target_arch = 
"aarch64")] +macro_rules! c_callable { + (fn $f:ident $args:tt -> $ret:ty $body:block) => { fn $f $args -> $ret $body }; + (fn $f:ident $args:tt $body:block) => { fn $f $args $body }; +} + +#[cfg(target_arch = "x86_64")] +macro_rules! c_callable { + (fn $f:ident $args:tt -> $ret:ty $body:block) => { extern "sysv64" fn $f $args -> $ret $body }; + (fn $f:ident $args:tt $body:block) => { extern "sysv64" fn $f $args $body }; +} +pub(crate) use c_callable; + // Save caller-save registers on the stack before a C call fn push_regs(cb: &mut CodeBlock) { push(cb, RAX); @@ -180,8 +193,10 @@ fn pop_regs(cb: &mut CodeBlock) { } pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) { - extern "sysv64" fn print_int_fn(val: i64) { - println!("{}", val); + c_callable!{ + fn print_int_fn(val: i64) { + println!("{}", val); + } } push_regs(cb); @@ -208,8 +223,10 @@ pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) { /// Generate code to print a pointer pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) { - extern "sysv64" fn print_ptr_fn(ptr: *const u8) { - println!("{:p}", ptr); + c_callable!{ + fn print_ptr_fn(ptr: *const u8) { + println!("{:p}", ptr); + } } assert!(opnd.num_bits() == 64); @@ -223,8 +240,10 @@ pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) { /// Generate code to print a value pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) { - extern "sysv64" fn print_value_fn(val: VALUE) { - unsafe { rb_obj_info_dump(val) } + c_callable!{ + fn print_value_fn(val: VALUE) { + unsafe { rb_obj_info_dump(val) } + } } assert!(opnd.num_bits() == 64); @@ -240,11 +259,13 @@ pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) { /// Generate code to print constant string to stdout pub fn print_str(cb: &mut CodeBlock, str: &str) { - extern "sysv64" fn print_str_cfun(ptr: *const u8, num_bytes: usize) { - unsafe { - let slice = slice::from_raw_parts(ptr, num_bytes); - let str = std::str::from_utf8(slice).unwrap(); - println!("{}", str); + c_callable!{ + fn print_str_cfun(ptr: 
*const u8, num_bytes: usize) { + unsafe { + let slice = slice::from_raw_parts(ptr, num_bytes); + let str = std::str::from_utf8(slice).unwrap(); + println!("{}", str); + } } } From 15c6aacd399b2bc7fb5ee2d6422451c4eb47941f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 13 Jul 2022 16:48:55 -0400 Subject: [PATCH 370/546] Encode MRS and MSR for AArch64 (https://github.com/Shopify/ruby/pull/315) --- yjit/src/asm/arm64/arg/mod.rs | 2 + yjit/src/asm/arm64/arg/sys_reg.rs | 6 +++ yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/inst/sys_reg.rs | 86 ++++++++++++++++++++++++++++++ yjit/src/asm/arm64/mod.rs | 34 ++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 yjit/src/asm/arm64/arg/sys_reg.rs create mode 100644 yjit/src/asm/arm64/inst/sys_reg.rs diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs index 0d2f1ac28a462e..bb779ab6dfccba 100644 --- a/yjit/src/asm/arm64/arg/mod.rs +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -4,7 +4,9 @@ mod bitmask_imm; mod condition; mod sf; +mod sys_reg; pub use bitmask_imm::BitmaskImmediate; pub use condition::Condition; pub use sf::Sf; +pub use sys_reg::SystemRegister; diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs new file mode 100644 index 00000000000000..41d71920cb74cc --- /dev/null +++ b/yjit/src/asm/arm64/arg/sys_reg.rs @@ -0,0 +1,6 @@ +/// The encoded representation of an A64 system register. 
+/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/ +pub enum SystemRegister { + /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en + NZCV = 0b1_011_0100_0010_000 +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index f402f6765a92b4..9dfc923f53d594 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -16,6 +16,7 @@ mod mov; mod nop; mod shift_imm; mod store; +mod sys_reg; pub use atomic::Atomic; pub use branch::Branch; @@ -32,3 +33,4 @@ pub use mov::Mov; pub use nop::Nop; pub use shift_imm::ShiftImm; pub use store::Store; +pub use sys_reg::SysReg; diff --git a/yjit/src/asm/arm64/inst/sys_reg.rs b/yjit/src/asm/arm64/inst/sys_reg.rs new file mode 100644 index 00000000000000..108737a870a92f --- /dev/null +++ b/yjit/src/asm/arm64/inst/sys_reg.rs @@ -0,0 +1,86 @@ +use super::super::arg::SystemRegister; + +/// Which operation to perform (loading or storing the system register value). +enum L { + /// Store the value of a general-purpose register in a system register. + MSR = 0, + + /// Store the value of a system register in a general-purpose register. + MRS = 1 +} + +/// The struct that represents an A64 system register instruction that can be +/// encoded. +/// +/// MSR/MRS (register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 1 | +/// | L o0 op1..... CRn........ CRm........ op2..... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SysReg { + /// The register to load the system register value into. + rt: u8, + + /// Which system register to load or store. 
+ systemreg: SystemRegister, + + /// Which operation to perform (loading or storing the system register value). + l: L +} + +impl SysReg { + /// MRS (register) + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en + pub fn mrs(rt: u8, systemreg: SystemRegister) -> Self { + SysReg { rt, systemreg, l: L::MRS } + } + + /// MSR (register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en + pub fn msr(systemreg: SystemRegister, rt: u8) -> Self { + SysReg { rt, systemreg, l: L::MSR } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove +const FAMILY: u32 = 0b110101010001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: SysReg) -> Self { + 0 + | (FAMILY << 20) + | ((inst.l as u32) << 21) + | ((inst.systemreg as u32) << 5) + | inst.rt as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: SysReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mrs() { + let inst = SysReg::mrs(0, SystemRegister::NZCV); + let result: u32 = inst.into(); + assert_eq!(0xd53b4200, result); + } + + #[test] + fn test_msr() { + let inst = SysReg::msr(SystemRegister::NZCV, 0); + let result: u32 = inst.into(); + assert_eq!(0xd51b4200, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 3b5f1ff0226c80..7adc1a274544b2 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -364,6 +364,30 @@ pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MRS - move a system register into a general-purpose register +pub fn mrs(cb: &mut CodeBlock, rt: A64Opnd, systemregister: SystemRegister) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + SysReg::mrs(rt.reg_no, systemregister).into() + }, + _ => panic!("Invalid operand combination to mrs instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MSR - move a general-purpose register into a system register +pub fn msr(cb: &mut CodeBlock, systemregister: SystemRegister, rt: A64Opnd) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + SysReg::msr(systemregister, rt.reg_no).into() + }, + _ => panic!("Invalid operand combination to msr instruction") + }; + + cb.write_bytes(&bytes); +} + /// MVN - move a value in a register to another register, negating it pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rm) { @@ -714,6 +738,16 @@ mod tests { check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); } + #[test] + fn test_mrs() { + check_bytes("0a423bd5", |cb| mrs(cb, X10, SystemRegister::NZCV)); + } + + #[test] + fn test_msr() { + check_bytes("0a421bd5", |cb| msr(cb, SystemRegister::NZCV, X10)); + } + #[test] fn test_mvn() { check_bytes("ea032baa", |cb| 
mvn(cb, X10, X11)); From ac77d151d6ef2848a709ff832424fca9cbb66ac6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 14 Jul 2022 10:35:09 -0400 Subject: [PATCH 371/546] Assert that the # of bytes matches for label refs (https://github.com/Shopify/ruby/pull/316) --- yjit/src/asm/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 2fc75083e450d7..0e05eb57831751 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -252,6 +252,10 @@ impl CodeBlock { self.set_pos(ref_pos); (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); + + // Assert that we've written the same number of bytes that we + // expected to have written. + assert!(self.write_pos == ref_pos + label_ref.num_bytes); } self.write_pos = orig_pos; From 159566fef91b010d8e236151bdbc77993f77c15f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 14 Jul 2022 11:10:58 -0400 Subject: [PATCH 372/546] Op::CPushAll and Op::CPopAll (https://github.com/Shopify/ruby/pull/317) Instructions for pushing all caller-save registers and the flags so that we can implement dump_insns. 
--- yjit/src/asm/arm64/opnd.rs | 30 ++++++++------ yjit/src/backend/arm64/mod.rs | 73 +++++++++++++++++++++++++++++----- yjit/src/backend/ir.rs | 7 ++++ yjit/src/backend/x86_64/mod.rs | 27 +++++++++++++ 4 files changed, 116 insertions(+), 21 deletions(-) diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index 1738f0985c3811..e1f95979a9e151 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -88,6 +88,7 @@ impl A64Opnd { } } +// argument registers pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 }; pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 }; pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; @@ -95,15 +96,20 @@ pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 }; pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 }; +// caller-save registers pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 }; pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 }; pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 }; pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 }; +pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 }; +pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 }; -pub const X24_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 24 }; -pub const X25_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 25 }; -pub const X26_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 26 }; +// callee-save registers +pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 }; +pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; +pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 }; +pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 }; // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); @@ -120,19 +126,19 @@ pub const X10: A64Opnd = A64Opnd::Reg(X10_REG); 
pub const X11: A64Opnd = A64Opnd::Reg(X11_REG); pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); -pub const X14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 14 }); -pub const X15: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 15 }); +pub const X14: A64Opnd = A64Opnd::Reg(X14_REG); +pub const X15: A64Opnd = A64Opnd::Reg(X15_REG); pub const X16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 16 }); pub const X17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 17 }); pub const X18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 18 }); -pub const X19: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 19 }); -pub const X20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 20 }); -pub const X21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 21 }); -pub const X22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 22 }); +pub const X19: A64Opnd = A64Opnd::Reg(X19_REG); +pub const X20: A64Opnd = A64Opnd::Reg(X20_REG); +pub const X21: A64Opnd = A64Opnd::Reg(X21_REG); +pub const X22: A64Opnd = A64Opnd::Reg(X22_REG); pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); -pub const X24: A64Opnd = A64Opnd::Reg(X24_REG); -pub const X25: A64Opnd = A64Opnd::Reg(X25_REG); -pub const X26: A64Opnd = A64Opnd::Reg(X26_REG); +pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); +pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); +pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index f6429dbceaedb1..a208eb6316d135 100644 --- a/yjit/src/backend/arm64/mod.rs +++ 
b/yjit/src/backend/arm64/mod.rs @@ -13,9 +13,9 @@ use crate::virtualmem::CodePtr; pub type Reg = A64Reg; // Callee-saved registers -pub const _CFP: Opnd = Opnd::Reg(X24_REG); -pub const _EC: Opnd = Opnd::Reg(X25_REG); -pub const _SP: Opnd = Opnd::Reg(X26_REG); +pub const _CFP: Opnd = Opnd::Reg(X19_REG); +pub const _EC: Opnd = Opnd::Reg(X20_REG); +pub const _SP: Opnd = Opnd::Reg(X21_REG); // C argument registers on this platform pub const _C_ARG_OPNDS: [Opnd; 6] = [ @@ -59,11 +59,15 @@ impl From for A64Opnd { impl Assembler { /// Get the list of registers from which we can allocate on this platform - pub fn get_alloc_regs() -> Vec - { + pub fn get_alloc_regs() -> Vec { vec![C_RET_REG, X12_REG] } + /// Get a list of all of the caller-save registers + pub fn get_caller_save_regs() -> Vec { + vec![X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] + } + /// Split platform-specific instructions /// The transformations done here are meant to make our lives simpler in later /// stages of the compilation pipeline. @@ -340,11 +344,28 @@ impl Assembler }; } + /// Emit a push instruction for the given operand by adding to the stack + /// pointer and then storing the given value. + fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { + add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + stur(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, 0)); + } + + /// Emit a pop instruction into the given operand by loading the value + /// and then subtracting from the stack pointer. + fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { + ldur(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, 0)); + sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + } + // dbg!(&self.insns); // List of GC offsets let mut gc_offsets: Vec = Vec::new(); + // A special scratch register for loading/storing system registers. 
+ let mut sys_scratch = A64Opnd::Reg(X22_REG); + // For each instruction for insn in &self.insns { match insn.op { @@ -429,12 +450,30 @@ impl Assembler }; }, Op::CPush => { - add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); - stur(cb, insn.opnds[0].into(), A64Opnd::new_mem(64, C_SP_REG, 0)); + emit_push(cb, insn.opnds[0].into()); + }, + Op::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + emit_push(cb, A64Opnd::Reg(reg)); + } + + mrs(cb, sys_scratch, SystemRegister::NZCV); + emit_push(cb, sys_scratch); }, Op::CPop => { - ldur(cb, insn.opnds[0].into(), A64Opnd::new_mem(64, C_SP_REG, 0)); - sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + emit_pop(cb, insn.opnds[0].into()); + }, + Op::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + msr(cb, SystemRegister::NZCV, sys_scratch); + emit_pop(cb, sys_scratch); + + for reg in regs.into_iter().rev() { + emit_pop(cb, A64Opnd::Reg(reg)); + } }, Op::CCall => { let src_addr = cb.get_write_ptr().into_i64() + 4; @@ -570,4 +609,20 @@ mod tests { let insns = cb.get_ptr(0).raw_ptr() as *const u32; assert_eq!(0x8b010003, unsafe { *insns }); } + + #[test] + fn test_emit_cpush_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpush_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_cpop_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpop_all(); + asm.compile_with_num_regs(&mut cb, 0); + } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 5758d72d4347da..dbc6464a9c6e52 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -97,6 +97,11 @@ pub enum Op CPush, CPop, + // Push and pop all of the caller-save registers and the flags to/from the C + // stack + CPushAll, + CPopAll, + // C function call with N arguments (variadic) CCall, @@ -804,6 +809,8 @@ def_push_2_opnd!(and, Op::And); def_push_1_opnd!(not, Op::Not); def_push_1_opnd_no_out!(cpush, Op::CPush); def_push_1_opnd_no_out!(cpop, Op::CPop); +def_push_0_opnd_no_out!(cpush_all, 
Op::CPushAll); +def_push_0_opnd_no_out!(cpop_all, Op::CPopAll); def_push_1_opnd_no_out!(cret, Op::CRet); def_push_1_opnd!(load, Op::Load); def_push_1_opnd!(lea, Op::Lea); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4fd30e7144d6f0..4e0a9dcf02d401 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -76,6 +76,14 @@ impl Assembler ] } + /// Get a list of all of the caller-save registers + pub fn get_caller_save_regs() -> Vec { + vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG] + + // Technically these are also caller-save: R12_REG, R13_REG, R14_REG, + // and R15_REG, but we don't use them so we don't include them here. + } + /// Split IR instructions for the x86 platform fn x86_split(mut self) -> Assembler { @@ -239,6 +247,25 @@ impl Assembler Op::CPush => push(cb, insn.opnds[0].into()), Op::CPop => pop(cb, insn.opnds[0].into()), + // Push and pop to the C stack all caller-save registers and the + // flags + Op::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + push(cb, X86Opnd::Reg(reg)); + } + pushfq(cb); + }, + Op::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + popfq(cb); + for reg in regs.into_iter().rev() { + pop(cb, X86Opnd::Reg(reg)); + } + }, + // C function call Op::CCall => { // Temporary From 5e834195fd71652939c54b15952e269a6f172853 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 14 Jul 2022 14:52:57 -0400 Subject: [PATCH 373/546] Exclude X0 (C_RET_REG) from allocatable registers on arm (https://github.com/Shopify/ruby/pull/319) * Exclude X0 (C_RET_REG) from allocatable registers on arm * Add another small test snippett --- yjit/src/backend/arm64/mod.rs | 9 ++++++--- yjit/src/backend/ir.rs | 20 ++++++++++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index a208eb6316d135..94da426ba319d6 100644 
--- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -58,12 +58,15 @@ impl From for A64Opnd { impl Assembler { - /// Get the list of registers from which we can allocate on this platform + /// Get the list of registers from which we will allocate on this platform + /// These are caller-saved registers + /// Note: we intentionally exclude C_RET_REG (X0) from this list + /// because of the way it's used in gen_leave() and gen_leave_exit() pub fn get_alloc_regs() -> Vec { - vec![C_RET_REG, X12_REG] + vec![X11_REG, X12_REG] } - /// Get a list of all of the caller-save registers + /// Get a list of all of the caller-saved registers pub fn get_caller_save_regs() -> Vec { vec![X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index dbc6464a9c6e52..89c12456a20178 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -570,18 +570,26 @@ impl Assembler // Allocate a specific register fn take_reg(pool: &mut u32, regs: &Vec, reg: &Reg) -> Reg { - let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no).unwrap(); - assert_eq!(*pool & (1 << reg_index), 0); - *pool |= 1 << reg_index; - return regs[reg_index]; + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + assert_eq!(*pool & (1 << reg_index), 0); + *pool |= 1 << reg_index; + //return regs[reg_index]; + } + + return *reg; } // Mutate the pool bitmap to indicate that the given register is being // returned as it is no longer used by the instruction that previously // held it. 
fn dealloc_reg(pool: &mut u32, regs: &Vec, reg: &Reg) { - let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no).unwrap(); - *pool &= !(1 << reg_index); + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + *pool &= !(1 << reg_index); + } } let live_ranges: Vec = std::mem::take(&mut self.live_ranges); From a1ea018fd67c38bf24c0ebfdc75e994953c29644 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 14 Jul 2022 15:57:58 -0400 Subject: [PATCH 374/546] Add extra assertion in new_label for Kevin --- yjit/src/backend/ir.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 89c12456a20178..5387629cb87bf2 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -471,8 +471,9 @@ impl Assembler /// Create a new label instance that we can jump to pub fn new_label(&mut self, name: &str) -> Target { - let label_idx = self.label_names.len(); + assert!(!name.contains(" "), "use underscores in label names, not spaces"); + let label_idx = self.label_names.len(); self.label_names.push(name.to_string()); Target::Label(label_idx) } From 6c5008959925d2768e9495a5601b5245784bc87f Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 14 Jul 2022 16:51:01 -0400 Subject: [PATCH 375/546] Port newhash, add tests for newhash, duphash --- yjit/src/backend/arm64/mod.rs | 3 ++ yjit/src/backend/ir.rs | 17 +++++++- yjit/src/backend/x86_64/mod.rs | 5 ++- yjit/src/codegen.rs | 80 ++++++++++++++++++---------------- 4 files changed, 64 insertions(+), 41 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 94da426ba319d6..22998b1ab5f164 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -466,6 +466,9 @@ impl Assembler emit_push(cb, sys_scratch); }, Op::CPop => { + emit_pop(cb, insn.out.into()); + }, + Op::CPopInto => { emit_pop(cb, 
insn.opnds[0].into()); }, Op::CPopAll => { diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 5387629cb87bf2..f4afa567b34b7c 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -96,6 +96,7 @@ pub enum Op // Push and pop registers to/from the C stack CPush, CPop, + CPopInto, // Push and pop all of the caller-save registers and the flags to/from the C // stack @@ -743,6 +744,19 @@ macro_rules! def_push_jcc { }; } +macro_rules! def_push_0_opnd { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + #[must_use] + pub fn $op_name(&mut self) -> Opnd + { + self.push_insn($opcode, vec![], None) + } + } + }; +} + macro_rules! def_push_0_opnd_no_out { ($op_name:ident, $opcode:expr) => { impl Assembler @@ -817,7 +831,8 @@ def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); def_push_1_opnd!(not, Op::Not); def_push_1_opnd_no_out!(cpush, Op::CPush); -def_push_1_opnd_no_out!(cpop, Op::CPop); +def_push_0_opnd!(cpop, Op::CPop); +def_push_1_opnd_no_out!(cpop_into, Op::CPopInto); def_push_0_opnd_no_out!(cpush_all, Op::CPushAll); def_push_0_opnd_no_out!(cpop_all, Op::CPopAll); def_push_1_opnd_no_out!(cret, Op::CRet); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4e0a9dcf02d401..dfbd9e990b9f47 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -243,9 +243,10 @@ impl Assembler // Load effective address Op::Lea => lea(cb, insn.out.into(), insn.opnds[0].into()), - // Push and pop to the C stack + // Push and pop to/from the C stack Op::CPush => push(cb, insn.opnds[0].into()), - Op::CPop => pop(cb, insn.opnds[0].into()), + Op::CPop => pop(cb, insn.out.into()), + Op::CPopInto => pop(cb, insn.opnds[0].into()), // Push and pop to the C stack all caller-save registers and the // flags diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a54137fa978ced..87815902ee4fb7 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -394,9 +394,9 @@ fn 
gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { gen_counter_incr!(asm, exit_from_branch_stub); - asm.cpop(SP); - asm.cpop(EC); - asm.cpop(CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); asm.cret(Qundef.into()); @@ -443,9 +443,9 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { } } - asm.cpop(SP); - asm.cpop(EC); - asm.cpop(CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); asm.cret(Qundef.into()); } @@ -527,9 +527,9 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { gen_counter_incr!(asm, traced_cfunc_return); // Return to the interpreter - asm.cpop(SP); - asm.cpop(EC); - asm.cpop(CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); asm.cret(Qundef.into()); @@ -551,9 +551,9 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { // Every exit to the interpreter should be counted gen_counter_incr!(asm, leave_interp_return); - asm.cpop(SP); - asm.cpop(EC); - asm.cpop(CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); asm.cret(C_RET_OPND); @@ -580,9 +580,9 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { // We're not starting at the first PC, so we need to exit. 
gen_counter_incr!(asm, leave_start_pc_non_zero); - asm.cpop(SP); - asm.cpop(EC); - asm.cpop(CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); asm.cret(Qundef.into()); @@ -1706,55 +1706,59 @@ fn gen_setlocal_wc1( let idx = jit_get_arg(jit, 0).as_i32(); gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1) } +*/ // new hash initialized from top N values fn gen_newhash( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let num: i64 = jit_get_arg(jit, 0).as_i64(); + let num: u64 = jit_get_arg(jit, 0).as_u64(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); if num != 0 { // val = rb_hash_new_with_size(num / 2); - mov(cb, C_ARG_REGS[0], imm_opnd(num / 2)); - call_ptr(cb, REG0, rb_hash_new_with_size as *const u8); + let new_hash = asm.ccall( + rb_hash_new_with_size as *const u8, + vec![Opnd::UImm(num / 2)] + ); + + // Save the allocated hash as we want to push it after insertion + asm.cpush(new_hash); + asm.cpush(new_hash); // x86 alignment - // save the allocated hash as we want to push it after insertion - push(cb, RAX); - push(cb, RAX); // alignment + // Get a pointer to the values to insert into the hash + let stack_addr_from_top = asm.lea(ctx.stack_opnd((num - 1) as i32)); // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val); - mov(cb, C_ARG_REGS[0], imm_opnd(num)); - lea( - cb, - C_ARG_REGS[1], - ctx.stack_opnd((num - 1).try_into().unwrap()), + asm.ccall( + rb_hash_bulk_insert as *const u8, + vec![ + Opnd::UImm(num), + stack_addr_from_top, + new_hash + ] ); - mov(cb, C_ARG_REGS[2], RAX); - call_ptr(cb, REG0, rb_hash_bulk_insert as *const u8); - pop(cb, RAX); // alignment - pop(cb, RAX); + let new_hash = asm.cpop(); + asm.cpop_into(new_hash); // x86 alignment ctx.stack_pop(num.try_into().unwrap()); let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + 
asm.mov(stack_ret, new_hash); } else { // val = rb_hash_new(); - call_ptr(cb, REG0, rb_hash_new as *const u8); - + let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]); let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_hash); } KeepCompiling } -*/ fn gen_putstring( jit: &mut JITState, @@ -6005,8 +6009,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_minus => Some(gen_opt_minus), YARVINSN_opt_and => Some(gen_opt_and), YARVINSN_opt_or => Some(gen_opt_or), - YARVINSN_newhash => Some(gen_newhash), */ + YARVINSN_newhash => Some(gen_newhash), YARVINSN_duphash => Some(gen_duphash), YARVINSN_newarray => Some(gen_newarray), /* From 160e29b9e5c9419e3275d4bd6de09c9c4f242602 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 15 Jul 2022 13:25:26 -0400 Subject: [PATCH 376/546] Port print_str to new backend (https://github.com/Shopify/ruby/pull/318) * ADR and ADRP for AArch64 * Implement Op::Jbe on X86 * Lera instruction * Op::BakeString * LeaPC -> LeaLabel * Port print_str to the new backend * Port print_value to the new backend * Port print_ptr to the new backend * Write null-terminators in Op::BakeString * Fix up rebase issues on print-str port * Add back in panic for X86 backend for unsupported instructions being lowered * Fix target architecture --- yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/inst/pc_rel.rs | 107 ++++++++++++++++++++++++++++++ yjit/src/asm/arm64/mod.rs | 42 ++++++++++++ yjit/src/backend/arm64/mod.rs | 75 +++++++++++++++++---- yjit/src/backend/ir.rs | 62 ++++++++++++----- yjit/src/backend/tests.rs | 8 +++ yjit/src/backend/x86_64/mod.rs | 53 +++++++++++++-- yjit/src/codegen.rs | 4 +- yjit/src/utils.rs | 89 +++++++------------------ 9 files changed, 339 insertions(+), 103 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/pc_rel.rs diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 9dfc923f53d594..752ee64aa37488 100644 --- 
a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -14,6 +14,7 @@ mod logical_imm; mod logical_reg; mod mov; mod nop; +mod pc_rel; mod shift_imm; mod store; mod sys_reg; @@ -31,6 +32,7 @@ pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; pub use mov::Mov; pub use nop::Nop; +pub use pc_rel::PCRelative; pub use shift_imm::ShiftImm; pub use store::Store; pub use sys_reg::SysReg; diff --git a/yjit/src/asm/arm64/inst/pc_rel.rs b/yjit/src/asm/arm64/inst/pc_rel.rs new file mode 100644 index 00000000000000..fa330cb9d6b934 --- /dev/null +++ b/yjit/src/asm/arm64/inst/pc_rel.rs @@ -0,0 +1,107 @@ +/// Which operation to perform for the PC-relative instruction. +enum Op { + /// Form a PC-relative address. + ADR = 0, + + /// Form a PC-relative address to a 4KB page. + ADRP = 1 +} + +/// The struct that represents an A64 PC-relative address instruction that can +/// be encoded. +/// +/// ADR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 0 0 | +/// | op immlo immhi........................................................... rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct PCRelative { + /// The number for the general-purpose register to load the address into. + rd: u8, + + /// The number of bytes to add to the PC to form the address. + imm: i32, + + /// Which operation to perform for this instruction. 
+ op: Op +} + +impl PCRelative { + /// ADR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address- + pub fn adr(rd: u8, imm: i32) -> Self { + Self { rd, imm, op: Op::ADR } + } + + /// ADRP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- + pub fn adrp(rd: u8, imm: i32) -> Self { + Self { rd, imm: imm >> 12, op: Op::ADRP } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: PCRelative) -> Self { + let immlo = (inst.imm & 0b11) as u32; + let mut immhi = ((inst.imm >> 2) & ((1 << 18) - 1)) as u32; + + // Toggle the sign bit if necessary. + if inst.imm < 0 { + immhi |= (1 << 18); + } + + 0 + | ((inst.op as u32) << 31) + | (immlo << 29) + | (FAMILY << 25) + | (immhi << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: PCRelative) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_adr_positive() { + let inst = PCRelative::adr(0, 5); + let result: u32 = inst.into(); + assert_eq!(0x30000020, result); + } + + #[test] + fn test_adr_negative() { + let inst = PCRelative::adr(0, -5); + let result: u32 = inst.into(); + assert_eq!(0x70ffffc0, result); + } + + #[test] + fn test_adrp_positive() { + let inst = PCRelative::adrp(0, 0x4000); + let result: u32 = inst.into(); + assert_eq!(0x90000020, result); + } + + #[test] + fn test_adrp_negative() { + let inst = PCRelative::adrp(0, -0x4000); + let result: u32 = inst.into(); + assert_eq!(0x90ffffe0, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 7adc1a274544b2..ca69b33d9efb9f 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -94,6 +94,38 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// ADR - form a PC-relative address and load it into a register +pub fn adr(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) { + let bytes: [u8; 4] = match (rd, imm) { + (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => { + assert!(rd.num_bits == 64, "The destination register must be 64 bits."); + assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); + + PCRelative::adr(rd.reg_no, imm as i32).into() + }, + _ => panic!("Invalid operand combination to adr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADRP - form a PC-relative address to a 4KB page and load it into a register. +/// This is effectively the same as ADR except that the immediate must be a +/// multiple of 4KB. 
+pub fn adrp(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) { + let bytes: [u8; 4] = match (rd, imm) { + (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => { + assert!(rd.num_bits == 64, "The destination register must be 64 bits."); + assert!(imm_fits_bits(imm, 32), "The immediate operand must be 32 bits or less."); + + PCRelative::adrp(rd.reg_no, imm as i32).into() + }, + _ => panic!("Invalid operand combination to adr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// AND - and rn and rm, put the result in rd, don't update flags pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { @@ -628,6 +660,16 @@ mod tests { check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7))); } + #[test] + fn test_adr() { + check_bytes("aa000010", |cb| adr(cb, X10, A64Opnd::new_imm(20))); + } + + #[test] + fn test_adrp() { + check_bytes("4a000090", |cb| adrp(cb, X10, A64Opnd::new_imm(0x8000))); + } + #[test] fn test_and_register() { check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 22998b1ab5f164..153237a9b12f32 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -58,6 +58,9 @@ impl From for A64Opnd { impl Assembler { + // A special scratch register for intermediate processing. + const SCRATCH0: A64Opnd = A64Opnd::Reg(X22_REG); + /// Get the list of registers from which we will allocate on this platform /// These are caller-saved registers /// Note: we intentionally exclude C_RET_REG (X0) from this list @@ -78,7 +81,7 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { - self.forward_pass(|asm, index, op, opnds, target| { + self.forward_pass(|asm, index, op, opnds, target, text| { // Load all Value operands into registers that aren't already a part // of Load instructions. 
let opnds = match op { @@ -100,15 +103,15 @@ impl Assembler (Opnd::Mem(_), Opnd::Mem(_)) => { let opnd0 = asm.load(opnds[0]); let opnd1 = asm.load(opnds[1]); - asm.push_insn(op, vec![opnd0, opnd1], target); + asm.push_insn(op, vec![opnd0, opnd1], target, text); }, (mem_opnd @ Opnd::Mem(_), other_opnd) | (other_opnd, mem_opnd @ Opnd::Mem(_)) => { let opnd0 = asm.load(mem_opnd); - asm.push_insn(op, vec![opnd0, other_opnd], target); + asm.push_insn(op, vec![opnd0, other_opnd], target, text); }, _ => { - asm.push_insn(op, opnds, target); + asm.push_insn(op, opnds, target, text); } } }, @@ -220,7 +223,7 @@ impl Assembler asm.test(opnd0, opnds[1]); }, _ => { - asm.push_insn(op, opnds, target); + asm.push_insn(op, opnds, target, text); } }; }) @@ -366,9 +369,6 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec = Vec::new(); - // A special scratch register for loading/storing system registers. - let mut sys_scratch = A64Opnd::Reg(X22_REG); - // For each instruction for insn in &self.insns { match insn.op { @@ -380,6 +380,22 @@ impl Assembler Op::Label => { cb.write_label(insn.target.unwrap().unwrap_label_idx()); }, + Op::BakeString => { + let str = insn.text.as_ref().unwrap(); + for byte in str.as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + + // Pad out the string to the next 4-byte boundary so that + // it's easy to jump past. 
+ for _ in 0..(4 - ((str.len() + 1) % 4)) { + cb.write_byte(0); + } + }, Op::Add => { add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, @@ -452,6 +468,15 @@ impl Assembler } }; }, + Op::LeaLabel => { + let label_idx = insn.target.unwrap().unwrap_label_idx(); + + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - src_addr)); + }); + + mov(cb, insn.out.into(), Self::SCRATCH0); + }, Op::CPush => { emit_push(cb, insn.opnds[0].into()); }, @@ -462,8 +487,8 @@ impl Assembler emit_push(cb, A64Opnd::Reg(reg)); } - mrs(cb, sys_scratch, SystemRegister::NZCV); - emit_push(cb, sys_scratch); + mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); + emit_push(cb, Self::SCRATCH0); }, Op::CPop => { emit_pop(cb, insn.out.into()); @@ -474,8 +499,8 @@ impl Assembler Op::CPopAll => { let regs = Assembler::get_caller_save_regs(); - msr(cb, SystemRegister::NZCV, sys_scratch); - emit_pop(cb, sys_scratch); + msr(cb, SystemRegister::NZCV, Self::SCRATCH0); + emit_pop(cb, Self::SCRATCH0); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); @@ -616,6 +641,18 @@ mod tests { assert_eq!(0x8b010003, unsafe { *insns }); } + #[test] + fn test_emit_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); + + // Testing that we pad the string to the nearest 4-byte boundary to make + // it easier to jump over. 
+ assert_eq!(16, cb.get_write_pos()); + } + #[test] fn test_emit_cpush_all() { let (mut asm, mut cb) = setup_asm(); @@ -631,4 +668,18 @@ mod tests { asm.cpop_all(); asm.compile_with_num_regs(&mut cb, 0); } + + #[test] + fn test_emit_lea_label() { + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("label"); + let opnd = asm.lea_label(label); + + asm.write_label(label); + asm.bake_string("Hello, world!"); + asm.store(Opnd::mem(64, SP, 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index f4afa567b34b7c..7e763b1ae9dba9 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -35,6 +35,9 @@ pub enum Op // Add a label into the IR at the point that this instruction is added. Label, + // Bake a string directly into the instruction stream. + BakeString, + // Add two operands together, and return the result as a new operand. This // operand can then be used as the operand on another instruction. It // accepts two operands, which can be of any type @@ -71,6 +74,10 @@ pub enum Op // Load effective address Lea, + // Load effective address relative to the current instruction pointer. It + // accepts a single signed immediate operand. + LeaLabel, + // A low-level mov instruction. It accepts two operands. 
Mov, @@ -393,7 +400,7 @@ impl Assembler } /// Append an instruction to the list - pub(super) fn push_insn(&mut self, op: Op, opnds: Vec, target: Option) -> Opnd + pub(super) fn push_insn(&mut self, op: Op, opnds: Vec, target: Option, text: Option) -> Opnd { // Index of this instruction let insn_idx = self.insns.len(); @@ -439,11 +446,11 @@ impl Assembler let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; let insn = Insn { - op: op, - text: None, - opnds: opnds, + op, + text, + opnds, out: out_opnd, - target: target, + target, pos: None }; @@ -469,6 +476,27 @@ impl Assembler self.live_ranges.push(self.insns.len()); } + /// Bake a string at the current position + pub fn bake_string(&mut self, text: &str) + { + let insn = Insn { + op: Op::BakeString, + text: Some(text.to_owned()), + opnds: vec![], + out: Opnd::None, + target: None, + pos: None + }; + self.insns.push(insn); + self.live_ranges.push(self.insns.len()); + } + + /// Load an address relative to the given label. 
+ #[must_use] + pub fn lea_label(&mut self, target: Target) -> Opnd { + self.push_insn(Op::LeaLabel, vec![], Some(target), None) + } + /// Create a new label instance that we can jump to pub fn new_label(&mut self, name: &str) -> Target { @@ -498,7 +526,7 @@ impl Assembler /// Transform input instructions, consumes the input assembler pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler - where F: FnMut(&mut Assembler, usize, Op, Vec, Option) + where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option) { let mut asm = Assembler { insns: Vec::default(), @@ -534,7 +562,7 @@ impl Assembler asm.comment(insn.text.unwrap().as_str()); }, _ => { - map_insn(&mut asm, index, insn.op, opnds, insn.target); + map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text); } }; @@ -596,7 +624,7 @@ impl Assembler let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - let asm = self.forward_pass(|asm, index, op, opnds, target| { + let asm = self.forward_pass(|asm, index, op, opnds, target, text| { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. @@ -677,7 +705,7 @@ impl Assembler } ).collect(); - asm.push_insn(op, reg_opnds, target); + asm.push_insn(op, reg_opnds, target, text); // Set the output register for this instruction let num_insns = asm.insns.len(); @@ -728,7 +756,7 @@ impl Assembler pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let target = Target::FunPtr(fptr); - self.push_insn(Op::CCall, opnds, Some(target)) + self.push_insn(Op::CCall, opnds, Some(target), None) } } @@ -738,7 +766,7 @@ macro_rules! def_push_jcc { { pub fn $op_name(&mut self, target: Target) { - self.push_insn($opcode, vec![], Some(target)); + self.push_insn($opcode, vec![], Some(target), None); } } }; @@ -751,7 +779,7 @@ macro_rules! 
def_push_0_opnd { #[must_use] pub fn $op_name(&mut self) -> Opnd { - self.push_insn($opcode, vec![], None) + self.push_insn($opcode, vec![], None, None) } } }; @@ -763,7 +791,7 @@ macro_rules! def_push_0_opnd_no_out { { pub fn $op_name(&mut self) { - self.push_insn($opcode, vec![], None); + self.push_insn($opcode, vec![], None, None); } } }; @@ -776,7 +804,7 @@ macro_rules! def_push_1_opnd { #[must_use] pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd { - self.push_insn($opcode, vec![opnd0], None) + self.push_insn($opcode, vec![opnd0], None, None) } } }; @@ -788,7 +816,7 @@ macro_rules! def_push_1_opnd_no_out { { pub fn $op_name(&mut self, opnd0: Opnd) { - self.push_insn($opcode, vec![opnd0], None); + self.push_insn($opcode, vec![opnd0], None, None); } } }; @@ -801,7 +829,7 @@ macro_rules! def_push_2_opnd { #[must_use] pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { - self.push_insn($opcode, vec![opnd0, opnd1], None) + self.push_insn($opcode, vec![opnd0, opnd1], None, None) } } }; @@ -813,7 +841,7 @@ macro_rules! def_push_2_opnd_no_out { { pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) { - self.push_insn($opcode, vec![opnd0, opnd1], None); + self.push_insn($opcode, vec![opnd0, opnd1], None, None); } } }; diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index d386d31d73d411..dc56bd4abb9edd 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -283,3 +283,11 @@ fn test_jo() asm.compile_with_num_regs(&mut cb, 2); } + +#[test] +fn test_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index dfbd9e990b9f47..cb9d5be74a38b5 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -67,6 +67,11 @@ impl From for X86Opnd { impl Assembler { + // A special scratch register for intermediate processing. 
+ // Note: right now this is only used by LeaLabel because label_ref accepts + // a closure and we don't want it to have to capture anything. + const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG); + /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -79,17 +84,17 @@ impl Assembler /// Get a list of all of the caller-save registers pub fn get_caller_save_regs() -> Vec { vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG] - - // Technically these are also caller-save: R12_REG, R13_REG, R14_REG, - // and R15_REG, but we don't use them so we don't include them here. } + // These are the callee-saved registers in the x86-64 SysV ABI + // RBX, RSP, RBP, and R12–R15 + /// Split IR instructions for the x86 platform fn x86_split(mut self) -> Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - self.forward_pass(|asm, index, op, opnds, target| { + self.forward_pass(|asm, index, op, opnds, target, text| { // Load heap object operands into registers because most // instructions can't directly work with 64-bit constants let opnds = match op { @@ -135,7 +140,7 @@ impl Assembler _ => (opnds[0], opnds[1]) }; - asm.push_insn(op, vec![opnd0, opnd1], target); + asm.push_insn(op, vec![opnd0, opnd1], target, text); }, Op::Mov => { match (opnds[0], opnds[1]) { @@ -178,7 +183,7 @@ impl Assembler asm.not(opnd0); }, _ => { - asm.push_insn(op, opnds, target); + asm.push_insn(op, opnds, target, text); } }; }) @@ -206,6 +211,16 @@ impl Assembler cb.write_label(insn.target.unwrap().unwrap_label_idx()); }, + Op::BakeString => { + for byte in insn.text.as_ref().unwrap().as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + }, + Op::Add => { add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, @@ -243,7 +258,19 @@ impl Assembler // Load effective address Op::Lea => lea(cb, insn.out.into(), 
insn.opnds[0].into()), - // Push and pop to/from the C stack + // Load relative address + Op::LeaLabel => { + let label_idx = insn.target.unwrap().unwrap_label_idx(); + + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + let disp = dst_addr - src_addr; + lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + }); + + mov(cb, insn.out.into(), Self::SCRATCH0); + }, + + // Push and pop to the C stack Op::CPush => push(cb, insn.opnds[0].into()), Op::CPop => pop(cb, insn.out.into()), Op::CPopInto => pop(cb, insn.opnds[0].into()), @@ -315,6 +342,14 @@ impl Assembler } } + Op::Jbe => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), + Target::Label(label_idx) => jbe_label(cb, label_idx), + _ => unreachable!() + } + }, + Op::Jz => { match insn.target.unwrap() { Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), @@ -349,6 +384,10 @@ impl Assembler Op::Breakpoint => int3(cb), + // We want to keep the panic here because some instructions that + // we feed to the backend could get lowered into other + // instructions. So it's possible that some of our backend + // instructions can never make it to the emit stage. 
_ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) }; } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 87815902ee4fb7..618586f7a3f865 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -790,7 +790,7 @@ pub fn gen_single_block( // If requested, dump instructions for debugging if get_option!(dump_insns) { println!("compiling {}", insn_name(opcode)); - print_str(cb, &format!("executing {}", insn_name(opcode))); + print_str(&mut asm, &format!("executing {}", insn_name(opcode))); } // Call the code generation function @@ -6622,4 +6622,4 @@ mod tests { gen_leave(&mut jit, &mut context, &mut cb, &mut ocb); } } -*/ \ No newline at end of file +*/ diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index dd89413090a0de..98af6041938b14 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -1,7 +1,6 @@ #![allow(dead_code)] // Some functions for print debugging in here -use crate::asm::x86_64::*; -use crate::asm::*; +use crate::backend::ir::*; use crate::cruby::*; use std::slice; @@ -164,34 +163,7 @@ macro_rules! 
c_callable { } pub(crate) use c_callable; -// Save caller-save registers on the stack before a C call -fn push_regs(cb: &mut CodeBlock) { - push(cb, RAX); - push(cb, RCX); - push(cb, RDX); - push(cb, RSI); - push(cb, RDI); - push(cb, R8); - push(cb, R9); - push(cb, R10); - push(cb, R11); - pushfq(cb); -} - -// Restore caller-save registers from the after a C call -fn pop_regs(cb: &mut CodeBlock) { - popfq(cb); - pop(cb, R11); - pop(cb, R10); - pop(cb, R9); - pop(cb, R8); - pop(cb, RDI); - pop(cb, RSI); - pop(cb, RDX); - pop(cb, RCX); - pop(cb, RAX); -} - +/* pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) { c_callable!{ fn print_int_fn(val: i64) { @@ -220,45 +192,40 @@ pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) { call(cb, RAX); pop_regs(cb); } +*/ /// Generate code to print a pointer -pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) { +pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) { c_callable!{ fn print_ptr_fn(ptr: *const u8) { println!("{:p}", ptr); } } - assert!(opnd.num_bits() == 64); + assert!(opnd.rm_num_bits() == 64); - push_regs(cb); - mov(cb, C_ARG_REGS[0], opnd); - mov(cb, RAX, const_ptr_opnd(print_ptr_fn as *const u8)); - call(cb, RAX); - pop_regs(cb); + asm.cpush_all(); + asm.ccall(print_ptr_fn as *const u8, vec![opnd]); + asm.cpop_all(); } /// Generate code to print a value -pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) { +pub fn print_value(asm: &mut Assembler, opnd: Opnd) { c_callable!{ fn print_value_fn(val: VALUE) { unsafe { rb_obj_info_dump(val) } } } - assert!(opnd.num_bits() == 64); - - push_regs(cb); - - mov(cb, RDI, opnd); - mov(cb, RAX, const_ptr_opnd(print_value_fn as *const u8)); - call(cb, RAX); + assert!(matches!(opnd, Opnd::Value(_))); - pop_regs(cb); + asm.cpush_all(); + asm.ccall(print_value_fn as *const u8, vec![opnd]); + asm.cpop_all(); } /// Generate code to print constant string to stdout -pub fn print_str(cb: &mut CodeBlock, str: &str) { +pub fn print_str(asm: &mut Assembler, str: &str) { c_callable!{ 
fn print_str_cfun(ptr: *const u8, num_bytes: usize) { unsafe { @@ -269,26 +236,18 @@ pub fn print_str(cb: &mut CodeBlock, str: &str) { } } - let bytes = str.as_ptr(); - let num_bytes = str.len(); - - push_regs(cb); + asm.cpush_all(); - // Load the string address and jump over the string data - lea(cb, C_ARG_REGS[0], mem_opnd(8, RIP, 5)); - jmp32(cb, num_bytes as i32); + let string_data = asm.new_label("string_data"); + let after_string = asm.new_label("after_string"); - // Write the string chars and a null terminator - for i in 0..num_bytes { - cb.write_byte(unsafe { *bytes.add(i) }); - } + asm.jmp(after_string); + asm.write_label(string_data); + asm.bake_string(str); + asm.write_label(after_string); - // Pass the string length as an argument - mov(cb, C_ARG_REGS[1], uimm_opnd(num_bytes as u64)); + let opnd = asm.lea_label(string_data); + asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]); - // Call the print function - mov(cb, RAX, const_ptr_opnd(print_str_cfun as *const u8)); - call(cb, RAX); - - pop_regs(cb); + asm.cpop_all(); } From 34ec89252bf4bf1c841a82be4183a3dc40046355 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 15 Jul 2022 13:25:49 -0400 Subject: [PATCH 377/546] Fix comment typo --- yjit/src/codegen.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 618586f7a3f865..89f3171c3ba004 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1862,7 +1862,7 @@ fn gen_jbe_to_target0( } // Generate a jump to a stub that recompiles the current YARV instruction on failure. -// When depth_limitk is exceeded, generate a jump to a side exit. +// When depth_limit is exceeded, generate a jump to a side exit. 
fn jit_chain_guard( jcc: JCCKinds, jit: &JITState, From 95e5c5227c0492ed33fa41f31a698bf320d2b437 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 15 Jul 2022 13:49:38 -0400 Subject: [PATCH 378/546] Fix bug with LeaLabel on x86, add CI test --- yjit/src/backend/x86_64/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index cb9d5be74a38b5..0001d459772768 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -262,7 +262,7 @@ impl Assembler Op::LeaLabel => { let label_idx = insn.target.unwrap().unwrap_label_idx(); - cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { let disp = dst_addr - src_addr; lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); }); @@ -270,7 +270,7 @@ impl Assembler mov(cb, insn.out.into(), Self::SCRATCH0); }, - // Push and pop to the C stack + // Push and pop to/from the C stack Op::CPush => push(cb, insn.opnds[0].into()), Op::CPop => pop(cb, insn.out.into()), Op::CPopInto => pop(cb, insn.opnds[0].into()), From 7787ef29fedb6d638893434c323db6ea85d85640 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 15 Jul 2022 14:32:57 -0400 Subject: [PATCH 379/546] Add --yjit-dump-insns test to Cirrus Arm workflow --- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index a2c2cf84aa052b..7b68a5091a4a12 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -118,6 +118,7 @@ yjit_task: make_miniruby_script: source $HOME/.cargo/env && make -j miniruby make_bindgen_script: source $HOME/.cargo/env && make -j yjit-bindgen boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 + test_dump_insns: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 # output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 bootstrap_tests_script: 
RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. -I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" bootstraptest/test_yjit_new_backend.rb # full_build_script: make -j From c83198911a82c8dc6196fcfca073eb78822982f4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 15 Jul 2022 16:06:53 -0400 Subject: [PATCH 380/546] Fix jumps (https://github.com/Shopify/ruby/pull/322) --- yjit/src/backend/arm64/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 153237a9b12f32..9539b907ea3733 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -570,7 +570,7 @@ impl Assembler // to assume we can fit into a single b instruction. // It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4)); + b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4 + 1)); }); }, _ => unreachable!() From bf7277b518d5ab634ee708f54fbb8735a8eafdbc Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 15 Jul 2022 16:21:35 -0400 Subject: [PATCH 381/546] Update cirrus workflow --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 7b68a5091a4a12..3e03d0adc3d3c6 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -118,7 +118,7 @@ yjit_task: make_miniruby_script: source $HOME/.cargo/env && make -j miniruby make_bindgen_script: source $HOME/.cargo/env && make -j yjit-bindgen boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 - test_dump_insns: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 + test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 # output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 bootstrap_tests_script: RUST_BACKTRACE=1 ruby --disable=gems 
bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. -I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" bootstraptest/test_yjit_new_backend.rb # full_build_script: make -j From 0da253e72cc80c1dbf8517f5217b59a64ec0f44e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 15 Jul 2022 16:14:55 -0400 Subject: [PATCH 382/546] Port print_int to the new backend (https://github.com/Shopify/ruby/pull/321) * Port print_int to the new backend * Tests for print_int and print_str --- yjit/src/asm/arm64/inst/load.rs | 42 +++++++++--- yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/inst/sbfm.rs | 77 ++++++++++++++++++++++ yjit/src/asm/arm64/mod.rs | 40 ++++++++++++ yjit/src/asm/arm64/opnd.rs | 64 +++++++++---------- yjit/src/backend/arm64/mod.rs | 28 ++++++++ yjit/src/backend/ir.rs | 5 ++ yjit/src/backend/x86_64/mod.rs | 4 ++ yjit/src/utils.rs | 110 ++++++++++++++++++-------------- 9 files changed, 284 insertions(+), 88 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/sbfm.rs diff --git a/yjit/src/asm/arm64/inst/load.rs b/yjit/src/asm/arm64/inst/load.rs index 727dad52f7cfb2..b64a6a96ac80fe 100644 --- a/yjit/src/asm/arm64/inst/load.rs +++ b/yjit/src/asm/arm64/inst/load.rs @@ -4,6 +4,12 @@ enum Size { Size64 = 0b11, } +/// The operation to perform for this instruction. +enum Opc { + LDUR = 0b01, + LDURSW = 0b10 +} + /// A convenience function so that we can convert the number of bits of an /// register operand directly into an Sf enum variant. impl From for Size { @@ -22,8 +28,8 @@ impl From for Size { /// LDUR /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 1 1 0 0 0 0 1 0 0 0 | -/// | size. imm9.......................... rn.............. rt.............. | +/// | 1 1 1 0 0 0 0 0 0 | +/// | size. opc.. imm9.......................... 
rn.............. rt.............. | /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct Load { @@ -36,6 +42,9 @@ pub struct Load { /// The optional signed immediate byte offset from the base register. imm9: i16, + /// The operation to perform for this instruction. + opc: Opc, + /// The size of the operands being operated on. size: Size } @@ -44,12 +53,13 @@ impl Load { /// LDUR (load register, unscaled) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { - Self { - rt, - rn, - imm9, - size: num_bits.into() - } + Self { rt, rn, imm9, opc: Opc::LDUR, size: num_bits.into() } + } + + /// LDURSW (load register, unscaled, signed) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en + pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, imm9, opc: Opc::LDURSW, size: Size::Size32 } } } @@ -65,7 +75,7 @@ impl From for u32 { | ((inst.size as u32) << 30) | (0b11 << 28) | (FAMILY << 25) - | (1 << 22) + | ((inst.opc as u32) << 22) | (imm9 << 12) | ((inst.rn as u32) << 5) | (inst.rt as u32) @@ -97,4 +107,18 @@ mod tests { let result: u32 = inst.into(); assert_eq!(0xf847b020, result); } + + #[test] + fn test_ldursw() { + let inst = Load::ldursw(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0xb8800020, result); + } + + #[test] + fn test_ldursw_with_imm() { + let inst = Load::ldursw(0, 1, 123); + let result: u32 = inst.into(); + assert_eq!(0xb887b020, result); + } } diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 752ee64aa37488..5d4d252d93a17d 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -15,6 +15,7 @@ mod logical_reg; mod mov; mod nop; mod pc_rel; +mod sbfm; mod shift_imm; mod store; mod sys_reg; 
@@ -33,6 +34,7 @@ pub use logical_reg::LogicalReg; pub use mov::Mov; pub use nop::Nop; pub use pc_rel::PCRelative; +pub use sbfm::SBFM; pub use shift_imm::ShiftImm; pub use store::Store; pub use sys_reg::SysReg; diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs new file mode 100644 index 00000000000000..4fbb567ed05bcb --- /dev/null +++ b/yjit/src/asm/arm64/inst/sbfm.rs @@ -0,0 +1,77 @@ +use super::super::arg::Sf; + +/// The struct that represents an A64 signed bitfield move instruction that can +/// be encoded. +/// +/// SBFM +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 0 0 1 1 0 | +/// | sf N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SBFM { + /// The number for the general-purpose register to load the value into. + rd: u8, + + /// The number for the general-purpose register to copy from. + rn: u8, + + /// The leftmost bit number to be moved from the source. + imms: u8, + + // The right rotate amount. + immr: u8, + + /// Whether or not this is a 64-bit operation. + n: bool, + + /// The size of this operation. + sf: Sf +} + +impl SBFM { + /// SXTW + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en + pub fn sxtw(rd: u8, rn: u8) -> Self { + Self { rd, rn, immr: 0, imms: 31, n: true, sf: Sf::Sf64 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +const FAMILY: u32 = 0b1001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: SBFM) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (FAMILY << 25) + | (1 << 24) + | ((inst.n as u32) << 22) + | ((inst.immr as u32) << 16) + | ((inst.imms as u32) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: SBFM) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sxtw() { + let inst = SBFM::sxtw(0, 1); + let result: u32 = inst.into(); + assert_eq!(0x93407c20, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index ca69b33d9efb9f..6eebccaa6120b6 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -321,6 +321,21 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// LDURSW - load a 32-bit memory address into a register and sign-extend it +pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + + Load::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldursw instruction.") + }; + + cb.write_bytes(&bytes); +} + /// LSL - logical shift left a register by an immediate pub fn lsl(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, shift) { @@ -558,6 +573,21 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// SXTW - sign extend a 32-bit register into a 64-bit register +pub fn sxtw(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn)) => { + assert_eq!(rd.num_bits, 64, "rd must 
be 64-bits wide."); + assert_eq!(rn.num_bits, 32, "rn must be 32-bits wide."); + + SBFM::sxtw(rd.reg_no, rn.reg_no).into() + }, + _ => panic!("Invalid operand combination to sxtw instruction."), + }; + + cb.write_bytes(&bytes); +} + /// RET - unconditionally return to a location in a register, defaults to X30 pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { let bytes: [u8; 4] = match rn { @@ -750,6 +780,11 @@ mod tests { check_bytes("200040f8", |cb| ldur(cb, X0, X1)); } + #[test] + fn test_ldursw() { + check_bytes("6ab187b8", |cb| ldursw(cb, X10, A64Opnd::new_mem(64, X11, 123))); + } + #[test] fn test_lsl() { check_bytes("6ac572d3", |cb| lsl(cb, X10, X11, A64Opnd::new_uimm(14))); @@ -870,6 +905,11 @@ mod tests { check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); } + #[test] + fn test_sxtw() { + check_bytes("6a7d4093", |cb| sxtw(cb, X10, W11)); + } + #[test] fn test_tst_register() { check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index e1f95979a9e151..a10e28945573e6 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -146,38 +146,38 @@ pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); // 32-bit registers -pub const W0: A64Reg = A64Reg { num_bits: 32, reg_no: 0 }; -pub const W1: A64Reg = A64Reg { num_bits: 32, reg_no: 1 }; -pub const W2: A64Reg = A64Reg { num_bits: 32, reg_no: 2 }; -pub const W3: A64Reg = A64Reg { num_bits: 32, reg_no: 3 }; -pub const W4: A64Reg = A64Reg { num_bits: 32, reg_no: 4 }; -pub const W5: A64Reg = A64Reg { num_bits: 32, reg_no: 5 }; -pub const W6: A64Reg = A64Reg { num_bits: 32, reg_no: 6 }; -pub const W7: A64Reg = A64Reg { num_bits: 32, reg_no: 7 }; -pub const W8: A64Reg = A64Reg { num_bits: 32, reg_no: 8 }; -pub const W9: A64Reg = A64Reg { num_bits: 32, reg_no: 9 }; -pub const W10: A64Reg = A64Reg { num_bits: 32, reg_no: 10 }; -pub 
const W11: A64Reg = A64Reg { num_bits: 32, reg_no: 11 }; -pub const W12: A64Reg = A64Reg { num_bits: 32, reg_no: 12 }; -pub const W13: A64Reg = A64Reg { num_bits: 32, reg_no: 13 }; -pub const W14: A64Reg = A64Reg { num_bits: 32, reg_no: 14 }; -pub const W15: A64Reg = A64Reg { num_bits: 32, reg_no: 15 }; -pub const W16: A64Reg = A64Reg { num_bits: 32, reg_no: 16 }; -pub const W17: A64Reg = A64Reg { num_bits: 32, reg_no: 17 }; -pub const W18: A64Reg = A64Reg { num_bits: 32, reg_no: 18 }; -pub const W19: A64Reg = A64Reg { num_bits: 32, reg_no: 19 }; -pub const W20: A64Reg = A64Reg { num_bits: 32, reg_no: 20 }; -pub const W21: A64Reg = A64Reg { num_bits: 32, reg_no: 21 }; -pub const W22: A64Reg = A64Reg { num_bits: 32, reg_no: 22 }; -pub const W23: A64Reg = A64Reg { num_bits: 32, reg_no: 23 }; -pub const W24: A64Reg = A64Reg { num_bits: 32, reg_no: 24 }; -pub const W25: A64Reg = A64Reg { num_bits: 32, reg_no: 25 }; -pub const W26: A64Reg = A64Reg { num_bits: 32, reg_no: 26 }; -pub const W27: A64Reg = A64Reg { num_bits: 32, reg_no: 27 }; -pub const W28: A64Reg = A64Reg { num_bits: 32, reg_no: 28 }; -pub const W29: A64Reg = A64Reg { num_bits: 32, reg_no: 29 }; -pub const W30: A64Reg = A64Reg { num_bits: 32, reg_no: 30 }; -pub const W31: A64Reg = A64Reg { num_bits: 32, reg_no: 31 }; +pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 }); +pub const W1: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 1 }); +pub const W2: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 2 }); +pub const W3: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 3 }); +pub const W4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 4 }); +pub const W5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 5 }); +pub const W6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 6 }); +pub const W7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 7 }); +pub const W8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 8 }); +pub const W9: A64Opnd = 
A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 9 }); +pub const W10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 10 }); +pub const W11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 11 }); +pub const W12: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 12 }); +pub const W13: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 13 }); +pub const W14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 14 }); +pub const W15: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 15 }); +pub const W16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 16 }); +pub const W17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 17 }); +pub const W18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 18 }); +pub const W19: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 19 }); +pub const W20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 20 }); +pub const W21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 21 }); +pub const W22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 22 }); +pub const W23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 23 }); +pub const W24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 24 }); +pub const W25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 25 }); +pub const W26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 26 }); +pub const W27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 27 }); +pub const W28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 28 }); +pub const W29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 29 }); +pub const W30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 30 }); +pub const W31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 31 }); // C argument registers pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 9539b907ea3733..8b5576f7be91cb 100644 --- a/yjit/src/backend/arm64/mod.rs +++ 
b/yjit/src/backend/arm64/mod.rs @@ -158,6 +158,22 @@ impl Assembler asm.jmp_opnd(opnds[0]); } }, + Op::LoadSExt => { + match opnds[0] { + // We only want to sign extend if the operand is a + // register, instruction output, or memory address that + // is 32 bits. Otherwise we'll just load the value + // directly since there's no need to sign extend. + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } | + Opnd::Mem(Mem { num_bits: 32, .. }) => { + asm.load_sext(opnds[0]); + }, + _ => { + asm.load(opnds[0]); + } + }; + }, Op::Mov => { // The value that is being moved must be either a register // or an immediate that can be encoded as a bitmask @@ -448,6 +464,18 @@ impl Assembler } }; }, + Op::LoadSExt => { + match insn.opnds[0] { + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } => { + sxtw(cb, insn.out.into(), insn.opnds[0].into()); + }, + Opnd::Mem(Mem { num_bits: 32, .. }) => { + ldursw(cb, insn.out.into(), insn.opnds[0].into()); + }, + _ => unreachable!() + }; + }, Op::Mov => { mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); }, diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 7e763b1ae9dba9..e42a0c50b43be3 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -68,6 +68,10 @@ pub enum Op // A low-level instruction that loads a value into a register. Load, + // A low-level instruction that loads a value into a register and + // sign-extends it to a 64-bit value. + LoadSExt, + // Low-level instruction to store a value to memory. 
Store, @@ -865,6 +869,7 @@ def_push_0_opnd_no_out!(cpush_all, Op::CPushAll); def_push_0_opnd_no_out!(cpop_all, Op::CPopAll); def_push_1_opnd_no_out!(cret, Op::CRet); def_push_1_opnd!(load, Op::Load); +def_push_1_opnd!(load_sext, Op::LoadSExt); def_push_1_opnd!(lea, Op::Lea); def_push_2_opnd_no_out!(store, Op::Store); def_push_2_opnd_no_out!(mov, Op::Mov); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 0001d459772768..31a907b55e5f45 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -253,6 +253,10 @@ impl Assembler } }, + Op::LoadSExt => { + movsx(cb, insn.out.into(), insn.opnds[0].into()) + }, + Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Load effective address diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index 98af6041938b14..5f42ba1fdbd805 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -105,36 +105,6 @@ pub fn iseq_get_location(iseq: IseqPtr) -> String { s } -#[cfg(test)] -mod tests { - #[test] - fn min_max_preserved_after_cast_to_usize() { - use crate::utils::IntoUsize; - - let min: usize = u64::MIN.as_usize(); - assert_eq!(min, u64::MIN.try_into().unwrap()); - let max: usize = u64::MAX.as_usize(); - assert_eq!(max, u64::MAX.try_into().unwrap()); - - let min: usize = u32::MIN.as_usize(); - assert_eq!(min, u32::MIN.try_into().unwrap()); - let max: usize = u32::MAX.as_usize(); - assert_eq!(max, u32::MAX.try_into().unwrap()); - } - - #[test] - fn test_offset_of() { - #[repr(C)] - struct Foo { - a: u8, - b: u64, - } - - assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front"); - assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold"); - } -} - // TODO: we may want to move this function into yjit.c, maybe add a convenient Rust-side wrapper /* // For debugging. Print the bytecode for an iseq. @@ -163,36 +133,31 @@ macro_rules! 
c_callable { } pub(crate) use c_callable; -/* -pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) { +pub fn print_int(asm: &mut Assembler, opnd: Opnd) { c_callable!{ fn print_int_fn(val: i64) { println!("{}", val); } } - push_regs(cb); + asm.cpush_all(); - match opnd { - X86Opnd::Mem(_) | X86Opnd::Reg(_) => { + let argument = match opnd { + Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. } => { // Sign-extend the value if necessary - if opnd.num_bits() < 64 { - movsx(cb, C_ARG_REGS[0], opnd); + if opnd.rm_num_bits() < 64 { + asm.load_sext(opnd) } else { - mov(cb, C_ARG_REGS[0], opnd); + opnd } - } - X86Opnd::Imm(_) | X86Opnd::UImm(_) => { - mov(cb, C_ARG_REGS[0], opnd); - } + }, + Opnd::Imm(_) | Opnd::UImm(_) => opnd, _ => unreachable!(), - } + }; - mov(cb, RAX, const_ptr_opnd(print_int_fn as *const u8)); - call(cb, RAX); - pop_regs(cb); + asm.ccall(print_int_fn as *const u8, vec![argument]); + asm.cpop_all(); } -*/ /// Generate code to print a pointer pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) { @@ -251,3 +216,54 @@ pub fn print_str(asm: &mut Assembler, str: &str) { asm.cpop_all(); } + +#[cfg(test)] +mod tests { + use super::*; + use crate::asm::CodeBlock; + + #[test] + fn min_max_preserved_after_cast_to_usize() { + use crate::utils::IntoUsize; + + let min: usize = u64::MIN.as_usize(); + assert_eq!(min, u64::MIN.try_into().unwrap()); + let max: usize = u64::MAX.as_usize(); + assert_eq!(max, u64::MAX.try_into().unwrap()); + + let min: usize = u32::MIN.as_usize(); + assert_eq!(min, u32::MIN.try_into().unwrap()); + let max: usize = u32::MAX.as_usize(); + assert_eq!(max, u32::MAX.try_into().unwrap()); + } + + #[test] + fn test_offset_of() { + #[repr(C)] + struct Foo { + a: u8, + b: u64, + } + + assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front"); + assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold"); + } + + #[test] + fn test_print_int() { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); 
+ + print_int(&mut asm, Opnd::Imm(42)); + asm.compile(&mut cb); + } + + #[test] + fn test_print_str() { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + + print_str(&mut asm, "Hello, world!"); + asm.compile(&mut cb); + } +} From f9e24ca8dd5e498cd768eaf65bc07acdb268f175 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 15 Jul 2022 16:24:18 -0400 Subject: [PATCH 383/546] Conditionals (https://github.com/Shopify/ruby/pull/323) * CSEL on AArch64 * Implement various Op::CSel* instructions --- yjit/src/asm/arm64/inst/conditional.rs | 73 ++++++++++++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/mod.rs | 22 ++++++++ yjit/src/backend/arm64/mod.rs | 29 ++++++++++ yjit/src/backend/ir.rs | 18 +++++++ yjit/src/backend/x86_64/mod.rs | 44 ++++++++++++++++ 6 files changed, 188 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/conditional.rs diff --git a/yjit/src/asm/arm64/inst/conditional.rs b/yjit/src/asm/arm64/inst/conditional.rs new file mode 100644 index 00000000000000..e1950e95b428d3 --- /dev/null +++ b/yjit/src/asm/arm64/inst/conditional.rs @@ -0,0 +1,73 @@ +use super::super::arg::Sf; + +/// The struct that represents an A64 conditional instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 1 0 1 0 1 0 0 0 0 | +/// | sf rm.............. cond....... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Conditional { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The condition to use for the conditional instruction. 
+ cond: u8, + + /// The number of the second general-purpose source register. + rm: u8, + + /// The size of the registers of this instruction. + sf: Sf +} + +impl Conditional { + /// CSEL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en + pub fn csel(rd: u8, rn: u8, rm: u8, cond: u8, num_bits: u8) -> Self { + Self { rd, rn, cond, rm, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Conditional) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (1 << 28) + | (FAMILY << 25) + | (1 << 23) + | ((inst.rm as u32) << 16) + | ((inst.cond as u32) << 12) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Conditional) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::super::arg::Condition; + + #[test] + fn test_csel() { + let result: u32 = Conditional::csel(0, 1, 2, Condition::NE, 64).into(); + assert_eq!(0x9a821020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 5d4d252d93a17d..c69bb5e871a404 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -6,6 +6,7 @@ mod branch; mod branch_cond; mod breakpoint; mod call; +mod conditional; mod data_imm; mod data_reg; mod load; @@ -25,6 +26,7 @@ pub use branch::Branch; pub use branch_cond::BranchCond; pub use breakpoint::Breakpoint; pub use call::Call; +pub use conditional::Conditional; pub use data_imm::DataImm; pub use data_reg::DataReg; pub use load::Load; diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 6eebccaa6120b6..3feee65197d421 100644 --- 
a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -272,6 +272,23 @@ pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// CSEL - conditionally select between two registers +pub fn csel(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd, cond: u8) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + Conditional::csel(rd.reg_no, rn.reg_no, rm.reg_no, cond, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to csel instruction."), + }; + + cb.write_bytes(&bytes); +} + /// LDADDAL - atomic add with acquire and release semantics pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rs, rt, rn) { @@ -760,6 +777,11 @@ mod tests { check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14))); } + #[test] + fn test_csel() { + check_bytes("6a018c9a", |cb| csel(cb, X10, X11, X12, Condition::EQ)); + } + #[test] fn test_ldaddal() { check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 8b5576f7be91cb..1128eb225f78c6 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -137,6 +137,17 @@ impl Assembler } asm.cret(C_RET_OPND); }, + Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | + Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { + let new_opnds = opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + _ => asm.load(opnd) + } + }).collect(); + + asm.push_insn(op, new_opnds, target, text); + }, Op::IncrCounter => { // Every operand to the IncrCounter instruction need to be a // register once it gets there. 
So here we're going to load @@ -624,6 +635,24 @@ impl Assembler }, Op::Breakpoint => { brk(cb, A64Opnd::None); + }, + Op::CSelZ | Op::CSelE => { + csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::EQ); + }, + Op::CSelNZ | Op::CSelNE => { + csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::NE); + }, + Op::CSelL => { + csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::LT); + }, + Op::CSelLE => { + csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::LE); + }, + Op::CSelG => { + csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::GT); + }, + Op::CSelGE => { + csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::GE); } }; } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index e42a0c50b43be3..a5fd012a2b890f 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -104,6 +104,16 @@ pub enum Op Jnz, Jo, + // Conditional select instructions + CSelZ, + CSelNZ, + CSelE, + CSelNE, + CSelL, + CSelLE, + CSelG, + CSelGE, + // Push and pop registers to/from the C stack CPush, CPop, @@ -877,3 +887,11 @@ def_push_2_opnd_no_out!(cmp, Op::Cmp); def_push_2_opnd_no_out!(test, Op::Test); def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); def_push_2_opnd_no_out!(incr_counter, Op::IncrCounter); +def_push_2_opnd!(csel_z, Op::CSelZ); +def_push_2_opnd!(csel_nz, Op::CSelNZ); +def_push_2_opnd!(csel_e, Op::CSelE); +def_push_2_opnd!(csel_ne, Op::CSelNE); +def_push_2_opnd!(csel_l, Op::CSelL); +def_push_2_opnd!(csel_le, Op::CSelLE); +def_push_2_opnd!(csel_g, Op::CSelG); +def_push_2_opnd!(csel_ge, Op::CSelGE); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 31a907b55e5f45..3ee46e59369f7b 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -142,6 +142,17 @@ impl Assembler asm.push_insn(op, vec![opnd0, opnd1], target, text); }, + 
Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | + Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { + let new_opnds = opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + _ => asm.load(opnd) + } + }).collect(); + + asm.push_insn(op, new_opnds, target, text); + }, Op::Mov => { match (opnds[0], opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { @@ -388,6 +399,39 @@ impl Assembler Op::Breakpoint => int3(cb), + Op::CSelZ => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovz(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelNZ => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovnz(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelE => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmove(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelNE => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovne(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelL => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovl(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelLE => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovle(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelG => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovg(cb, insn.out.into(), insn.opnds[0].into()); + }, + Op::CSelGE => { + mov(cb, insn.out.into(), insn.opnds[1].into()); + cmovge(cb, insn.out.into(), insn.opnds[0].into()); + }, + // We want to keep the panic here because some instructions that // we feed to the backend could get lowered into other // instructions. 
So it's possible that some of our backend From e907aaa3fe87a4aacb808d10042425703c059825 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 18 Jul 2022 15:50:17 -0400 Subject: [PATCH 384/546] ADR fixes for LeaLabel and calls --- yjit/src/backend/arm64/mod.rs | 29 +++++++++++++++-------------- yjit/src/codegen.rs | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 1128eb225f78c6..88cc96ee822254 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -510,8 +510,8 @@ impl Assembler Op::LeaLabel => { let label_idx = insn.target.unwrap().unwrap_label_idx(); - cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - src_addr)); + cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| { + adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); mov(cb, insn.out.into(), Self::SCRATCH0); @@ -519,6 +519,12 @@ impl Assembler Op::CPush => { emit_push(cb, insn.opnds[0].into()); }, + Op::CPop => { + emit_pop(cb, insn.out.into()); + }, + Op::CPopInto => { + emit_pop(cb, insn.opnds[0].into()); + }, Op::CPushAll => { let regs = Assembler::get_caller_save_regs(); @@ -526,18 +532,14 @@ impl Assembler emit_push(cb, A64Opnd::Reg(reg)); } + // Push the flags/state register mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); emit_push(cb, Self::SCRATCH0); }, - Op::CPop => { - emit_pop(cb, insn.out.into()); - }, - Op::CPopInto => { - emit_pop(cb, insn.opnds[0].into()); - }, Op::CPopAll => { let regs = Assembler::get_caller_save_regs(); + // Pop the state/flags register msr(cb, SystemRegister::NZCV, Self::SCRATCH0); emit_pop(cb, Self::SCRATCH0); @@ -546,13 +548,12 @@ impl Assembler } }, Op::CCall => { - let src_addr = cb.get_write_ptr().into_i64() + 4; - let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; - // The offset between the two instructions in bytes. 
Note // that when we encode this into a bl instruction, we'll // divide by 4 because it accepts the number of instructions // to jump over. + let src_addr = cb.get_write_ptr().into_i64() + 4; + let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; let offset = dst_addr - src_addr; // If the offset is short enough, then we'll use the branch @@ -562,9 +563,9 @@ impl Assembler if b_offset_fits_bits(offset) { bl(cb, A64Opnd::new_imm(offset / 4)); } else { - emit_load_value(cb, X30, src_addr as u64); - emit_load_value(cb, X29, dst_addr as u64); - br(cb, X29); + emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + adr(cb, X30, A64Opnd::Imm(8)); + br(cb, Self::SCRATCH0); } }, Op::CRet => { diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 89f3171c3ba004..17b5b09698c740 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -611,6 +611,21 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O let mut asm = Assembler::new(); + + + // FIXME: need to handle this properly + // Maybe add an asm.entry_prologue() insn that compiles to nothing on x86 + // stp x29, x30, [sp, -16]! 
+ // mov x29, sp + + + // NOTE: we also need a matching asm.exit_epilogue() + // mov sp, x29 + // ldp x29, x30, [sp], 16 + + + + // Save the CFP, EC, SP registers to the C stack asm.cpush(CFP); asm.cpush(EC); From 10d50031e66cc19ebe7e10ce80c1d8bd8b745399 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 19 Jul 2022 11:37:03 -0400 Subject: [PATCH 385/546] AArch64 frames (https://github.com/Shopify/ruby/pull/324) --- yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/inst/reg_pair.rs | 216 ++++++++++++++++++++++++++++ yjit/src/asm/arm64/mod.rs | 120 ++++++++++++++++ yjit/src/backend/arm64/mod.rs | 17 +++ yjit/src/backend/ir.rs | 8 ++ yjit/src/backend/x86_64/mod.rs | 3 + 6 files changed, 366 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/reg_pair.rs diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index c69bb5e871a404..8c82eba43532e5 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -16,6 +16,7 @@ mod logical_reg; mod mov; mod nop; mod pc_rel; +mod reg_pair; mod sbfm; mod shift_imm; mod store; @@ -36,6 +37,7 @@ pub use logical_reg::LogicalReg; pub use mov::Mov; pub use nop::Nop; pub use pc_rel::PCRelative; +pub use reg_pair::RegisterPair; pub use sbfm::SBFM; pub use shift_imm::ShiftImm; pub use store::Store; diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs new file mode 100644 index 00000000000000..d8fece2ed64863 --- /dev/null +++ b/yjit/src/asm/arm64/inst/reg_pair.rs @@ -0,0 +1,216 @@ +/// The operation to perform for this instruction. +enum Opc { + /// When the registers are 32-bits wide. + Opc32 = 0b00, + + /// When the registers are 64-bits wide. + Opc64 = 0b10 +} + +/// The kind of indexing to perform for this instruction. 
+enum Index { + StorePostIndex = 0b010, + LoadPostIndex = 0b011, + StoreSignedOffset = 0b100, + LoadSignedOffset = 0b101, + StorePreIndex = 0b110, + LoadPreIndex = 0b111 +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into an Opc variant. +impl From<u8> for Opc { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Opc::Opc64, + 32 => Opc::Opc32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 register pair instruction that can be +/// encoded. +/// +/// STP/LDP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 0 | +/// | opc index..... imm7.................... rt2............. rn.............. rt1............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct RegisterPair { + /// The number of the first register to be transferred. + rt1: u8, + + /// The number of the base register. + rn: u8, + + /// The number of the second register to be transferred. + rt2: u8, + + /// The signed immediate byte offset, a multiple of 8. + imm7: i16, + + /// The kind of indexing to use for this instruction. + index: Index, + + /// The operation to be performed (in terms of size). + opc: Opc +} + +impl RegisterPair { + /// Create a register pair instruction with a given indexing mode.
+ fn new(rt1: u8, rt2: u8, rn: u8, disp: i16, index: Index, num_bits: u8) -> Self { + Self { rt1, rn, rt2, imm7: disp / 8, index, opc: num_bits.into() } + } + + /// LDP (signed offset) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadSignedOffset, num_bits) + } + + /// LDP (pre-index) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadPreIndex, num_bits) + } + + /// LDP (post-index) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm> + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadPostIndex, num_bits) + } + + /// STP (signed offset) + /// STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StoreSignedOffset, num_bits) + } + + /// STP (pre-index) + /// STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StorePreIndex, num_bits) + } + + /// STP (post-index) + /// STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm> + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StorePostIndex, num_bits) + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<RegisterPair> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: RegisterPair) -> Self { + let mut imm7 = (inst.imm7 as u32) & ((1 << 7) - 1); + + if inst.imm7 < 0 { + imm7 |= 1 << 6; + } + + 0 + | ((inst.opc as u32) << 30) + | (1 << 29) + | (FAMILY << 25) + | ((inst.index as u32) << 22) + | (imm7 << 15) + | ((inst.rt2 as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rt1 as u32) + } +} + +impl From<RegisterPair> for [u8; 4] { + /// Convert an instruction into a 4 byte array.
+ fn from(inst: RegisterPair) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldp() { + let inst = RegisterPair::ldp(0, 1, 2, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9400440, result); + } + + #[test] + fn test_ldp_maximum_displacement() { + let inst = RegisterPair::ldp(0, 1, 2, 504, 64); + let result: u32 = inst.into(); + assert_eq!(0xa95f8440, result); + } + + #[test] + fn test_ldp_minimum_displacement() { + let inst = RegisterPair::ldp(0, 1, 2, -512, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9600440, result); + } + + #[test] + fn test_ldp_pre() { + let inst = RegisterPair::ldp_pre(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9d00440, result); + } + + #[test] + fn test_ldp_post() { + let inst = RegisterPair::ldp_post(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa8d00440, result); + } + + #[test] + fn test_stp() { + let inst = RegisterPair::stp(0, 1, 2, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9000440, result); + } + + #[test] + fn test_stp_maximum_displacement() { + let inst = RegisterPair::stp(0, 1, 2, 504, 64); + let result: u32 = inst.into(); + assert_eq!(0xa91f8440, result); + } + + #[test] + fn test_stp_minimum_displacement() { + let inst = RegisterPair::stp(0, 1, 2, -512, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9200440, result); + } + + #[test] + fn test_stp_pre() { + let inst = RegisterPair::stp_pre(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9900440, result); + } + + #[test] + fn test_stp_post() { + let inst = RegisterPair::stp_post(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa8900440, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 3feee65197d421..333fce495fb29d 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -306,6 +306,51 @@ pub fn ldaddal(cb: 
&mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// LDP (signed offset) - load a pair of registers from memory +pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + + RegisterPair::ldp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDP (pre-index) - load a pair of registers from memory, update the base pointer before loading it +pub fn ldp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + + RegisterPair::ldp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDP (post-index) - load a pair of registers from memory, update the base pointer after loading it +pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + + RegisterPair::ldp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => 
panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + /// LDR - load a PC-relative memory address into a register pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { let bytes: [u8; 4] = match rt { @@ -509,6 +554,51 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// STP (signed offset) - store a pair of registers to memory +pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + + RegisterPair::stp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (pre-index) - store a pair of registers to memory, update the base pointer before storing +pub fn stp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + + RegisterPair::stp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (post-index) - store a pair of registers to memory, update the base pointer after storing +pub fn stp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits ==
rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + + RegisterPair::stp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + /// STUR - store a value in a register at a memory address pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -787,6 +877,21 @@ mod tests { check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); } + #[test] + fn test_ldp() { + check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldp_pre() { + check_bytes("8a2dcda9", |cb| ldp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldp_post() { + check_bytes("8a2dcda8", |cb| ldp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + #[test] fn test_ldr() { check_bytes("40010058", |cb| ldr(cb, X0, 10)); @@ -882,6 +987,21 @@ mod tests { check_bytes("80025fd6", |cb| ret(cb, X20)); } + #[test] + fn test_stp() { + check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_stp_pre() { + check_bytes("8a2d8da9", |cb| stp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_stp_post() { + check_bytes("8a2d8da8", |cb| stp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + #[test] fn test_stur() { check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 88cc96ee822254..69c35a368820f8 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -426,6 +426,14 @@ impl Assembler Op::Add => { add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, + Op::FrameSetup => { + stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 
-16)); + mov(cb, X29, C_SP_REG); + }, + Op::FrameTeardown => { + mov(cb, C_SP_REG, X29); + ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); + }, Op::Sub => { sub(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, @@ -727,6 +735,15 @@ mod tests { asm.compile_with_num_regs(&mut cb, 0); } + #[test] + fn test_emit_frame() { + let (mut asm, mut cb) = setup_asm(); + + asm.frame_setup(); + asm.frame_teardown(); + asm.compile_with_num_regs(&mut cb, 0); + } + #[test] fn test_emit_lea_label() { let (mut asm, mut cb) = setup_asm(); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index a5fd012a2b890f..de59b420c1da62 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -137,6 +137,12 @@ pub enum Op // Trigger a debugger breakpoint Breakpoint, + + /// Set up the frame stack as necessary per the architecture. + FrameSetup, + + /// Tear down the frame stack as necessary per the architecture. + FrameTeardown } // Memory operand base @@ -895,3 +901,5 @@ def_push_2_opnd!(csel_l, Op::CSelL); def_push_2_opnd!(csel_le, Op::CSelLE); def_push_2_opnd!(csel_g, Op::CSelG); def_push_2_opnd!(csel_ge, Op::CSelGE); +def_push_0_opnd_no_out!(frame_setup, Op::FrameSetup); +def_push_0_opnd_no_out!(frame_teardown, Op::FrameTeardown); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 3ee46e59369f7b..4cd48ee3c9b595 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -236,6 +236,9 @@ impl Assembler add(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, + Op::FrameSetup => {}, + Op::FrameTeardown => {}, + Op::Sub => { sub(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, From fd97f8ebc7f22df7e3a7bf05c5d9328946cb539e Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 19 Jul 2022 11:45:46 -0400 Subject: [PATCH 386/546] Use frame_setup() and frame_teardown() --- yjit/src/codegen.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff 
--git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 17b5b09698c740..6d4ac72433db3d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -398,6 +398,8 @@ fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { asm.cpop_into(EC); asm.cpop_into(CFP); + asm.frame_teardown(); + asm.cret(Qundef.into()); asm.compile(ocb); @@ -447,6 +449,8 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { asm.cpop_into(EC); asm.cpop_into(CFP); + asm.frame_teardown(); + asm.cret(Qundef.into()); } @@ -531,6 +535,8 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { asm.cpop_into(EC); asm.cpop_into(CFP); + asm.frame_teardown(); + asm.cret(Qundef.into()); asm.compile(ocb); @@ -555,6 +561,8 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { asm.cpop_into(EC); asm.cpop_into(CFP); + asm.frame_teardown(); + asm.cret(C_RET_OPND); asm.compile(ocb); @@ -584,6 +592,8 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { asm.cpop_into(EC); asm.cpop_into(CFP); + asm.frame_teardown(); + asm.cret(Qundef.into()); // PC should match the expected insn_idx @@ -611,20 +621,7 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O let mut asm = Assembler::new(); - - - // FIXME: need to handle this properly - // Maybe add an asm.entry_prologue() insn that compiles to nothing on x86 - // stp x29, x30, [sp, -16]! 
- // mov x29, sp - - - // NOTE: we also need a matching asm.exit_epilogue() - // mov sp, x29 - // ldp x29, x30, [sp], 16 - - - + asm.frame_setup(); // Save the CFP, EC, SP registers to the C stack asm.cpush(CFP); From db84d2921fc2ae1397c75cbf5d6752dd10f94219 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 19 Jul 2022 12:10:23 -0400 Subject: [PATCH 387/546] BLR instruction for AArch64 (https://github.com/Shopify/ruby/pull/325) --- yjit/src/asm/arm64/inst/branch.rs | 23 +++++++++++++++++++---- yjit/src/asm/arm64/mod.rs | 15 +++++++++++++++ yjit/src/backend/arm64/mod.rs | 7 +++---- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs index 7f93f5e201d88f..f15ef2a9b0fc23 100644 --- a/yjit/src/asm/arm64/inst/branch.rs +++ b/yjit/src/asm/arm64/inst/branch.rs @@ -1,10 +1,13 @@ /// Which operation to perform. enum Op { /// Perform a BR instruction. - Br = 0b00, + BR = 0b00, + + /// Perform a BLR instruction. + BLR = 0b01, /// Perform a RET instruction. - Ret = 0b10 + RET = 0b10 } /// The struct that represents an A64 branch instruction that can be encoded. 
@@ -27,13 +30,19 @@ impl Branch { /// BR /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en pub fn br(rn: u8) -> Self { - Self { rn, op: Op::Br } + Self { rn, op: Op::BR } + } + + /// BLR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en + pub fn blr(rn: u8) -> Self { + Self { rn, op: Op::BLR } } /// RET /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en pub fn ret(rn: u8) -> Self { - Self { rn, op: Op::Ret } + Self { rn, op: Op::RET } } } @@ -71,6 +80,12 @@ mod tests { assert_eq!(0xd61f0000, result); } + #[test] + fn test_blr() { + let result: u32 = Branch::blr(0).into(); + assert_eq!(0xd63f0000, result); + } + #[test] fn test_ret() { let result: u32 = Branch::ret(30).into(); diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 333fce495fb29d..8be7e6f5680408 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -226,6 +226,16 @@ pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { cb.write_bytes(&bytes); } +/// BLR - branch with link to a register +pub fn blr(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::blr(rn.reg_no).into(), + _ => panic!("Invalid operand to blr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// BR - branch to a register pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { let bytes: [u8; 4] = match rn { @@ -842,6 +852,11 @@ mod tests { check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); } + #[test] + fn test_blr() { + check_bytes("80023fd6", |cb| blr(cb, X20)); + } + #[test] fn test_br() { check_bytes("80021fd6", |cb| br(cb, X20)); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 69c35a368820f8..729ee06fa98f40 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -359,8 +359,8 @@ impl Assembler let 
value = dst_addr as u64; b(cb, A64Opnd::new_imm(emit_load_size(value).into())); - emit_load_value(cb, X29, value); - br(cb, X29); + emit_load_value(cb, Assembler::SCRATCH0, value); + br(cb, Assembler::SCRATCH0); } } }, @@ -572,8 +572,7 @@ impl Assembler bl(cb, A64Opnd::new_imm(offset / 4)); } else { emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); - adr(cb, X30, A64Opnd::Imm(8)); - br(cb, Self::SCRATCH0); + blr(cb, Self::SCRATCH0); } }, Op::CRet => { From f09fd032d86a4e73f9434919a4d0814129b40539 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 19 Jul 2022 12:14:26 -0400 Subject: [PATCH 388/546] Assert not the same register in AArch64 --- yjit/src/asm/arm64/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 8be7e6f5680408..1f9efd1629eda8 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -322,6 +322,7 @@ pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); RegisterPair::ldp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() }, @@ -337,6 +338,7 @@ pub fn ldp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); RegisterPair::ldp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() }, @@ -352,6 +354,7 @@ pub fn 
ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); RegisterPair::ldp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() }, @@ -570,6 +573,7 @@ pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); RegisterPair::stp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() }, @@ -585,6 +589,7 @@ pub fn stp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); RegisterPair::stp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() }, @@ -600,6 +605,7 @@ pub fn stp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of 
the same register"); RegisterPair::stp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() }, From b3cd7a9cd3ab5f66f70bd530761358d1710377aa Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Tue, 19 Jul 2022 18:23:18 +0100 Subject: [PATCH 389/546] Fix some of the codegen.rs tests (https://github.com/Shopify/ruby/pull/327) --- yjit/src/codegen.rs | 89 ++++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 6d4ac72433db3d..654e6d7d632045 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -6371,12 +6371,11 @@ impl CodegenGlobals { } } -/* #[cfg(test)] mod tests { use super::*; - fn setup_codegen() -> (JITState, Context, CodeBlock, OutlinedCb) { + fn setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) { let blockid = BlockId { iseq: ptr::null(), idx: 0, @@ -6386,6 +6385,7 @@ mod tests { return ( JITState::new(&block), Context::new(), + Assembler::new(), CodeBlock::new_dummy(256 * 1024), OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)), ); @@ -6398,31 +6398,35 @@ mod tests { assert!(ocb.unwrap().get_write_pos() > 0); } + /* #[test] fn test_gen_exit() { - let (_, ctx, mut cb, _) = setup_codegen(); - gen_exit(0 as *mut VALUE, &ctx, &mut cb); + let (_, ctx, mut asm, mut cb, _) = setup_codegen(); + gen_exit(0 as *mut VALUE, &ctx, &mut asm); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_get_side_exit() { - let (mut jit, ctx, _, mut ocb) = setup_codegen(); - get_side_exit(&mut jit, &mut ocb, &ctx); + let (mut jit, ctx, _, _, mut ocb) = setup_codegen(); + get_side_exit(&mut jit, &mut ocb, &ctx); assert!(ocb.unwrap().get_write_pos() > 0); } + */ #[test] fn test_gen_check_ints() { - let (_, _ctx, mut cb, mut ocb) = setup_codegen(); + let (_, _ctx, mut asm, mut cb, mut ocb) = setup_codegen(); let side_exit = ocb.unwrap().get_write_ptr(); - gen_check_ints(&mut cb, side_exit); + 
gen_check_ints(&mut asm, side_exit); } #[test] fn test_gen_nop() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_nop(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_nop(&mut jit, &mut context, &mut asm, &mut ocb); + asm.compile(&mut cb); assert_eq!(status, KeepCompiling); assert_eq!(context.diff(&Context::new()), 0); @@ -6431,9 +6435,9 @@ mod tests { #[test] fn test_gen_pop() { - let (mut jit, _, mut cb, mut ocb) = setup_codegen(); + let (mut jit, _, mut asm, mut cb, mut ocb) = setup_codegen(); let mut context = Context::new_with_stack_size(1); - let status = gen_pop(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_pop(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); assert_eq!(context.diff(&Context::new()), 0); @@ -6441,9 +6445,9 @@ mod tests { #[test] fn test_gen_dup() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); - let status = gen_dup(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_dup(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6451,12 +6455,14 @@ mod tests { assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0))); assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } + /* #[test] fn test_gen_dupn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); context.stack_push(Type::Flonum); @@ -6464,7 +6470,7 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_dupn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_dupn(&mut jit, &mut 
context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6473,16 +6479,19 @@ mod tests { assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + // TODO: this is writing zero bytes on x86. Why? + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } + */ #[test] fn test_gen_swap() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); context.stack_push(Type::Flonum); - let status = gen_swap(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_swap(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1)); @@ -6494,58 +6503,61 @@ mod tests { #[test] fn test_putnil() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_putnil(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putnil(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); assert_eq!(status, KeepCompiling); assert_eq!(tmp_type_top, Type::Nil); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_putobject_qtrue() { // Test gen_putobject with Qtrue - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); let mut value_array: [u64; 2] = [0, Qtrue.into()]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_putobject(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); assert_eq!(status, KeepCompiling); 
assert_eq!(tmp_type_top, Type::True); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_putobject_fixnum() { // Test gen_putobject with a Fixnum to test another conditional branch - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); // The Fixnum 7 is encoded as 7 * 2 + 1, or 15 let mut value_array: [u64; 2] = [0, 15]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_putobject(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); assert_eq!(status, KeepCompiling); assert_eq!(tmp_type_top, Type::Fixnum); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_int2fix() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); jit.opcode = YARVINSN_putobject_INT2FIX_0_.as_usize(); - let status = gen_putobject_int2fix(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_putobject_int2fix(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); @@ -6556,16 +6568,17 @@ mod tests { #[test] fn test_putself() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_putself(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putself(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_setn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); context.stack_push(Type::Flonum); 
context.stack_push(Type::CString); @@ -6574,7 +6587,7 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_setn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_setn(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6582,12 +6595,13 @@ mod tests { assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1))); assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_topn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Flonum); context.stack_push(Type::CString); @@ -6595,7 +6609,7 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_topn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_topn(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6603,12 +6617,13 @@ mod tests { assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(1))); assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_adjuststack() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Flonum); context.stack_push(Type::CString); context.stack_push(Type::Fixnum); @@ -6617,21 +6632,21 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_adjuststack(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_adjuststack(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb); assert!(cb.get_write_pos() == 
0); // No instructions written } #[test] fn test_gen_leave() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); // Push return value context.stack_push(Type::Fixnum); - gen_leave(&mut jit, &mut context, &mut cb, &mut ocb); + gen_leave(&mut jit, &mut context, &mut asm, &mut ocb); } } -*/ From c9484fe0c1f2897521d08780a66fab89e5e2f5b1 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 19 Jul 2022 15:26:06 -0400 Subject: [PATCH 390/546] Fix push/pop and frame setup/teardown with Kevin & Alan --- yjit/src/backend/arm64/mod.rs | 12 ++++++++---- yjit/src/codegen.rs | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 729ee06fa98f40..c48c03fe045de5 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -380,7 +380,7 @@ impl Assembler /// Emit a push instruction for the given operand by adding to the stack /// pointer and then storing the given value. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { - add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); stur(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, 0)); } @@ -388,7 +388,7 @@ impl Assembler /// and then subtracting from the stack pointer. 
fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { ldur(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, 0)); - sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); } // dbg!(&self.insns); @@ -428,10 +428,14 @@ impl Assembler }, Op::FrameSetup => { stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); - mov(cb, X29, C_SP_REG); + + // X29 (frame_pointer) = SP + add(cb, X29, C_SP_REG, A64Opnd::new_uimm(0)); }, Op::FrameTeardown => { - mov(cb, C_SP_REG, X29); + // SP = X29 (frame pointer) + add(cb, C_SP_REG, X29, A64Opnd::new_uimm(0)); + ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, Op::Sub => { diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 654e6d7d632045..fa6b4e41b0b31f 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -6232,7 +6232,7 @@ impl CodegenGlobals { let mut codegen_globals = CodegenGlobals { inline_cb: cb, outlined_cb: ocb, - leave_exit_code: leave_exit_code, + leave_exit_code, stub_exit_code: stub_exit_code, outline_full_cfunc_return_pos: cfunc_exit_code, global_inval_patches: Vec::new(), From 4ae2c744ac6b5b84f2bfebb9046c0c364863d7a4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 19 Jul 2022 17:44:39 -0400 Subject: [PATCH 391/546] A lot of fixes coming from our pairing session (https://github.com/Shopify/ruby/pull/329) * Move to/from SP on AArch64 * Consolidate loads and stores * Implement LDR post-index and LDR pre-index for AArch64 * Implement STR post-index and STR pre-index for AArch64 * Module entrypoints for LDR pre/post -index and STR pre/post -index * Use STR (pre-index) and LDR (post-index) to implement push/pop * Go back to using MOV for to/from SP --- yjit/src/asm/arm64/inst/load.rs | 124 --------------- yjit/src/asm/arm64/inst/load_store.rs | 215 ++++++++++++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 6 +- yjit/src/asm/arm64/inst/store.rs | 105 ------------- yjit/src/asm/arm64/mod.rs | 108 ++++++++++++- yjit/src/backend/arm64/mod.rs | 12 +- 6 files changed, 326 
insertions(+), 244 deletions(-) delete mode 100644 yjit/src/asm/arm64/inst/load.rs create mode 100644 yjit/src/asm/arm64/inst/load_store.rs delete mode 100644 yjit/src/asm/arm64/inst/store.rs diff --git a/yjit/src/asm/arm64/inst/load.rs b/yjit/src/asm/arm64/inst/load.rs deleted file mode 100644 index b64a6a96ac80fe..00000000000000 --- a/yjit/src/asm/arm64/inst/load.rs +++ /dev/null @@ -1,124 +0,0 @@ -/// The size of the operands being operated on. -enum Size { - Size32 = 0b10, - Size64 = 0b11, -} - -/// The operation to perform for this instruction. -enum Opc { - LDUR = 0b01, - LDURSW = 0b10 -} - -/// A convenience function so that we can convert the number of bits of an -/// register operand directly into an Sf enum variant. -impl From for Size { - fn from(num_bits: u8) -> Self { - match num_bits { - 64 => Size::Size64, - 32 => Size::Size32, - _ => panic!("Invalid number of bits: {}", num_bits) - } - } -} - -/// The struct that represents an A64 data processing -- immediate instruction -/// that can be encoded. -/// -/// LDUR -/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ -/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 1 1 0 0 0 0 0 0 | -/// | size. opc.. imm9.......................... rn.............. rt.............. | -/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ -/// -pub struct Load { - /// The number of the register to load the value into. - rt: u8, - - /// The base register with which to form the address. - rn: u8, - - /// The optional signed immediate byte offset from the base register. - imm9: i16, - - /// The operation to perform for this instruction. - opc: Opc, - - /// The size of the operands being operated on. 
- size: Size -} - -impl Load { - /// LDUR (load register, unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en - pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { - Self { rt, rn, imm9, opc: Opc::LDUR, size: num_bits.into() } - } - - /// LDURSW (load register, unscaled, signed) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en - pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self { - Self { rt, rn, imm9, opc: Opc::LDURSW, size: Size::Size32 } - } -} - -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en -const FAMILY: u32 = 0b0100; - -impl From for u32 { - /// Convert an instruction into a 32-bit value. - fn from(inst: Load) -> Self { - let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); - - 0 - | ((inst.size as u32) << 30) - | (0b11 << 28) - | (FAMILY << 25) - | ((inst.opc as u32) << 22) - | (imm9 << 12) - | ((inst.rn as u32) << 5) - | (inst.rt as u32) - } -} - -impl From for [u8; 4] { - /// Convert an instruction into a 4 byte array. 
- fn from(inst: Load) -> [u8; 4] { - let result: u32 = inst.into(); - result.to_le_bytes() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_ldur() { - let inst = Load::ldur(0, 1, 0, 64); - let result: u32 = inst.into(); - assert_eq!(0xf8400020, result); - } - - #[test] - fn test_ldur_with_imm() { - let inst = Load::ldur(0, 1, 123, 64); - let result: u32 = inst.into(); - assert_eq!(0xf847b020, result); - } - - #[test] - fn test_ldursw() { - let inst = Load::ldursw(0, 1, 0); - let result: u32 = inst.into(); - assert_eq!(0xb8800020, result); - } - - #[test] - fn test_ldursw_with_imm() { - let inst = Load::ldursw(0, 1, 123); - let result: u32 = inst.into(); - assert_eq!(0xb887b020, result); - } -} diff --git a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs new file mode 100644 index 00000000000000..80a67c837e025b --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_store.rs @@ -0,0 +1,215 @@ +/// The size of the operands being operated on. +enum Size { + Size32 = 0b10, + Size64 = 0b11, +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The operation to perform for this instruction. +enum Opc { + STR = 0b00, + LDR = 0b01, + LDURSW = 0b10 +} + +/// What kind of indexing to perform for this instruction. +enum Index { + None = 0b00, + PostIndex = 0b01, + PreIndex = 0b11 +} + +/// The struct that represents an A64 load or store instruction that can be +/// encoded. 
+/// +/// LDR/LDUR/LDURSW/STR/STUR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 | +/// | size. opc.. imm9.......................... idx.. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadStore { + /// The number of the register to load the value into. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// What kind of indexing to perform for this instruction. + idx: Index, + + /// The optional signed immediate byte offset from the base register. + imm9: i16, + + /// The operation to perform for this instruction. + opc: Opc, + + /// The size of the operands being operated on. + size: Size +} + +impl LoadStore { + /// LDR (immediate, post-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + pub fn ldr_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDR (immediate, pre-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + pub fn ldr_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDUR (load register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en + pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDURSW (load register, unscaled, signed) + /// 
https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en + pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDURSW, size: Size::Size32 } + } + + /// STR (immediate, post-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + pub fn str_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STR (immediate, pre-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + pub fn str_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STUR (store register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en + pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadStore) -> Self { + let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); + + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | ((inst.opc as u32) << 22) + | (imm9 << 12) + | ((inst.idx as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LoadStore) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr_post() { + let inst = LoadStore::ldr_post(0, 1, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8410420, result); + } + + #[test] + fn test_ldr_pre() { + let inst = LoadStore::ldr_pre(0, 1, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8410c20, result); + } + + #[test] + fn test_ldur() { + let inst = LoadStore::ldur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8400020, result); + } + + #[test] + fn test_ldur_with_imm() { + let inst = LoadStore::ldur(0, 1, 123, 64); + let result: u32 = inst.into(); + assert_eq!(0xf847b020, result); + } + + #[test] + fn test_ldursw() { + let inst = LoadStore::ldursw(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0xb8800020, result); + } + + #[test] + fn test_ldursw_with_imm() { + let inst = LoadStore::ldursw(0, 1, 123); + let result: u32 = inst.into(); + assert_eq!(0xb887b020, result); + } + + #[test] + fn test_str_post() { + let inst = LoadStore::str_post(0, 1, -16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81f0420, result); + } + + #[test] + fn test_str_pre() { + let inst = LoadStore::str_pre(0, 1, -16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81f0c20, result); + } + + #[test] + fn test_stur() { + let inst = LoadStore::stur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8000020, result); + } + + #[test] + fn test_stur_negative_offset() { + let inst = LoadStore::stur(0, 1, -1, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81ff020, result); + } + + #[test] + fn test_stur_positive_offset() { + let inst = LoadStore::stur(0, 1, 255, 64); + let result: u32 = inst.into(); + assert_eq!(0xf80ff020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 8c82eba43532e5..42df2d137a7741 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ 
b/yjit/src/asm/arm64/inst/mod.rs @@ -9,8 +9,8 @@ mod call; mod conditional; mod data_imm; mod data_reg; -mod load; mod load_literal; +mod load_store; mod logical_imm; mod logical_reg; mod mov; @@ -19,7 +19,6 @@ mod pc_rel; mod reg_pair; mod sbfm; mod shift_imm; -mod store; mod sys_reg; pub use atomic::Atomic; @@ -30,8 +29,8 @@ pub use call::Call; pub use conditional::Conditional; pub use data_imm::DataImm; pub use data_reg::DataReg; -pub use load::Load; pub use load_literal::LoadLiteral; +pub use load_store::LoadStore; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; pub use mov::Mov; @@ -40,5 +39,4 @@ pub use pc_rel::PCRelative; pub use reg_pair::RegisterPair; pub use sbfm::SBFM; pub use shift_imm::ShiftImm; -pub use store::Store; pub use sys_reg::SysReg; diff --git a/yjit/src/asm/arm64/inst/store.rs b/yjit/src/asm/arm64/inst/store.rs deleted file mode 100644 index 42b9055ae80622..00000000000000 --- a/yjit/src/asm/arm64/inst/store.rs +++ /dev/null @@ -1,105 +0,0 @@ -/// The size of the operands being operated on. -enum Size { - Size32 = 0b10, - Size64 = 0b11, -} - -/// A convenience function so that we can convert the number of bits of an -/// register operand directly into an Sf enum variant. -impl From for Size { - fn from(num_bits: u8) -> Self { - match num_bits { - 64 => Size::Size64, - 32 => Size::Size32, - _ => panic!("Invalid number of bits: {}", num_bits) - } - } -} - -/// The struct that represents an A64 store instruction that can be encoded. -/// -/// STUR -/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ -/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | -/// | 1 1 1 0 0 0 0 0 0 0 0 | -/// | size. imm9.......................... rn.............. rt.............. 
| -/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ -/// -pub struct Store { - /// The number of the register to be transferred. - rt: u8, - - /// The register holding the memory location. - rn: u8, - - /// The optional signed immediate byte offset from the base register. - imm9: i16, - - /// The size of the operands being operated on. - size: Size -} - -impl Store { - /// STUR (store register, unscaled) - /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en - pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { - Self { - rt, - rn, - imm9, - size: num_bits.into() - } - } -} - -/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en -const FAMILY: u32 = 0b0100; - -impl From for u32 { - /// Convert an instruction into a 32-bit value. - fn from(inst: Store) -> Self { - let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); - - 0 - | ((inst.size as u32) << 30) - | (0b11 << 28) - | (FAMILY << 25) - | (imm9 << 12) - | ((inst.rn as u32) << 5) - | (inst.rt as u32) - } -} - -impl From for [u8; 4] { - /// Convert an instruction into a 4 byte array. 
- fn from(inst: Store) -> [u8; 4] { - let result: u32 = inst.into(); - result.to_le_bytes() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_stur() { - let inst = Store::stur(0, 1, 0, 64); - let result: u32 = inst.into(); - assert_eq!(0xf8000020, result); - } - - #[test] - fn test_stur_negative_offset() { - let inst = Store::stur(0, 1, -1, 64); - let result: u32 = inst.into(); - assert_eq!(0xf81ff020, result); - } - - #[test] - fn test_stur_positive_offset() { - let inst = Store::stur(0, 1, 255, 64); - let result: u32 = inst.into(); - assert_eq!(0xf80ff020, result); - } -} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 1f9efd1629eda8..8b59d6c354aff1 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -376,19 +376,49 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { cb.write_bytes(&bytes); } +/// LDR (post-index) - load a register from memory, update the base pointer after loading it +pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDR (pre-index) - load a register from memory, update the base pointer before loading it +pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ 
=> panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// LDUR - load a memory address into a register pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - Load::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() + LoadStore::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() }, (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); - Load::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, _ => panic!("Invalid operands for LDUR") }; @@ -403,7 +433,7 @@ pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); - Load::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() }, _ => panic!("Invalid operand combination to ldursw instruction.") }; @@ -444,6 +474,16 @@ pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { /// MOV - move a value in a register to another register pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 }), A64Opnd::Reg(rm)) => { + assert!(rm.num_bits == 64, "Expected rm to be 64 bits"); + + DataImm::add(31, rm.reg_no, 0, 64).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 })) => { + assert!(rd.num_bits == 64, "Expected rd to be 64 bits"); + + DataImm::add(rd.reg_no, 31, 0, 64).into() + }, 
(A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); @@ -615,6 +655,36 @@ pub fn stp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// STR (post-index) - store a register to memory, update the base pointer after storing it +pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to str instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STR (pre-index) - store a register to memory, update the base pointer before storing it +pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to str instruction."), + }; + + cb.write_bytes(&bytes); +} + /// STUR - store a value in a register at a memory address pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -622,7 +692,7 @@ pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); - Store::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as
i16, rt.num_bits).into() }, _ => panic!("Invalid operand combination to stur instruction.") }; @@ -918,6 +988,16 @@ mod tests { check_bytes("40010058", |cb| ldr(cb, X0, 10)); } + #[test] + fn test_ldr_post() { + check_bytes("6a0541f8", |cb| ldr_post(cb, X10, A64Opnd::new_mem(64, X11, 16))); + } + + #[test] + fn test_ldr_pre() { + check_bytes("6a0d41f8", |cb| ldr_pre(cb, X10, A64Opnd::new_mem(64, X11, 16))); + } + #[test] fn test_ldur_memory() { check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); @@ -953,6 +1033,16 @@ mod tests { check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555))); } + #[test] + fn test_mov_into_sp() { + check_bytes("1f000091", |cb| mov(cb, X31, X0)); + } + + #[test] + fn test_mov_from_sp() { + check_bytes("e0030091", |cb| mov(cb, X0, X31)); + } + #[test] fn test_movk() { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); @@ -1023,6 +1113,16 @@ mod tests { check_bytes("8a2d8da8", |cb| stp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); } + #[test] + fn test_str_post() { + check_bytes("6a051ff8", |cb| str_post(cb, X10, A64Opnd::new_mem(64, X11, -16))); + } + + #[test] + fn test_str_pre() { + check_bytes("6a0d1ff8", |cb| str_pre(cb, X10, A64Opnd::new_mem(64, X11, -16))); + } + #[test] fn test_stur() { check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index c48c03fe045de5..df4fcceec6c6b3 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -34,7 +34,7 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); // These constants define the way we work with Arm64's stack pointer. The stack // pointer always needs to be aligned to a 16-byte boundary. 
pub const C_SP_REG: A64Opnd = X31; -pub const C_SP_STEP: A64Opnd = A64Opnd::UImm(16); +pub const C_SP_STEP: i32 = 16; /// Map Opnd to A64Opnd impl From for A64Opnd { @@ -380,15 +380,13 @@ impl Assembler /// Emit a push instruction for the given operand by adding to the stack /// pointer and then storing the given value. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { - sub(cb, C_SP_REG, C_SP_REG, C_SP_STEP); - stur(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, 0)); + str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); } /// Emit a pop instruction into the given operand by loading the value /// and then subtracting from the stack pointer. fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { - ldur(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, 0)); - add(cb, C_SP_REG, C_SP_REG, C_SP_STEP); + ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); } // dbg!(&self.insns); @@ -430,11 +428,11 @@ impl Assembler stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); // X29 (frame_pointer) = SP - add(cb, X29, C_SP_REG, A64Opnd::new_uimm(0)); + mov(cb, X29, C_SP_REG); }, Op::FrameTeardown => { // SP = X29 (frame pointer) - add(cb, C_SP_REG, X29, A64Opnd::new_uimm(0)); + mov(cb, C_SP_REG, X29); ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, From 90137f519459764a78ae8eb777e3f396f7cffd98 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 20 Jul 2022 10:43:14 -0400 Subject: [PATCH 392/546] Implement PosMarker instruction (https://github.com/Shopify/ruby/pull/328) * Implement PosMarker instruction * Implement PosMarker in the arm backend * Make bindgen run only for clang image * Fix if-else in cirrus CI file * Add missing semicolon * Try removing trailing semicolon * Try to fix shell/YAML syntax Co-authored-by: Alan Wu --- .cirrus.yml | 7 +++- yjit/src/backend/arm64/mod.rs | 21 +++++++---- yjit/src/backend/ir.rs | 64 +++++++++++++++++++++------------- yjit/src/backend/x86_64/mod.rs | 15 +++++--- yjit/src/codegen.rs | 19 +++++----- 
yjit/src/core.rs | 20 +++++++++-- 6 files changed, 99 insertions(+), 47 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 3e03d0adc3d3c6..293873af5b919d 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -116,7 +116,12 @@ yjit_task: --prefix="$RUBY_PREFIX" --enable-yjit=dev make_miniruby_script: source $HOME/.cargo/env && make -j miniruby - make_bindgen_script: source $HOME/.cargo/env && make -j yjit-bindgen + make_bindgen_script: | + if [[ "$CC" = "clang-12" ]]; then + source $HOME/.cargo/env && make -j yjit-bindgen + else + echo "only running bindgen on clang image" + fi boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 # output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index df4fcceec6c6b3..be329f61cfbaa1 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -81,7 +81,7 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { - self.forward_pass(|asm, index, op, opnds, target, text| { + self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { // Load all Value operands into registers that aren't already a part // of Load instructions. 
let opnds = match op { @@ -103,15 +103,15 @@ impl Assembler (Opnd::Mem(_), Opnd::Mem(_)) => { let opnd0 = asm.load(opnds[0]); let opnd1 = asm.load(opnds[1]); - asm.push_insn(op, vec![opnd0, opnd1], target, text); + asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); }, (mem_opnd @ Opnd::Mem(_), other_opnd) | (other_opnd, mem_opnd @ Opnd::Mem(_)) => { let opnd0 = asm.load(mem_opnd); - asm.push_insn(op, vec![opnd0, other_opnd], target, text); + asm.push_insn(op, vec![opnd0, other_opnd], target, text, pos_marker); }, _ => { - asm.push_insn(op, opnds, target, text); + asm.push_insn(op, opnds, target, text, pos_marker); } } }, @@ -146,7 +146,7 @@ impl Assembler } }).collect(); - asm.push_insn(op, new_opnds, target, text); + asm.push_insn(op, new_opnds, target, text, pos_marker); }, Op::IncrCounter => { // Every operand to the IncrCounter instruction need to be a @@ -250,7 +250,7 @@ impl Assembler asm.test(opnd0, opnds[1]); }, _ => { - asm.push_insn(op, opnds, target, text); + asm.push_insn(op, opnds, target, text, pos_marker); } }; }) @@ -402,9 +402,18 @@ impl Assembler cb.add_comment(&insn.text.as_ref().unwrap()); } }, + Op::Label => { cb.write_label(insn.target.unwrap().unwrap_label_idx()); }, + + // Report back the current position in the generated code + Op::PosMarker => { + let pos = cb.get_write_ptr(); + let pos_marker_fn = insn.pos_marker.as_ref().unwrap(); + pos_marker_fn(pos); + } + Op::BakeString => { let str = insn.text.as_ref().unwrap(); for byte in str.as_bytes() { diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index de59b420c1da62..13a5c5c3d3d7a5 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -35,6 +35,9 @@ pub enum Op // Add a label into the IR at the point that this instruction is added. Label, + // Mark a position in the generated code + PosMarker, + // Bake a string directly into the instruction stream. 
BakeString, @@ -342,8 +345,9 @@ impl From for Target { } } +type PosMarkerFn = Box; + /// YJIT IR instruction -#[derive(Clone)] pub struct Insn { // Opcode for the instruction @@ -361,9 +365,9 @@ pub struct Insn // List of branch targets (branch instructions only) pub(super) target: Option, - // Position in the generated machine code - // Useful for comments and for patching jumps - pub(super) pos: Option, + // Callback to mark the position of this instruction + // in the generated code + pub(super) pos_marker: Option, } impl fmt::Debug for Insn { @@ -387,9 +391,6 @@ impl fmt::Debug for Insn { if let Some(target) = self.target { write!(fmt, " target={target:?}")?; } - if let Some(pos) = self.pos { - write!(fmt, " pos={pos:?}")?; - } write!(fmt, " -> {:?}", self.out) } @@ -420,7 +421,14 @@ impl Assembler } /// Append an instruction to the list - pub(super) fn push_insn(&mut self, op: Op, opnds: Vec, target: Option, text: Option) -> Opnd + pub(super) fn push_insn( + &mut self, + op: Op, + opnds: Vec, + target: Option, + text: Option, + pos_marker: Option + ) -> Opnd { // Index of this instruction let insn_idx = self.insns.len(); @@ -471,7 +479,7 @@ impl Assembler opnds, out: out_opnd, target, - pos: None + pos_marker, }; self.insns.push(insn); @@ -490,7 +498,7 @@ impl Assembler opnds: vec![], out: Opnd::None, target: None, - pos: None + pos_marker: None, }; self.insns.push(insn); self.live_ranges.push(self.insns.len()); @@ -505,7 +513,7 @@ impl Assembler opnds: vec![], out: Opnd::None, target: None, - pos: None + pos_marker: None, }; self.insns.push(insn); self.live_ranges.push(self.insns.len()); @@ -514,7 +522,7 @@ impl Assembler /// Load an address relative to the given label. 
#[must_use] pub fn lea_label(&mut self, target: Target) -> Opnd { - self.push_insn(Op::LeaLabel, vec![], Some(target), None) + self.push_insn(Op::LeaLabel, vec![], Some(target), None, None) } /// Create a new label instance that we can jump to @@ -538,7 +546,7 @@ impl Assembler opnds: vec![], out: Opnd::None, target: Some(label), - pos: None + pos_marker: None, }; self.insns.push(insn); self.live_ranges.push(self.insns.len()); @@ -546,7 +554,7 @@ impl Assembler /// Transform input instructions, consumes the input assembler pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler - where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option) + where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option, Option) { let mut asm = Assembler { insns: Vec::default(), @@ -582,7 +590,7 @@ impl Assembler asm.comment(insn.text.unwrap().as_str()); }, _ => { - map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text); + map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text, insn.pos_marker); } }; @@ -644,7 +652,7 @@ impl Assembler let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - let asm = self.forward_pass(|asm, index, op, opnds, target, text| { + let asm = self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. 
@@ -725,7 +733,7 @@ impl Assembler } ).collect(); - asm.push_insn(op, reg_opnds, target, text); + asm.push_insn(op, reg_opnds, target, text, pos_marker); // Set the output register for this instruction let num_insns = asm.insns.len(); @@ -776,7 +784,13 @@ impl Assembler pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let target = Target::FunPtr(fptr); - self.push_insn(Op::CCall, opnds, Some(target), None) + self.push_insn(Op::CCall, opnds, Some(target), None, None) + } + + //pub fn pos_marker(&mut self, marker_fn: F) + pub fn pos_marker(&mut self, marker_fn: PosMarkerFn) + { + self.push_insn(Op::PosMarker, vec![], None, None, Some(marker_fn)); } } @@ -786,7 +800,7 @@ macro_rules! def_push_jcc { { pub fn $op_name(&mut self, target: Target) { - self.push_insn($opcode, vec![], Some(target), None); + self.push_insn($opcode, vec![], Some(target), None, None); } } }; @@ -799,7 +813,7 @@ macro_rules! def_push_0_opnd { #[must_use] pub fn $op_name(&mut self) -> Opnd { - self.push_insn($opcode, vec![], None, None) + self.push_insn($opcode, vec![], None, None, None) } } }; @@ -811,7 +825,7 @@ macro_rules! def_push_0_opnd_no_out { { pub fn $op_name(&mut self) { - self.push_insn($opcode, vec![], None, None); + self.push_insn($opcode, vec![], None, None, None); } } }; @@ -824,7 +838,7 @@ macro_rules! def_push_1_opnd { #[must_use] pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd { - self.push_insn($opcode, vec![opnd0], None, None) + self.push_insn($opcode, vec![opnd0], None, None, None) } } }; @@ -836,7 +850,7 @@ macro_rules! def_push_1_opnd_no_out { { pub fn $op_name(&mut self, opnd0: Opnd) { - self.push_insn($opcode, vec![opnd0], None, None); + self.push_insn($opcode, vec![opnd0], None, None, None); } } }; @@ -849,7 +863,7 @@ macro_rules! 
def_push_2_opnd { #[must_use] pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd { - self.push_insn($opcode, vec![opnd0, opnd1], None, None) + self.push_insn($opcode, vec![opnd0, opnd1], None, None, None) } } }; @@ -861,7 +875,7 @@ macro_rules! def_push_2_opnd_no_out { { pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) { - self.push_insn($opcode, vec![opnd0, opnd1], None, None); + self.push_insn($opcode, vec![opnd0, opnd1], None, None, None); } } }; diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4cd48ee3c9b595..2fb7e393464466 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -94,7 +94,7 @@ impl Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - self.forward_pass(|asm, index, op, opnds, target, text| { + self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { // Load heap object operands into registers because most // instructions can't directly work with 64-bit constants let opnds = match op { @@ -140,7 +140,7 @@ impl Assembler _ => (opnds[0], opnds[1]) }; - asm.push_insn(op, vec![opnd0, opnd1], target, text); + asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); }, Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { @@ -151,7 +151,7 @@ impl Assembler } }).collect(); - asm.push_insn(op, new_opnds, target, text); + asm.push_insn(op, new_opnds, target, text, pos_marker); }, Op::Mov => { match (opnds[0], opnds[1]) { @@ -194,7 +194,7 @@ impl Assembler asm.not(opnd0); }, _ => { - asm.push_insn(op, opnds, target, text); + asm.push_insn(op, opnds, target, text, pos_marker); } }; }) @@ -222,6 +222,13 @@ impl Assembler cb.write_label(insn.target.unwrap().unwrap_label_idx()); }, + // Report back the current position in the generated code + Op::PosMarker => { + let pos = cb.get_write_ptr(); + let pos_marker_fn = insn.pos_marker.as_ref().unwrap(); + pos_marker_fn(pos); + } + Op::BakeString => 
{ for byte in insn.text.as_ref().unwrap().as_bytes() { cb.write_byte(*byte); diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index fa6b4e41b0b31f..d8f663fa05527e 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -297,8 +297,10 @@ fn jit_prepare_routine_call( /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. -fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) { - CodegenGlobals::push_global_inval_patch(cb.get_write_ptr(), outline_block_target_pos); +fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) { + asm.pos_marker(Box::new(move |code_ptr| { + CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos); + })); } /// Verify the ctx's types and mappings against the compile-time stack, self, @@ -681,7 +683,7 @@ fn gen_check_ints(asm: &mut Assembler, side_exit: CodePtr) { fn jump_to_next_insn( jit: &mut JITState, current_context: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) { // Reset the depth since in current usages we only ever jump to to @@ -704,10 +706,13 @@ fn jump_to_next_insn( record_global_inval_patch(cb, exit_pos); jit.record_boundary_patch_point = false; */ + + + } // Generate the jump instruction - gen_direct_jump(jit, &reset_depth, jump_block, cb); + gen_direct_jump(jit, &reset_depth, jump_block, asm); } // Compile a sequence of bytecode instructions for a given basic block version. @@ -763,7 +768,7 @@ pub fn gen_single_block( // opt_getinlinecache wants to be in a block all on its own. Cut the block short // if we run into it. See gen_opt_getinlinecache() for details. 
if opcode == YARVINSN_opt_getinlinecache.as_usize() && insn_idx > starting_insn_idx { - jump_to_next_insn(&mut jit, &ctx, cb, ocb); + jump_to_next_insn(&mut jit, &ctx, &mut asm, ocb); break; } @@ -775,11 +780,9 @@ pub fn gen_single_block( // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { - // FIXME: is this sound with the new assembler? - // Generate an exit to this instruction and record it let exit_pos = gen_outlined_exit(jit.pc, &ctx, ocb); - record_global_inval_patch(cb, exit_pos); + record_global_inval_patch(&mut asm, exit_pos); jit.record_boundary_patch_point = false; } diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 6c7044c8433a5f..63c373b70a6977 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1861,7 +1861,7 @@ fn gen_jump_branch( } } -pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) { +pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) { let branchref = make_branch_entry(&jit.get_block(), ctx, gen_jump_branch); let mut branch = branchref.borrow_mut(); @@ -1897,6 +1897,17 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut + asm.pos_marker(Box::new(move |code_ptr| { + let mut branch = branchref.borrow_mut(); + branch.start_addr = Some(code_ptr); + })); + + + + + + + } else { @@ -1904,8 +1915,11 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut // target block right after this one (fallthrough). 
branch.dst_addrs[0] = None; branch.shape = BranchShape::Next0; - branch.start_addr = Some(cb.get_write_ptr()); - branch.end_addr = Some(cb.get_write_ptr()); + + todo!(); + + //branch.start_addr = Some(cb.get_write_ptr()); + //branch.end_addr = Some(cb.get_write_ptr()); } } From cbf7a7cd23e03c47274ed2755ef5ba1ca099bd50 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Wed, 20 Jul 2022 15:54:39 +0100 Subject: [PATCH 393/546] Fix dupn (https://github.com/Shopify/ruby/pull/330) * get_dupn was allocating and throwing away an Assembler object instead of using the one passed in * Uncomment remaining tests in codegen.rs, which seem to work now --- yjit/src/backend/ir.rs | 1 - yjit/src/codegen.rs | 6 ------ 2 files changed, 7 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 13a5c5c3d3d7a5..8d58da88f29256 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -7,7 +7,6 @@ use std::convert::From; use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; -use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType}; use crate::core::{Context, Type, TempMapping}; use crate::codegen::{JITState}; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d8f663fa05527e..d2f483c79d4a2d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -943,8 +943,6 @@ fn gen_dupn( _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let mut asm = Assembler::new(); - let nval: VALUE = jit_get_arg(jit, 0); let VALUE(n) = nval; @@ -6401,7 +6399,6 @@ mod tests { assert!(ocb.unwrap().get_write_pos() > 0); } - /* #[test] fn test_gen_exit() { let (_, ctx, mut asm, mut cb, _) = setup_codegen(); @@ -6416,7 +6413,6 @@ mod tests { get_side_exit(&mut jit, &mut ocb, &ctx); assert!(ocb.unwrap().get_write_pos() > 0); } - */ #[test] fn test_gen_check_ints() { @@ -6462,7 +6458,6 @@ mod tests { assert!(cb.get_write_pos() > 0); // Write some movs } - /* #[test] fn test_gen_dupn() { let (mut 
jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); @@ -6486,7 +6481,6 @@ mod tests { asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } - */ #[test] fn test_gen_swap() { From f5f58d82834cf84fe15dd1b28754923404a9fc75 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Wed, 20 Jul 2022 15:56:13 +0100 Subject: [PATCH 394/546] Update disasm to work on ARM (https://github.com/Shopify/ruby/pull/331) --- yjit/src/backend/arm64/mod.rs | 24 ++++++++++++++++++++++++ yjit/src/disasm.rs | 9 +++++++++ 2 files changed, 33 insertions(+) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index be329f61cfbaa1..c5ddbea7c1ef3c 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -767,4 +767,28 @@ mod tests { asm.compile_with_num_regs(&mut cb, 1); } + + #[test] + #[cfg(feature = "disasm")] + fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { + // Test drive Capstone with simple input + use capstone::prelude::*; + + let cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build()?; + + let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?; + + match insns.as_ref() { + [insn] => { + assert_eq!(Some("movk"), insn.mnemonic()); + Ok(()) + } + _ => Err(capstone::Error::CustomError( + "expected to disassemble to movk", + )), + } + } } diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 83c80d6c668d6f..62b270bce9cc94 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -69,6 +69,8 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St // Initialize capstone use capstone::prelude::*; + + #[cfg(target_arch = "x86_64")] let cs = Capstone::new() .x86() .mode(arch::x86::ArchMode::Mode64) @@ -76,6 +78,13 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St .build() .unwrap(); + #[cfg(target_arch = "aarch64")] + let cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build() + 
.unwrap(); + out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len())); out.push_str(&format!( "TOTAL INLINE CODE SIZE: {} bytes\n", From f833d75bee13ecb485db1591898cb871b24a2991 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 20 Jul 2022 14:50:00 -0400 Subject: [PATCH 395/546] Refactor YJIT branches to use PosMarker (https://github.com/Shopify/ruby/pull/333) * Refactor defer_compilation to use PosMarker * Port gen_direct_jump() to use PosMarker * Port gen_branch, branchunless * Port over gen_jump() * Port over branchif and branchnil * Fix use of record_boundary_patch_point in jump_to_next_insn --- yjit/src/backend/arm64/mod.rs | 3 + yjit/src/backend/ir.rs | 2 + yjit/src/backend/x86_64/mod.rs | 8 +++ yjit/src/codegen.rs | 104 ++++++++++----------------------- yjit/src/core.rs | 96 +++++++++++++++--------------- 5 files changed, 94 insertions(+), 119 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index c5ddbea7c1ef3c..35026a520bd28b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -637,6 +637,9 @@ impl Assembler Op::Je => { emit_conditional_jump::<{Condition::EQ}>(cb, insn.target.unwrap()); }, + Op::Jne => { + emit_conditional_jump::<{Condition::NE}>(cb, insn.target.unwrap()); + }, Op::Jbe => { emit_conditional_jump::<{Condition::LS}>(cb, insn.target.unwrap()); }, diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 8d58da88f29256..c55a8f609b25e9 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -102,6 +102,7 @@ pub enum Op // Low-level conditional jump instructions Jbe, Je, + Jne, Jz, Jnz, Jo, @@ -883,6 +884,7 @@ macro_rules!
def_push_2_opnd_no_out { def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); def_push_jcc!(jmp, Op::Jmp); def_push_jcc!(je, Op::Je); +def_push_jcc!(jne, Op::Jne); def_push_jcc!(jbe, Op::Jbe); def_push_jcc!(jz, Op::Jz); def_push_jcc!(jnz, Op::Jnz); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 2fb7e393464466..2efe920ddfb8c5 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -367,6 +367,14 @@ impl Assembler } } + Op::Jne => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr), + Target::Label(label_idx) => jne_label(cb, label_idx), + _ => unreachable!() + } + } + Op::Jbe => { match insn.target.unwrap() { Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d2f483c79d4a2d..0b906970f7dd24 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -37,7 +37,6 @@ enum CodegenStatus { KeepCompiling, CantCompile, EndBlock, - DeferCompilation, } /// Code generation function signature @@ -698,17 +697,10 @@ fn jump_to_next_insn( // We are at the end of the current instruction. Record the boundary. 
if jit.record_boundary_patch_point { - todo!(); - - /* - let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; - let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap()); - record_global_inval_patch(cb, exit_pos); + let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; + let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb); + record_global_inval_patch(asm, exit_pos); jit.record_boundary_patch_point = false; - */ - - - } // Generate the jump instruction @@ -752,9 +744,6 @@ pub fn gen_single_block( // Create a backend assembler instance let mut asm = Assembler::new(); - // Codegen status for the last instruction compiled - let mut status = CantCompile; - // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { @@ -792,7 +781,7 @@ pub fn gen_single_block( } // Lookup the codegen function for this instruction - status = CantCompile; + let mut status = CantCompile; if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) { // :count-placement: // Count bytecode instructions that execute in generated code. @@ -835,11 +824,6 @@ pub fn gen_single_block( break; } - // If we are deferring compilation for this instruction - if status == DeferCompilation { - break; - } - // For now, reset the chain depth after each instruction as only the // first instruction in the block can concern itself with the depth. ctx.reset_chain_depth(); @@ -870,24 +854,9 @@ pub fn gen_single_block( block.set_end_idx(insn_idx); } - // If we are deferring compilation for the current instruction - if status == DeferCompilation { - defer_compilation(&jit.block, insn_idx, &ctx, cb, ocb); - - // Mark the end position of the block - let mut block = jit.block.borrow_mut(); - block.set_end_addr(cb.get_write_ptr()); - } - - - // We currently can't handle cases where the request is for a block that // doesn't go to the next instruction. 
- //assert!(!jit.record_boundary_patch_point); - - - - + assert!(!jit.record_boundary_patch_point); // If code for the block doesn't fit, fail if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() { @@ -1140,7 +1109,8 @@ fn gen_opt_plus( ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - return DeferCompilation; + defer_compilation(jit, ctx, asm, ocb); + return EndBlock; } let comptime_a = jit_peek_at_stack(jit, ctx, 1); @@ -3208,9 +3178,10 @@ fn gen_opt_case_dispatch( KeepCompiling // continue with the next instruction } +*/ fn gen_branchif_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape, @@ -3218,14 +3189,14 @@ fn gen_branchif_branch( assert!(target1 != None); match shape { BranchShape::Next0 => { - jz_ptr(cb, target1.unwrap()); + asm.jz(target1.unwrap().into()); } BranchShape::Next1 => { - jnz_ptr(cb, target0); + asm.jnz(target0.into()); } BranchShape::Default => { - jnz_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + asm.jnz(target0.into()); + asm.jmp(target1.unwrap().into()); } } } @@ -3233,7 +3204,7 @@ fn gen_branchif_branch( fn gen_branchif( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3241,14 +3212,14 @@ fn gen_branchif( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Test if any bit (outside of the Qnil bit) is on // RUBY_Qfalse /* ...0000 0000 */ // RUBY_Qnil /* ...0000 1000 */ let val_opnd = ctx.stack_pop(1); - test(cb, val_opnd, imm_opnd(!Qnil.as_i64())); + asm.test(val_opnd, Opnd::Imm(!Qnil.as_i64())); // Get the branch target instruction offsets let next_idx = jit_next_insn_idx(jit); @@ -3266,7 +3237,7 @@ fn gen_branchif( gen_branch( jit, ctx, - cb, + asm, 
ocb, jump_block, ctx, @@ -3277,7 +3248,6 @@ fn gen_branchif( EndBlock } -*/ fn gen_branchunless_branch( asm: &mut Assembler, @@ -3328,18 +3298,11 @@ fn gen_branchunless( idx: jump_idx.try_into().unwrap(), }; - - - - // TODO: port gen_branch logic - todo!("complete branchunless implementation"); - - /* // Generate the branch instructions gen_branch( jit, ctx, - cb, + asm, ocb, jump_block, ctx, @@ -3349,24 +3312,20 @@ fn gen_branchunless( ); EndBlock - */ - - } -/* fn gen_branchnil_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape, ) { match shape { - BranchShape::Next0 => jne_ptr(cb, target1.unwrap()), - BranchShape::Next1 => je_ptr(cb, target0), + BranchShape::Next0 => asm.jne(target1.unwrap().into()), + BranchShape::Next1 => asm.je(target0.into()), BranchShape::Default => { - je_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + asm.je(target0.into()); + asm.jmp(target1.unwrap().into()); } } } @@ -3374,7 +3333,7 @@ fn gen_branchnil_branch( fn gen_branchnil( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3382,13 +3341,13 @@ fn gen_branchnil( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Test if the value is Qnil // RUBY_Qnil /* ...0000 1000 */ let val_opnd = ctx.stack_pop(1); - cmp(cb, val_opnd, uimm_opnd(Qnil.into())); + asm.cmp(val_opnd, Opnd::UImm(Qnil.into())); // Get the branch target instruction offsets let next_idx = jit_next_insn_idx(jit) as i32; @@ -3406,7 +3365,7 @@ fn gen_branchnil( gen_branch( jit, ctx, - cb, + asm, ocb, jump_block, ctx, @@ -3421,7 +3380,7 @@ fn gen_branchnil( fn gen_jump( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> 
CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3429,7 +3388,7 @@ fn gen_jump( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Get the branch target instruction offsets @@ -3440,11 +3399,12 @@ fn gen_jump( }; // Generate the jump instruction - gen_direct_jump(jit, ctx, jump_block, cb); + gen_direct_jump(jit, ctx, jump_block, asm); EndBlock } +/* /// Guard that self or a stack operand has the same class as `known_klass`, using /// `sample_instance` to speculate about the shape of the runtime value. /// FIXNUM and on-heap integers are treated as if they have distinct classes, and @@ -6067,11 +6027,11 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate), YARVINSN_opt_invokebuiltin_delegate_leave => Some(gen_opt_invokebuiltin_delegate), YARVINSN_opt_case_dispatch => Some(gen_opt_case_dispatch), + */ YARVINSN_branchif => Some(gen_branchif), YARVINSN_branchunless => Some(gen_branchunless), YARVINSN_branchnil => Some(gen_branchnil), YARVINSN_jump => Some(gen_jump), - */ //YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), //YARVINSN_getblockparam => Some(gen_getblockparam), diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 63c373b70a6977..1afa5c537ac567 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1805,10 +1805,39 @@ fn get_branch_target( } } +impl Assembler +{ + // Mark the start position of a patchable branch in the machine code + fn mark_branch_start(&mut self, branchref: &BranchRef) + { + // We need to create our own branch rc object + // so that we can move the closure below + let branchref = branchref.clone(); + + self.pos_marker(Box::new(move |code_ptr| { + let mut branch = branchref.borrow_mut(); + branch.start_addr = Some(code_ptr); + })); + } + + // Mark the end position of a patchable 
branch in the machine code + fn mark_branch_end(&mut self, branchref: &BranchRef) + { + // We need to create our own branch rc object + // so that we can move the closure below + let branchref = branchref.clone(); + + self.pos_marker(Box::new(move |code_ptr| { + let mut branch = branchref.borrow_mut(); + branch.end_addr = Some(code_ptr); + })); + } +} + pub fn gen_branch( jit: &JITState, src_ctx: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, target0: BlockId, ctx0: &Context, @@ -1842,8 +1871,9 @@ pub fn gen_branch( }; // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - regenerate_branch(cb, &mut branch); + asm.mark_branch_start(&branchref); + gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default); + asm.mark_branch_end(&branchref); } fn gen_jump_branch( @@ -1880,55 +1910,27 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu branch.blocks[0] = Some(blockref.clone()); branch.shape = BranchShape::Default; - - - todo!("convert gen_direct_jump to using new asm"); - - - // TODO: could we use regenerate_branch logic here? - - /* // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); - branch.end_addr = Some(cb.get_write_ptr()); - */ - - - - - asm.pos_marker(Box::new(move |code_ptr| { - let mut branch = branchref.borrow_mut(); - branch.start_addr = Some(code_ptr); - })); - - - - - - - - - + asm.mark_branch_start(&branchref); + gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + asm.mark_branch_end(&branchref); } else { // This None target address signals gen_block_series() to compile the // target block right after this one (fallthrough). 
branch.dst_addrs[0] = None; branch.shape = BranchShape::Next0; - todo!(); - - //branch.start_addr = Some(cb.get_write_ptr()); - //branch.end_addr = Some(cb.get_write_ptr()); + // The branch is effectively empty (a noop) + asm.mark_branch_start(&branchref); + asm.mark_branch_end(&branchref); } } /// Create a stub to force the code up to this point to be executed pub fn defer_compilation( - block: &BlockRef, - insn_idx: u32, + jit: &JITState, cur_ctx: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) { if cur_ctx.chain_depth != 0 { @@ -1942,23 +1944,23 @@ pub fn defer_compilation( } next_ctx.chain_depth += 1; - let branch_rc = make_branch_entry(block, cur_ctx, gen_jump_branch); + let block_rc = jit.get_block(); + let branch_rc = make_branch_entry(&jit.get_block(), cur_ctx, gen_jump_branch); let mut branch = branch_rc.borrow_mut(); + let block = block_rc.borrow(); let blockid = BlockId { - iseq: block.borrow().blockid.iseq, - idx: insn_idx, + iseq: block.blockid.iseq, + idx: jit.get_insn_idx(), }; branch.target_ctxs[0] = next_ctx; branch.targets[0] = Some(blockid); branch.dst_addrs[0] = get_branch_target(blockid, &next_ctx, &branch_rc, 0, ocb); // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - let mut asm = Assembler::new(); - gen_jump_branch(&mut asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); - asm.compile(cb); - branch.end_addr = Some(cb.get_write_ptr()); + asm.mark_branch_start(&branch_rc); + gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + asm.mark_branch_end(&branch_rc); } // Remove all references to a block then free it. 
From 8259813bc30497986974633202f3052353295f95 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 20 Jul 2022 15:58:31 -0400 Subject: [PATCH 396/546] Temporarily simplify code for emit_conditional_jump to fix a bug --- yjit/src/backend/arm64/mod.rs | 24 ++++++++++++++++++++---- yjit/src/virtualmem.rs | 5 +++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 35026a520bd28b..72cbd938b0a7d1 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -324,10 +324,24 @@ impl Assembler fn emit_conditional_jump(cb: &mut CodeBlock, target: Target) { match target { Target::CodePtr(dst_ptr) => { - let src_addr = cb.get_write_ptr().into_i64() + 4; - let dst_addr = dst_ptr.into_i64(); - let offset = dst_addr - src_addr; + let dst_addr = dst_ptr.into_u64(); + //let src_addr = cb.get_write_ptr().into_i64() + 4; + //let offset = dst_addr - src_addr; + + // If the condition is met, then we'll skip past the + // next instruction, put the address in a register, and + // jump to it. + bcond(cb, CONDITION, A64Opnd::new_imm(8)); + + // If we get to this instruction, then the condition + // wasn't met, in which case we'll jump past the + // next instruction that perform the direct jump. + + b(cb, A64Opnd::new_imm(2i64 + emit_load_size(dst_addr) as i64)); + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + /* // If the jump offset fits into the conditional jump as an // immediate value and it's properly aligned, then we can // use the b.cond instruction directly. Otherwise, we need @@ -339,7 +353,7 @@ impl Assembler // If the condition is met, then we'll skip past the // next instruction, put the address in a register, and // jump to it. 
- bcond(cb, CONDITION, A64Opnd::new_imm(4)); + bcond(cb, CONDITION, A64Opnd::new_imm(8)); // If the offset fits into a direct jump, then we'll use // that and the number of instructions will be shorter. @@ -351,6 +365,7 @@ impl Assembler b(cb, A64Opnd::new_imm(1)); // Here we'll perform the direct jump to the target. + let offset = dst_addr - cb.get_write_ptr().into_i64() + 4; b(cb, A64Opnd::new_imm(offset / 4)); } else { // If we get to this instruction, then the condition @@ -363,6 +378,7 @@ impl Assembler br(cb, Assembler::SCRATCH0); } } + */ }, Target::Label(label_idx) => { // Here we're going to save enough space for ourselves and diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs index 6a8e27447e1549..8d34e521b9c5d1 100644 --- a/yjit/src/virtualmem.rs +++ b/yjit/src/virtualmem.rs @@ -192,6 +192,11 @@ impl CodePtr { ptr as i64 } + pub fn into_u64(self) -> u64 { + let CodePtr(ptr) = self; + ptr as u64 + } + pub fn into_usize(self) -> usize { let CodePtr(ptr) = self; ptr as usize From 8d2560f1f533c078ed23d71084129ce13ba33abf Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 20 Jul 2022 16:35:14 -0400 Subject: [PATCH 397/546] Port over setlocal and getglobal --- yjit/src/codegen.rs | 46 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 0b906970f7dd24..d1965d3be3dd77 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1625,11 +1625,10 @@ fn gen_setlocal_wc0( KeepCompiling } -/* fn gen_setlocal_generic( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, local_idx: i32, level: u32, @@ -1637,34 +1636,33 @@ fn gen_setlocal_generic( let value_type = ctx.get_opnd_type(StackOpnd(0)); // Load environment pointer EP at level - gen_get_ep(cb, REG0, level); + let ep_opnd = gen_get_ep(asm, level); // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however 
write barriers // only affect heap objects being written. If we know an immediate value is being written we // can skip this check. if !value_type.is_imm() { // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG0, + ep_opnd, SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, uimm_opnd(VM_ENV_FLAG_WB_REQUIRED.into())); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); + asm.jnz(side_exit.into()); } // Pop the value to write from the stack let stack_top = ctx.stack_pop(1); - mov(cb, REG1, stack_top); // Write the value at the environment pointer let offs = -(SIZEOF_VALUE as i32 * local_idx); - mov(cb, mem_opnd(64, REG0, offs), REG1); + asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); KeepCompiling } @@ -1672,24 +1670,23 @@ fn gen_setlocal_generic( fn gen_setlocal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0).as_i32(); let level = jit_get_arg(jit, 1).as_u32(); - gen_setlocal_generic(jit, ctx, cb, ocb, idx, level) + gen_setlocal_generic(jit, ctx, asm, ocb, idx, level) } fn gen_setlocal_wc1( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0).as_i32(); - gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1) + gen_setlocal_generic(jit, ctx, asm, ocb, idx, 1) } -*/ // new hash initialized from top N values fn gen_newhash( @@ -5290,28 +5287,29 @@ fn gen_leave( EndBlock } -/* fn gen_getglobal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let gid = jit_get_arg(jit, 0); // Save the PC and SP because we might make a Ruby call for 
warning - jit_prepare_routine_call(jit, ctx, cb, REG0); - - mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64())); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_gvar_get as *const u8); + let val_opnd = asm.ccall( + rb_gvar_get as *const u8, + vec![ gid.into() ] + ); let top = ctx.stack_push(Type::Unknown); - mov(cb, top, RAX); + asm.mov(top, val_opnd); KeepCompiling } +/* fn gen_setglobal( jit: &mut JITState, ctx: &mut Context, @@ -5974,9 +5972,9 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_getlocal => Some(gen_getlocal), YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), - //YARVINSN_setlocal => Some(gen_setlocal), + YARVINSN_setlocal => Some(gen_setlocal), YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), - //YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), + YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), YARVINSN_opt_plus => Some(gen_opt_plus), /* YARVINSN_opt_minus => Some(gen_opt_minus), @@ -6040,8 +6038,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_invokesuper => Some(gen_invokesuper), YARVINSN_leave => Some(gen_leave), - /* YARVINSN_getglobal => Some(gen_getglobal), + /* YARVINSN_setglobal => Some(gen_setglobal), YARVINSN_anytostring => Some(gen_anytostring), YARVINSN_objtostring => Some(gen_objtostring), From 85872eecddf0e5a13ba2218bb2068e99d5ce1313 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 20 Jul 2022 16:43:19 -0400 Subject: [PATCH 398/546] Port over newrange --- yjit/src/codegen.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d1965d3be3dd77..611c42c562f540 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1253,32 +1253,38 @@ fn gen_splatarray( KeepCompiling } +*/ // new range initialized from top 2 values fn gen_newrange( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> 
CodegenStatus { let flag = jit_get_arg(jit, 0); // rb_range_new() allocates and can raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // val = rb_range_new(low, high, (int)flag); - mov(cb, C_ARG_REGS[0], ctx.stack_opnd(1)); - mov(cb, C_ARG_REGS[1], ctx.stack_opnd(0)); - mov(cb, C_ARG_REGS[2], uimm_opnd(flag.into())); - call_ptr(cb, REG0, rb_range_new as *const u8); + let range_opnd = asm.ccall( + rb_range_new as *const u8, + vec![ + ctx.stack_opnd(1), + ctx.stack_opnd(0), + flag.into() + ] + ); ctx.stack_pop(2); let stack_ret = ctx.stack_push(Type::UnknownHeap); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, range_opnd); KeepCompiling } +/* fn guard_object_is_heap( cb: &mut CodeBlock, object_opnd: X86Opnd, @@ -5995,8 +6001,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), - YARVINSN_newrange => Some(gen_newrange), */ + YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), /* YARVINSN_expandarray => Some(gen_expandarray), From 8605efdd942d2a6b139abe66503427683e5dcad3 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 21 Jul 2022 12:31:31 -0400 Subject: [PATCH 399/546] Fix corrupted X29 causing segfault, thanks Alan! 
--- yjit/src/backend/arm64/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 72cbd938b0a7d1..25e155dafa870c 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -633,8 +633,8 @@ impl Assembler if b_offset_fits_bits(offset) { b(cb, A64Opnd::new_imm(offset / 4)); } else { - emit_load_value(cb, X29, dst_addr as u64); - br(cb, X29); + emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + br(cb, Self::SCRATCH0); } }, Target::Label(label_idx) => { From b1dbc5f1a683e4727f463c0a5a3e0195e5c2cc7f Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 21 Jul 2022 12:43:54 -0400 Subject: [PATCH 400/546] Fix crash in newhash ccall --- yjit/src/backend/arm64/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 25e155dafa870c..1f93441c030e14 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -121,7 +121,9 @@ impl Assembler // For each of the operands we're going to first load them // into a register and then move them into the correct // argument register. 
- for (idx, opnd) in opnds.into_iter().enumerate() { + // Note: the iteration order is reversed to avoid corrupting x0, + // which is both the return value and first argument register + for (idx, opnd) in opnds.into_iter().enumerate().rev() { let value = asm.load(opnd); asm.mov(Opnd::Reg(C_ARG_REGREGS[idx]), value); } From 76b05ba9e8f72ce98057d3817f6f353c9e62a892 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 21 Jul 2022 14:48:44 -0400 Subject: [PATCH 401/546] Better splitting for Op::Test on AArch64 (https://github.com/Shopify/ruby/pull/335) --- yjit/src/asm/arm64/arg/bitmask_imm.rs | 10 ++- yjit/src/backend/arm64/mod.rs | 94 ++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs index 7e5a21c7b4dce5..847b735eaa1923 100644 --- a/yjit/src/asm/arm64/arg/bitmask_imm.rs +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -41,13 +41,19 @@ impl TryFrom for BitmaskImmediate { /// Attempt to convert a u64 into a BitmaskImm. fn try_from(value: u64) -> Result { + // 0 is not encodable as a bitmask immediate. Immediately return here so + // that we don't have any issues with underflow. + if value == 0 { + return Err(()); + } + /// Is this number's binary representation all 1s? fn is_mask(imm: u64) -> bool { if imm == u64::MAX { true } else { ((imm + 1) & imm) == 0 } } - /// Is this number's binary representation one or more 1s followed by one or - /// more 0s? + /// Is this number's binary representation one or more 1s followed by + /// one or more 0s? 
fn is_shifted_mask(imm: u64) -> bool { is_mask((imm - 1) | imm) } diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 1f93441c030e14..b1f4d63d0f695e 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -249,7 +249,33 @@ impl Assembler _ => asm.load(opnds[0]) }; - asm.test(opnd0, opnds[1]); + // The second value must be either a register or an + // unsigned immediate that can be encoded as a bitmask + // immediate. If it's not one of those, we'll need to load + // it first. + let opnd1 = match opnds[1] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], + Opnd::Mem(_) => asm.load(opnds[1]), + Opnd::Imm(imm) => { + if imm <= 0 { + asm.load(opnds[1]) + } else if BitmaskImmediate::try_from(imm as u64).is_ok() { + Opnd::UImm(imm as u64) + } else { + asm.load(opnds[1]) + } + }, + Opnd::UImm(uimm) => { + if BitmaskImmediate::try_from(uimm).is_ok() { + opnds[1] + } else { + asm.load(opnds[1]) + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + }; + + asm.test(opnd0, opnd1); }, _ => { asm.push_insn(op, opnds, target, text, pos_marker); @@ -789,6 +815,72 @@ mod tests { asm.compile_with_num_regs(&mut cb, 1); } + #[test] + fn test_emit_test() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. 
+ assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_negative_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + #[test] #[cfg(feature = "disasm")] fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { From e9a2effd74a3c1a40bd82e75ac24e8570ba57364 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 21 Jul 2022 14:53:33 -0400 Subject: [PATCH 402/546] Enable more btests in the CI workflows (https://github.com/Shopify/ruby/pull/334) * Enable more btests in the CI workflows * Update workflows --- .cirrus.yml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 293873af5b919d..a6d29cd3eb65fe 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -125,5 +125,25 @@ yjit_task: boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 # output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 - bootstrap_tests_script: RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. 
-I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" bootstraptest/test_yjit_new_backend.rb + bootstrap_tests_script: | + RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. -I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" \ + bootstraptest/test_attr.rb \ + bootstraptest/test_autoload.rb \ + bootstraptest/test_class.rb \ + bootstraptest/test_constant_cache.rb \ + bootstraptest/test_env.rb \ + bootstraptest/test_eval.rb \ + bootstraptest/test_exception.rb \ + bootstraptest/test_fiber.rb \ + bootstraptest/test_finalizer.rb \ + bootstraptest/test_flip.rb \ + bootstraptest/test_flow.rb \ + bootstraptest/test_fork.rb \ + bootstraptest/test_gc.rb \ + bootstraptest/test_jump.rb \ + bootstraptest/test_literal_suffix.rb \ + bootstraptest/test_load.rb \ + bootstraptest/test_massign.rb \ + bootstraptest/test_string.rb \ + bootstraptest/test_yjit_new_backend.rb # full_build_script: make -j From 70e117d512636465d8dc2094b22dd6535602050a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 21 Jul 2022 16:25:21 -0400 Subject: [PATCH 403/546] Fixes (https://github.com/Shopify/ruby/pull/336) * Fix bitmask encoding to u32 * Fix splitting for Op::And to account for bitmask immediate --- yjit/src/asm/arm64/arg/bitmask_imm.rs | 9 ++- yjit/src/backend/arm64/mod.rs | 98 +++++++++++++++------------ 2 files changed, 61 insertions(+), 46 deletions(-) diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs index 847b735eaa1923..220a7d697ee6a1 100644 --- a/yjit/src/asm/arm64/arg/bitmask_imm.rs +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -136,7 +136,7 @@ impl From for u32 { fn from(bitmask: BitmaskImmediate) -> Self { 0 | (((bitmask.n as u32) & 1) << 12) - | (bitmask.immr << 6) as u32 + | ((bitmask.immr as u32) << 6) | bitmask.imms as u32 } } @@ -152,6 +152,13 @@ mod tests { }); } + #[test] + fn test_negative() { + let bitmask: BitmaskImmediate = (-9_i64 as 
u64).try_into().unwrap(); + let encoded: u32 = bitmask.into(); + assert_eq!(7998, encoded); + } + #[test] fn test_size_2_minimum() { let bitmask = BitmaskImmediate::try_from(0x5555555555555555); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index b1f4d63d0f695e..d3db3877dc347b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -81,6 +81,30 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { + fn load_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) => asm.load(opnd), + Opnd::Imm(imm) => { + if imm <= 0 { + asm.load(opnd) + } else if BitmaskImmediate::try_from(imm as u64).is_ok() { + Opnd::UImm(imm as u64) + } else { + asm.load(opnd) + } + }, + Opnd::UImm(uimm) => { + if BitmaskImmediate::try_from(uimm).is_ok() { + opnd + } else { + asm.load(opnd) + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + } + } + self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { // Load all Value operands into registers that aren't already a part // of Load instructions. @@ -96,7 +120,7 @@ impl Assembler }; match op { - Op::Add | Op::And | Op::Sub => { + Op::Add | Op::Sub => { // Check if one of the operands is a register. If it is, // then we'll make that the first operand. 
match (opnds[0], opnds[1]) { @@ -115,6 +139,23 @@ impl Assembler } } }, + Op::And => { + match (opnds[0], opnds[1]) { + (Opnd::Reg(_), Opnd::Reg(_)) => { + asm.and(opnds[0], opnds[1]); + }, + (reg_opnd @ Opnd::Reg(_), other_opnd) | + (other_opnd, reg_opnd @ Opnd::Reg(_)) => { + let opnd1 = load_bitmask_immediate(asm, other_opnd); + asm.and(reg_opnd, opnd1); + }, + _ => { + let opnd0 = asm.load(opnds[0]); + let opnd1 = load_bitmask_immediate(asm, opnds[1]); + asm.and(opnd0, opnd1); + } + } + }, Op::CCall => { assert!(opnds.len() < C_ARG_REGS.len()); @@ -188,29 +229,16 @@ impl Assembler }; }, Op::Mov => { - // The value that is being moved must be either a register - // or an immediate that can be encoded as a bitmask - // immediate. Otherwise, we'll need to split the move into - // multiple instructions. - let value = match opnds[1] { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], - Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnds[1]), - Opnd::UImm(uimm) => { - if let Ok(encoded) = BitmaskImmediate::try_from(uimm) { - if let Opnd::Mem(_) = opnds[0] { - // If the first operand is a memory operand, - // we're going to transform this into a - // store instruction, so we'll need to load - // this anyway. - asm.load(opnds[1]) - } else { - opnds[1] - } - } else { - asm.load(opnds[1]) - } - }, - _ => unreachable!() + let value = match (opnds[0], opnds[1]) { + // If the first operand is a memory operand, we're going + // to transform this into a store instruction, so we'll + // need to load this anyway. + (Opnd::Mem(_), Opnd::UImm(_)) => asm.load(opnds[1]), + // The value that is being moved must be either a + // register or an immediate that can be encoded as a + // bitmask immediate. Otherwise, we'll need to split the + // move into multiple instructions. + _ => load_bitmask_immediate(asm, opnds[1]) }; // If we're attempting to load into a memory operand, then @@ -253,27 +281,7 @@ impl Assembler // unsigned immediate that can be encoded as a bitmask // immediate. 
If it's not one of those, we'll need to load // it first. - let opnd1 = match opnds[1] { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], - Opnd::Mem(_) => asm.load(opnds[1]), - Opnd::Imm(imm) => { - if imm <= 0 { - asm.load(opnds[1]) - } else if BitmaskImmediate::try_from(imm as u64).is_ok() { - Opnd::UImm(imm as u64) - } else { - asm.load(opnds[1]) - } - }, - Opnd::UImm(uimm) => { - if BitmaskImmediate::try_from(uimm).is_ok() { - opnds[1] - } else { - asm.load(opnds[1]) - } - }, - Opnd::None | Opnd::Value(_) => unreachable!() - }; + let opnd1 = load_bitmask_immediate(asm, opnds[1]); asm.test(opnd0, opnd1); }, From 477c2df3fad22271958b92bdfafbae7155fbebb4 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 21 Jul 2022 17:06:01 -0400 Subject: [PATCH 404/546] Work on opt_lt, fix x86 backend bug in cmp() --- yjit/src/backend/x86_64/mod.rs | 6 +++--- yjit/src/codegen.rs | 32 +++++++++++++++----------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 2efe920ddfb8c5..3140c86b2e9629 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -113,7 +113,7 @@ impl Assembler }; match op { - Op::Add | Op::Sub | Op::And => { + Op::Add | Op::Sub | Op::And | Op::Cmp => { let (opnd0, opnd1) = match (opnds[0], opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { (asm.load(opnds[0]), asm.load(opnds[1])) @@ -133,7 +133,7 @@ impl Assembler (opnds[0], opnds[1]) } }, - // We have to load memory and register operands to avoid corrupting them + // We have to load memory operands to avoid corrupting them (Opnd::Mem(_) | Opnd::Reg(_), _) => { (asm.load(opnds[0]), opnds[1]) }, @@ -343,7 +343,7 @@ impl Assembler } // Compare - Op::Cmp => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Op::Cmp => cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()), // Test and set flags Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), diff --git 
a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 611c42c562f540..a1fd4df35d826e 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2363,20 +2363,19 @@ fn guard_two_fixnums(ctx: &mut Context, asm: &mut Assembler, side_exit: CodePtr) ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum); } -/* // Conditional move operation used by comparison operators -type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> (); +type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd; fn gen_fixnum_cmp( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cmov_op: CmovFn, ) -> CodegenStatus { // Defer compilation so we can specialize base on a runtime receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2393,38 +2392,37 @@ fn gen_fixnum_cmp( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Compare the arguments - xor(cb, REG0_32, REG0_32); // REG0 = Qfalse - mov(cb, REG1, arg0); - cmp(cb, REG1, arg1); - mov(cb, REG1, uimm_opnd(Qtrue.into())); - cmov_op(cb, REG0, REG1); + asm.cmp(arg0, arg1); + let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into()); // Push the output on the stack let dst = ctx.stack_push(Type::Unknown); - mov(cb, dst, REG0); + asm.mov(dst, bool_opnd); KeepCompiling } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + todo!("compare send path not yet implemented"); + //gen_opt_send_without_block(jit, ctx, cb, ocb) } } fn gen_opt_lt( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovl) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_l) } +/* fn gen_opt_le( jit: &mut JITState, ctx: &mut 
Context, @@ -5990,10 +5988,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_newhash => Some(gen_newhash), YARVINSN_duphash => Some(gen_duphash), YARVINSN_newarray => Some(gen_newarray), + //YARVINSN_duparray => Some(gen_duparray), + //YARVINSN_checktype => Some(gen_checktype), + //YARVINSN_opt_lt => Some(gen_opt_lt), /* - YARVINSN_duparray => Some(gen_duparray), - YARVINSN_checktype => Some(gen_checktype), - YARVINSN_opt_lt => Some(gen_opt_lt), YARVINSN_opt_le => Some(gen_opt_le), YARVINSN_opt_gt => Some(gen_opt_gt), YARVINSN_opt_ge => Some(gen_opt_ge), From e9f9b8f43b2afdc437ab1b8ab883bab0525a3f43 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 22 Jul 2022 12:01:24 -0400 Subject: [PATCH 405/546] Fix bug with opt_lt, csel on x86 --- yjit/src/backend/x86_64/mod.rs | 32 ++++++++++++++++---------------- yjit/src/codegen.rs | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 3140c86b2e9629..7b84e6213447d0 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -418,36 +418,36 @@ impl Assembler Op::Breakpoint => int3(cb), Op::CSelZ => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovz(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovnz(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelNZ => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovnz(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovz(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelE => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmove(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovne(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelNE => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovne(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), 
insn.opnds[0].into()); + cmove(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelL => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovl(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovge(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelLE => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovle(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovg(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelG => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovg(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovle(cb, insn.out.into(), insn.opnds[1].into()); }, Op::CSelGE => { - mov(cb, insn.out.into(), insn.opnds[1].into()); - cmovge(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, insn.out.into(), insn.opnds[0].into()); + cmovl(cb, insn.out.into(), insn.opnds[1].into()); }, // We want to keep the panic here because some instructions that diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a1fd4df35d826e..9f39c77bb6f303 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5990,7 +5990,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_newarray => Some(gen_newarray), //YARVINSN_duparray => Some(gen_duparray), //YARVINSN_checktype => Some(gen_checktype), - //YARVINSN_opt_lt => Some(gen_opt_lt), + YARVINSN_opt_lt => Some(gen_opt_lt), /* YARVINSN_opt_le => Some(gen_opt_le), YARVINSN_opt_gt => Some(gen_opt_gt), From b1ed4d9b947e650dda7bfb9578652d4edb2608b4 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Fri, 22 Jul 2022 17:08:35 +0100 Subject: [PATCH 406/546] Port and test duparray and splatarray (https://github.com/Shopify/ruby/pull/337) * Port duparray opcode * Port and test splatarray --- yjit/src/codegen.rs | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 
9f39c77bb6f303..fa0394eed54536 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1181,29 +1181,29 @@ fn gen_newarray( KeepCompiling } -/* // dup array fn gen_duparray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let ary = jit_get_arg(jit, 0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_ary_resurrect(VALUE ary); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary); - call_ptr(cb, REG0, rb_ary_resurrect as *const u8); + let new_ary = asm.ccall( + rb_ary_resurrect as *const u8, + vec![ary.into()], + ); let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_ary); KeepCompiling } -*/ // dup hash fn gen_duphash( @@ -1226,34 +1226,30 @@ fn gen_duphash( KeepCompiling } -/* // call to_a on the array on the stack fn gen_splatarray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let flag = jit_get_arg(jit, 0); // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let ary_opnd = ctx.stack_pop(1); // Call rb_vm_splat_array(flag, ary) - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag); - mov(cb, C_ARG_REGS[1], ary_opnd); - call_ptr(cb, REG1, rb_vm_splat_array as *const u8); + let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]); let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ary); KeepCompiling } -*/ // new range initialized from top 2 values fn gen_newrange( @@ -5988,7 +5984,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_newhash => Some(gen_newhash), YARVINSN_duphash => Some(gen_duphash), 
YARVINSN_newarray => Some(gen_newarray), - //YARVINSN_duparray => Some(gen_duparray), + YARVINSN_duparray => Some(gen_duparray), //YARVINSN_checktype => Some(gen_checktype), YARVINSN_opt_lt => Some(gen_opt_lt), /* @@ -5998,8 +5994,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_mod => Some(gen_opt_mod), YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), - YARVINSN_splatarray => Some(gen_splatarray), */ + YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), /* From 7e22ec7439211d43aea850a4e849f37e631ed131 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 22 Jul 2022 12:18:21 -0400 Subject: [PATCH 407/546] Clear the icache on arm --- yjit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yjit.c b/yjit.c index 0dddcfdc5aee28..a0c5c09091bc2e 100644 --- a/yjit.c +++ b/yjit.c @@ -78,6 +78,10 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size) rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s\n", mem_block, (unsigned long)mem_size, strerror(errno)); } + + // Clear/invalidate the instruction cache. Compiles to nothing on x86_64 + // but required on ARM. On Darwin it's the same as calling sys_icache_invalidate(). + __builtin___clear_cache(mem_block, (char *)mem_block + mem_size); } # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x))) From 4024553d131012c313d08b4939f3596b6044c077 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 22 Jul 2022 12:21:19 -0400 Subject: [PATCH 408/546] Add ifdef to clear cache --- yjit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yjit.c b/yjit.c index a0c5c09091bc2e..584f909473f126 100644 --- a/yjit.c +++ b/yjit.c @@ -81,7 +81,9 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size) // Clear/invalidate the instruction cache. Compiles to nothing on x86_64 // but required on ARM. 
On Darwin it's the same as calling sys_icache_invalidate(). +#ifdef __GNUC__ __builtin___clear_cache(mem_block, (char *)mem_block + mem_size); +#endif } # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x))) From 96303342e417cb2e5980d3e3f0909d32bf004431 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 22 Jul 2022 12:26:59 -0400 Subject: [PATCH 409/546] Enable more btests --- .cirrus.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.cirrus.yml b/.cirrus.yml index a6d29cd3eb65fe..d71ac851e3560a 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -143,7 +143,11 @@ yjit_task: bootstraptest/test_jump.rb \ bootstraptest/test_literal_suffix.rb \ bootstraptest/test_load.rb \ + bootstraptest/test_marshal.rb \ bootstraptest/test_massign.rb \ + bootstraptest/test_method.rb \ + bootstraptest/test_objectspace.rb \ bootstraptest/test_string.rb \ + bootstraptest/test_struct.rb \ bootstraptest/test_yjit_new_backend.rb # full_build_script: make -j From f593b2c6db622de6f973e4e847e959855c341a25 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 22 Jul 2022 14:01:21 -0400 Subject: [PATCH 410/546] Fixes for AArch64 (https://github.com/Shopify/ruby/pull/338) * Better splitting for Op::Add, Op::Sub, and Op::Cmp * Split stores if the displacement is too large * Use a shifted immediate argument * Split all places where shifted immediates are used * Add more tests to the cirrus workflow --- .cirrus.yml | 3 + yjit/src/asm/arm64/arg/mod.rs | 2 + yjit/src/asm/arm64/arg/shifted_imm.rs | 75 ++++++++++++++++++ yjit/src/asm/arm64/inst/data_imm.rs | 80 +++++-------------- yjit/src/asm/arm64/mod.rs | 40 ++++------ yjit/src/backend/arm64/mod.rs | 109 ++++++++++++++++++++------ 6 files changed, 200 insertions(+), 109 deletions(-) create mode 100644 yjit/src/asm/arm64/arg/shifted_imm.rs diff --git a/.cirrus.yml b/.cirrus.yml index d71ac851e3560a..74ab6bd979277a 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -140,6 +140,7 @@ yjit_task: bootstraptest/test_flow.rb \ 
bootstraptest/test_fork.rb \ bootstraptest/test_gc.rb \ + bootstraptest/test_io.rb \ bootstraptest/test_jump.rb \ bootstraptest/test_literal_suffix.rb \ bootstraptest/test_load.rb \ @@ -147,7 +148,9 @@ yjit_task: bootstraptest/test_massign.rb \ bootstraptest/test_method.rb \ bootstraptest/test_objectspace.rb \ + bootstraptest/test_proc.rb \ bootstraptest/test_string.rb \ bootstraptest/test_struct.rb \ bootstraptest/test_yjit_new_backend.rb + bootstraptest/test_yjit_rust_port.rb # full_build_script: make -j diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs index bb779ab6dfccba..30f3cc3dfedb6b 100644 --- a/yjit/src/asm/arm64/arg/mod.rs +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -4,9 +4,11 @@ mod bitmask_imm; mod condition; mod sf; +mod shifted_imm; mod sys_reg; pub use bitmask_imm::BitmaskImmediate; pub use condition::Condition; pub use sf::Sf; +pub use shifted_imm::ShiftedImmediate; pub use sys_reg::SystemRegister; diff --git a/yjit/src/asm/arm64/arg/shifted_imm.rs b/yjit/src/asm/arm64/arg/shifted_imm.rs new file mode 100644 index 00000000000000..5d1eeaf26dab80 --- /dev/null +++ b/yjit/src/asm/arm64/arg/shifted_imm.rs @@ -0,0 +1,75 @@ +/// How much to shift the immediate by. +pub enum Shift { + LSL0 = 0b0, // no shift + LSL12 = 0b1 // logical shift left by 12 bits +} + +/// Some instructions accept a 12-bit immediate that has an optional shift +/// attached to it. This allows encoding larger values than just fit into 12 +/// bits. We attempt to encode those here. If the values are too large we have +/// to bail out. +pub struct ShiftedImmediate { + shift: Shift, + value: u16 +} + +impl TryFrom for ShiftedImmediate { + type Error = (); + + /// Attempt to convert a u64 into a BitmaskImm. 
+ fn try_from(value: u64) -> Result { + let mut current = value; + if current < 2_u64.pow(12) { + return Ok(ShiftedImmediate { shift: Shift::LSL0, value: current as u16 }); + } + + if (current & (2_u64.pow(12) - 1) == 0) && ((current >> 12) < 2_u64.pow(12)) { + return Ok(ShiftedImmediate { shift: Shift::LSL12, value: (current >> 12) as u16 }); + } + + Err(()) + } +} + +impl From for u32 { + /// Encode a bitmask immediate into a 32-bit value. + fn from(imm: ShiftedImmediate) -> Self { + 0 + | (((imm.shift as u32) & 1) << 12) + | (imm.value as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_shift() { + let value = 256; + let result = ShiftedImmediate::try_from(value); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL0, value }))); + } + + #[test] + fn test_maximum_no_shift() { + let value = (1 << 12) - 1; + let result = ShiftedImmediate::try_from(value); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL0, value }))); + } + + #[test] + fn test_with_shift() { + let result = ShiftedImmediate::try_from(256 << 12); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL12, value: 256 }))); + } + + #[test] + fn test_unencodable() { + let result = ShiftedImmediate::try_from((256 << 12) + 1); + assert!(matches!(result, Err(()))); + } +} diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs index 19e2bfa1992ef5..b474b00a527d10 100644 --- a/yjit/src/asm/arm64/inst/data_imm.rs +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -1,4 +1,4 @@ -use super::super::arg::Sf; +use super::super::arg::{Sf, ShiftedImmediate}; /// The operation being performed by this instruction. enum Op { @@ -12,12 +12,6 @@ enum S { UpdateFlags = 0b1 } -/// How much to shift the immediate by. -enum Shift { - LSL0 = 0b0, // no shift - LSL12 = 0b1 // logical shift left by 12 bits -} - /// The struct that represents an A64 data processing -- immediate instruction /// that can be encoded. 
/// @@ -35,11 +29,8 @@ pub struct DataImm { /// The register number of the first operand register. rn: u8, - /// The value of the immediate. - imm12: u16, - /// How much to shift the immediate by. - shift: Shift, + imm: ShiftedImmediate, /// Whether or not to update the flags when this instruction is performed. s: S, @@ -54,64 +45,32 @@ pub struct DataImm { impl DataImm { /// ADD (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en - pub fn add(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { - Self { - rd, - rn, - imm12, - shift: Shift::LSL0, - s: S::LeaveFlags, - op: Op::Add, - sf: num_bits.into() - } + pub fn add(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Add, sf: num_bits.into() } } /// ADDS (immediate, set flags) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en - pub fn adds(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { - Self { - rd, - rn, - imm12, - shift: Shift::LSL0, - s: S::UpdateFlags, - op: Op::Add, - sf: num_bits.into() - } + pub fn adds(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Add, sf: num_bits.into() } } /// CMP (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en - pub fn cmp(rn: u8, imm12: u16, num_bits: u8) -> Self { - Self::subs(31, rn, imm12, num_bits) + pub fn cmp(rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self::subs(31, rn, imm, num_bits) } /// SUB (immediate) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en - pub fn sub(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { - Self { - rd, - rn, - imm12, - shift: Shift::LSL0, - s: 
S::LeaveFlags, - op: Op::Sub, - sf: num_bits.into() - } + pub fn sub(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Sub, sf: num_bits.into() } } /// SUBS (immediate, set flags) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en - pub fn subs(rd: u8, rn: u8, imm12: u16, num_bits: u8) -> Self { - Self { - rd, - rn, - imm12, - shift: Shift::LSL0, - s: S::UpdateFlags, - op: Op::Sub, - sf: num_bits.into() - } + pub fn subs(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Sub, sf: num_bits.into() } } } @@ -121,7 +80,7 @@ const FAMILY: u32 = 0b1000; impl From for u32 { /// Convert an instruction into a 32-bit value. fn from(inst: DataImm) -> Self { - let imm12 = (inst.imm12 as u32) & ((1 << 12) - 1); + let imm: u32 = inst.imm.into(); 0 | ((inst.sf as u32) << 31) @@ -129,8 +88,7 @@ impl From for u32 { | ((inst.s as u32) << 29) | (FAMILY << 25) | (1 << 24) - | ((inst.shift as u32) << 22) - | (imm12 << 10) + | (imm << 10) | ((inst.rn as u32) << 5) | inst.rd as u32 } @@ -150,35 +108,35 @@ mod tests { #[test] fn test_add() { - let inst = DataImm::add(0, 1, 7, 64); + let inst = DataImm::add(0, 1, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); assert_eq!(0x91001c20, result); } #[test] fn test_adds() { - let inst = DataImm::adds(0, 1, 7, 64); + let inst = DataImm::adds(0, 1, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); assert_eq!(0xb1001c20, result); } #[test] fn test_cmp() { - let inst = DataImm::cmp(0, 7, 64); + let inst = DataImm::cmp(0, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); assert_eq!(0xf1001c1f, result); } #[test] fn test_sub() { - let inst = DataImm::sub(0, 1, 7, 64); + let inst = DataImm::sub(0, 1, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); assert_eq!(0xd1001c20, result); } #[test] fn test_subs() { 
- let inst = DataImm::subs(0, 1, 7, 64); + let inst = DataImm::subs(0, 1, 7.try_into().unwrap(), 64); let result: u32 = inst.into(); assert_eq!(0xf1001c20, result); } diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 8b59d6c354aff1..0eba37ee15de99 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -41,18 +41,16 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::add(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into() + DataImm::add(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); if imm12 < 0 { - DataImm::sub(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + DataImm::sub(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() } else { - DataImm::add(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::add(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() } }, _ => panic!("Invalid operand combination to add instruction."), @@ -74,18 +72,16 @@ pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::adds(rd.reg_no, rn.reg_no, imm12.try_into().unwrap(), rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), 
A64Opnd::Imm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); if imm12 < 0 { - DataImm::subs(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + DataImm::subs(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() } else { - DataImm::adds(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::adds(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() } }, _ => panic!("Invalid operand combination to adds instruction."), @@ -272,9 +268,7 @@ pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() }, (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { - assert!(uimm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); - - DataImm::cmp(rn.reg_no, imm12 as u16, rn.num_bits).into() + DataImm::cmp(rn.reg_no, imm12.try_into().unwrap(), rn.num_bits).into() }, _ => panic!("Invalid operand combination to cmp instruction."), }; @@ -477,12 +471,12 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { (A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 }), A64Opnd::Reg(rm)) => { assert!(rm.num_bits == 64, "Expected rm to be 64 bits"); - DataImm::add(31, rm.reg_no, 0, 64).into() + DataImm::add(31, rm.reg_no, 0.try_into().unwrap(), 64).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 })) => { assert!(rd.num_bits == 64, "Expected rd to be 64 bits"); - DataImm::add(rd.reg_no, 31, 0, 64).into() + DataImm::add(rd.reg_no, 31, 0.try_into().unwrap(), 64).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); @@ -713,18 +707,16 @@ pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be 
of the same size."); - assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::sub(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into() + DataImm::sub(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); if imm12 < 0 { - DataImm::add(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + DataImm::add(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() } else { - DataImm::sub(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::sub(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() } }, _ => panic!("Invalid operand combination to sub instruction."), @@ -746,18 +738,16 @@ pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(uimm_fits_bits(uimm12, 12), "The immediate operand must be 12 bits or less."); - DataImm::subs(rd.reg_no, rn.reg_no, uimm12 as u16, rd.num_bits).into() + DataImm::subs(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() }, (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); - assert!(imm_fits_bits(imm12, 12), "The immediate operand must be 12 bits or less."); if imm12 < 0 { - DataImm::adds(rd.reg_no, rn.reg_no, -imm12 as u16, rd.num_bits).into() + DataImm::adds(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() } else { - DataImm::subs(rd.reg_no, rn.reg_no, imm12 as u16, rd.num_bits).into() + DataImm::subs(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() } }, _ 
=> panic!("Invalid operand combination to subs instruction."), diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index d3db3877dc347b..57943ce58fac38 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -81,7 +81,10 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { - fn load_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + /// Operands that take the place of bitmask immediates must follow a + /// certain encoding. In this function we ensure that those operands + /// do follow that encoding, and if they don't then we load them first. + fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, Opnd::Mem(_) => asm.load(opnd), @@ -105,6 +108,41 @@ impl Assembler } } + /// Operands that take the place of a shifted immediate must fit within + /// a certain size. If they don't then we need to load them first. + fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnd), + Opnd::UImm(uimm) => { + if ShiftedImmediate::try_from(uimm).is_ok() { + opnd + } else { + asm.load(opnd) + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + } + } + + /// When you're storing a register into a memory location, the + /// displacement from the base register of the memory location must fit + /// into 9 bits. If it doesn't, then we need to load that memory address + /// into a register first. 
+ fn split_store(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(mem) => { + if imm_fits_bits(mem.disp.into(), 9) { + opnd + } else { + let base = asm.lea(opnd); + Opnd::mem(64, base, 0) + } + }, + _ => unreachable!("Can only store memory addresses.") + } + } + self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { // Load all Value operands into registers that aren't already a part // of Load instructions. @@ -120,22 +158,20 @@ impl Assembler }; match op { - Op::Add | Op::Sub => { - // Check if one of the operands is a register. If it is, - // then we'll make that the first operand. + Op::Add => { match (opnds[0], opnds[1]) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - let opnd0 = asm.load(opnds[0]); - let opnd1 = asm.load(opnds[1]); - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { + asm.add(opnds[0], opnds[1]); }, - (mem_opnd @ Opnd::Mem(_), other_opnd) | - (other_opnd, mem_opnd @ Opnd::Mem(_)) => { - let opnd0 = asm.load(mem_opnd); - asm.push_insn(op, vec![opnd0, other_opnd], target, text, pos_marker); + (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | + (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. 
})) => { + let opnd1 = split_shifted_immediate(asm, other_opnd); + asm.add(reg_opnd, opnd1); }, _ => { - asm.push_insn(op, opnds, target, text, pos_marker); + let opnd0 = asm.load(opnds[0]); + let opnd1 = split_shifted_immediate(asm, opnds[1]); + asm.add(opnd0, opnd1); } } }, @@ -146,12 +182,12 @@ impl Assembler }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { - let opnd1 = load_bitmask_immediate(asm, other_opnd); + let opnd1 = split_bitmask_immediate(asm, other_opnd); asm.and(reg_opnd, opnd1); }, _ => { let opnd0 = asm.load(opnds[0]); - let opnd1 = load_bitmask_immediate(asm, opnds[1]); + let opnd1 = split_bitmask_immediate(asm, opnds[1]); asm.and(opnd0, opnd1); } } @@ -173,6 +209,16 @@ impl Assembler // just performs the call. asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]); }, + Op::Cmp => { + let opnd0 = match opnds[0] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], + _ => asm.load(opnds[0]) + }; + + let opnd1 = split_shifted_immediate(asm, opnds[1]); + + asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + }, Op::CRet => { if opnds[0] != Opnd::Reg(C_RET_REG) { let value = asm.load(opnds[0]); @@ -238,15 +284,20 @@ impl Assembler // register or an immediate that can be encoded as a // bitmask immediate. Otherwise, we'll need to split the // move into multiple instructions. - _ => load_bitmask_immediate(asm, opnds[1]) + _ => split_bitmask_immediate(asm, opnds[1]) }; // If we're attempting to load into a memory operand, then // we'll switch over to the store instruction. Otherwise // we'll use the normal mov instruction. 
match opnds[0] { - Opnd::Mem(_) => asm.store(opnds[0], value), - _ => asm.mov(opnds[0], value) + Opnd::Mem(_) => { + let opnd0 = split_store(asm, opnds[0]); + asm.store(opnd0, value); + }, + _ => { + asm.mov(opnds[0], value); + } }; }, Op::Not => { @@ -260,6 +311,11 @@ impl Assembler asm.not(opnd0); }, Op::Store => { + // The displacement for the STUR instruction can't be more + // than 9 bits long. If it's longer, we need to load the + // memory address into a register first. + let opnd0 = split_store(asm, opnds[0]); + // The value being stored must be in a register, so if it's // not already one we'll load it first. let opnd1 = match opnds[1] { @@ -267,7 +323,17 @@ impl Assembler _ => asm.load(opnds[1]) }; - asm.store(opnds[0], opnd1); + asm.store(opnd0, opnd1); + }, + Op::Sub => { + let opnd0 = match opnds[0] { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], + _ => asm.load(opnds[0]) + }; + + let opnd1 = split_shifted_immediate(asm, opnds[1]); + + asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); }, Op::Test => { // The value being tested must be in a register, so if it's @@ -281,7 +347,7 @@ impl Assembler // unsigned immediate that can be encoded as a bitmask // immediate. If it's not one of those, we'll need to load // it first. 
- let opnd1 = load_bitmask_immediate(asm, opnds[1]); + let opnd1 = split_bitmask_immediate(asm, opnds[1]); asm.test(opnd0, opnd1); }, @@ -454,18 +520,15 @@ impl Assembler cb.add_comment(&insn.text.as_ref().unwrap()); } }, - Op::Label => { cb.write_label(insn.target.unwrap().unwrap_label_idx()); }, - // Report back the current position in the generated code Op::PosMarker => { let pos = cb.get_write_ptr(); let pos_marker_fn = insn.pos_marker.as_ref().unwrap(); pos_marker_fn(pos); } - Op::BakeString => { let str = insn.text.as_ref().unwrap(); for byte in str.as_bytes() { From 13e5b56a5d8f36815fb9aa3834d82a54b69e087a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 22 Jul 2022 16:06:37 -0400 Subject: [PATCH 411/546] Fixes (https://github.com/Shopify/ruby/pull/340) * Fix conditional jumps to label * Bitmask immediates cannot be u64::MAX --- yjit/src/asm/arm64/arg/bitmask_imm.rs | 10 ++++++++-- yjit/src/backend/arm64/mod.rs | 13 +++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs index 220a7d697ee6a1..b3a821fe94a2e7 100644 --- a/yjit/src/asm/arm64/arg/bitmask_imm.rs +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -43,7 +43,7 @@ impl TryFrom for BitmaskImmediate { fn try_from(value: u64) -> Result { // 0 is not encodable as a bitmask immediate. Immediately return here so // that we don't have any issues with underflow. 
- if value == 0 { + if value == 0 || value == u64::MAX { return Err(()); } @@ -137,7 +137,7 @@ impl From for u32 { 0 | (((bitmask.n as u32) & 1) << 12) | ((bitmask.immr as u32) << 6) - | bitmask.imms as u32 + | (bitmask.imms as u32) } } @@ -260,4 +260,10 @@ mod tests { let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe); assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 }))); } + + #[test] + fn test_size_64_invalid() { + let bitmask = BitmaskImmediate::try_from(u64::MAX); + assert!(matches!(bitmask, Err(()))); + } } diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 57943ce58fac38..9726a0f8f24a23 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -295,9 +295,10 @@ impl Assembler let opnd0 = split_store(asm, opnds[0]); asm.store(opnd0, value); }, - _ => { + Opnd::Reg(_) => { asm.mov(opnds[0], value); - } + }, + _ => unreachable!() }; }, Op::Not => { @@ -488,7 +489,7 @@ impl Assembler // offset. We're going to assume we can fit into a single // b.cond instruction. It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - src_addr)); + bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - (src_addr - 4))); }); }, Target::FunPtr(_) => unreachable!() @@ -595,8 +596,8 @@ impl Assembler // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. - ldr(cb, insn.out.into(), 1); - b(cb, A64Opnd::new_imm((SIZEOF_VALUE as i64) / 4)); + ldr(cb, insn.out.into(), 2); + b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); if !value.special_const_p() { @@ -743,7 +744,7 @@ impl Assembler // to assume we can fit into a single b instruction. // It will panic otherwise. 
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - b(cb, A64Opnd::new_imm((dst_addr - src_addr) / 4 + 1)); + b(cb, A64Opnd::new_imm((dst_addr - (src_addr - 4)) / 4)); }); }, _ => unreachable!() From 133ad38777db991e20a1feba1acbfe5d97cc2fa0 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 22 Jul 2022 16:13:29 -0400 Subject: [PATCH 412/546] Fix CI workflows --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 74ab6bd979277a..85a569e1a0fa17 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -151,6 +151,6 @@ yjit_task: bootstraptest/test_proc.rb \ bootstraptest/test_string.rb \ bootstraptest/test_struct.rb \ - bootstraptest/test_yjit_new_backend.rb + bootstraptest/test_yjit_new_backend.rb \ bootstraptest/test_yjit_rust_port.rb # full_build_script: make -j From 813df1f27aa52a3050d90dab23bc72093da00e6c Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 22 Jul 2022 16:24:18 -0400 Subject: [PATCH 413/546] Add LiveReg IR instruction to fix stats leave exit code (https://github.com/Shopify/ruby/pull/341) It allows for reserving a specific register and prevents the register allocator from clobbering it. Without this `./miniruby --yjit-stats --yjit-callthreshold=1 -e0` was crashing because the counter incrementing code was clobbering RAX incorrectly. 
--- yjit/src/backend/arm64/mod.rs | 1 + yjit/src/backend/ir.rs | 15 ++++++++++++--- yjit/src/backend/x86_64/mod.rs | 3 ++- yjit/src/codegen.rs | 7 ++++--- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 9726a0f8f24a23..c6cd1b882ca7e6 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -792,6 +792,7 @@ impl Assembler Op::CSelGE => { csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::GE); } + Op::LiveReg => (), // just a reg alloc signal, no code }; } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index c55a8f609b25e9..dc0e450df4de73 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -145,7 +145,10 @@ pub enum Op FrameSetup, /// Tear down the frame stack as necessary per the architecture. - FrameTeardown + FrameTeardown, + + /// Take a specific register. Signal the register allocator to not use it. + LiveReg, } // Memory operand base @@ -633,7 +636,6 @@ impl Assembler if let Some(reg_index) = reg_index { assert_eq!(*pool & (1 << reg_index), 0); *pool |= 1 << reg_index; - //return regs[reg_index]; } return *reg; @@ -713,7 +715,13 @@ impl Assembler // Allocate a new register for this instruction if out_reg == Opnd::None { - out_reg = Opnd::Reg(alloc_reg(&mut pool, ®s)) + out_reg = if op == Op::LiveReg { + // Allocate a specific register + let reg = opnds[0].unwrap_reg(); + Opnd::Reg(take_reg(&mut pool, ®s, ®)) + } else { + Opnd::Reg(alloc_reg(&mut pool, ®s)) + } } } @@ -902,6 +910,7 @@ def_push_1_opnd_no_out!(cret, Op::CRet); def_push_1_opnd!(load, Op::Load); def_push_1_opnd!(load_sext, Op::LoadSExt); def_push_1_opnd!(lea, Op::Lea); +def_push_1_opnd!(live_reg_opnd, Op::LiveReg); def_push_2_opnd_no_out!(store, Op::Store); def_push_2_opnd_no_out!(mov, Op::Mov); def_push_2_opnd_no_out!(cmp, Op::Cmp); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 
7b84e6213447d0..5bae5c7f29e5bb 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -448,7 +448,8 @@ impl Assembler Op::CSelGE => { mov(cb, insn.out.into(), insn.opnds[0].into()); cmovl(cb, insn.out.into(), insn.opnds[1].into()); - }, + } + Op::LiveReg => (), // just a reg alloc signal, no code // We want to keep the panic here because some instructions that // we feed to the backend could get lowered into other diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index fa0394eed54536..e122d67910eaba 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -213,7 +213,7 @@ macro_rules! gen_counter_incr { let counter_opnd = Opnd::mem(64, ptr_reg, 0); // Increment and store the updated value - $asm.incr_counter(counter_opnd, 1.into() ); + $asm.incr_counter(counter_opnd, 1.into()); } }; } @@ -552,8 +552,9 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { let code_ptr = ocb.get_write_ptr(); let mut asm = Assembler::new(); - // NOTE: gen_leave() fully reconstructs interpreter state and leaves the + // gen_leave() fully reconstructs interpreter state and leaves the // return value in C_RET_OPND before coming here. 
+ let ret_opnd = asm.live_reg_opnd(C_RET_OPND); // Every exit to the interpreter should be counted gen_counter_incr!(asm, leave_interp_return); @@ -564,7 +565,7 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { asm.frame_teardown(); - asm.cret(C_RET_OPND); + asm.cret(ret_opnd); asm.compile(ocb); From e131b217cfa9f29a0677e65cf573494279eda8a5 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 22 Jul 2022 16:25:37 -0400 Subject: [PATCH 414/546] Test --yjit-stats in our CI workflows --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 85a569e1a0fa17..ef91abd3b26e05 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -124,7 +124,7 @@ yjit_task: fi boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 - # output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 + output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 bootstrap_tests_script: | RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. -I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" \ bootstraptest/test_attr.rb \ From 8617bac950fbee712e621f79bf96ca30fa9aa2ec Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 22 Jul 2022 18:02:24 -0400 Subject: [PATCH 415/546] Fix IncrCounter on ARM The order of operands to LDADDAL were flipped and the destination pointer was dereferenced instead of passed as an address. 
--- yjit/src/backend/arm64/mod.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index c6cd1b882ca7e6..12cd267245b028 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -238,17 +238,17 @@ impl Assembler asm.push_insn(op, new_opnds, target, text, pos_marker); }, Op::IncrCounter => { - // Every operand to the IncrCounter instruction need to be a - // register once it gets there. So here we're going to load - // anything that isn't a register first. - let new_opnds: Vec = opnds.into_iter().map(|opnd| { - match opnd { - Opnd::Mem(_) | Opnd::Imm(_) | Opnd::UImm(_) => asm.load(opnd), - _ => opnd, - } - }).collect(); + // We'll use LDADD later which only works with registers + // ... Load pointer into register + let counter_addr = asm.lea(opnds[0]); + + // Load immediates into a register + let addend = match opnds[1] { + opnd @ Opnd::Imm(_) | opnd @ Opnd::UImm(_) => asm.load(opnd), + opnd => opnd, + }; - asm.incr_counter(new_opnds[0], new_opnds[1]); + asm.incr_counter(counter_addr, addend); }, Op::JmpOpnd => { if let Opnd::Mem(_) = opnds[0] { @@ -769,7 +769,7 @@ impl Assembler emit_conditional_jump::<{Condition::VS}>(cb, insn.target.unwrap()); }, Op::IncrCounter => { - ldaddal(cb, insn.opnds[0].into(), insn.opnds[0].into(), insn.opnds[1].into()); + ldaddal(cb, insn.opnds[1].into(), insn.opnds[1].into(), insn.opnds[0].into()); }, Op::Breakpoint => { brk(cb, A64Opnd::None); From c9a947e5d81088907f774ddfbcd274a88ba7d8ff Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Mon, 25 Jul 2022 15:38:09 +0100 Subject: [PATCH 416/546] Port and test checkkeyword (https://github.com/Shopify/ruby/pull/339) --- yjit/src/codegen.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e122d67910eaba..e29e570de16834 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ 
-1765,13 +1765,12 @@ fn gen_putstring( KeepCompiling } -/* // Push Qtrue or Qfalse depending on whether the given keyword was supplied by // the caller fn gen_checkkeyword( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // When a keyword is unspecified past index 32, a hash will be used @@ -1787,27 +1786,26 @@ fn gen_checkkeyword( let index: i64 = jit_get_arg(jit, 1).as_i64(); // Load environment pointer EP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // VALUE kw_bits = *(ep - bits); - let bits_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * -bits_offset); + let bits_opnd = Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * -bits_offset); // unsigned int b = (unsigned int)FIX2ULONG(kw_bits); // if ((b & (0x01 << idx))) { // // We can skip the FIX2ULONG conversion by shifting the bit we test let bit_test: i64 = 0x01 << (index + 1); - test(cb, bits_opnd, imm_opnd(bit_test)); - mov(cb, REG0, uimm_opnd(Qfalse.into())); - mov(cb, REG1, uimm_opnd(Qtrue.into())); - cmovz(cb, REG0, REG1); + asm.test(bits_opnd, Opnd::Imm(bit_test)); + let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into()); let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, ret_opnd); KeepCompiling } +/* fn gen_jnz_to_target0( cb: &mut CodeBlock, target0: CodePtr, @@ -6002,7 +6000,9 @@ fn get_gen_fn(opcode: VALUE) -> Option { /* YARVINSN_expandarray => Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), + */ YARVINSN_checkkeyword => Some(gen_checkkeyword), + /* YARVINSN_concatstrings => Some(gen_concatstrings), YARVINSN_getinstancevariable => Some(gen_getinstancevariable), YARVINSN_setinstancevariable => Some(gen_setinstancevariable), From 2d9b98f9bc918b1161d308c5d202fcb3be01de07 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 25 Jul 2022 15:22:25 -0400 Subject: [PATCH 417/546] Fix a bug in the x86 backend wrt large integer values, 
enable more tests --- yjit/src/backend/x86_64/mod.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 5bae5c7f29e5bb..c1bcdca3e70b10 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -2,7 +2,7 @@ #![allow(unused_variables)] #![allow(unused_imports)] -use crate::asm::{uimm_num_bits, CodeBlock}; +use crate::asm::*; use crate::asm::x86_64::*; use crate::codegen::{JITState}; use crate::cruby::*; @@ -119,7 +119,15 @@ impl Assembler (asm.load(opnds[0]), asm.load(opnds[1])) }, (Opnd::Mem(_), Opnd::UImm(value)) => { - if uimm_num_bits(value) > 32 { + // 32-bit values will be sign-extended + if imm_num_bits(value as i64) > 32 { + (asm.load(opnds[0]), asm.load(opnds[1])) + } else { + (asm.load(opnds[0]), opnds[1]) + } + }, + (Opnd::Mem(_), Opnd::Imm(value)) => { + if imm_num_bits(value) > 32 { (asm.load(opnds[0]), asm.load(opnds[1])) } else { (asm.load(opnds[0]), opnds[1]) @@ -161,7 +169,16 @@ impl Assembler asm.mov(opnds[0], opnd1); }, (Opnd::Mem(_), Opnd::UImm(value)) => { - if uimm_num_bits(value) > 32 { + // 32-bit values will be sign-extended + if imm_num_bits(value as i64) > 32 { + let opnd1 = asm.load(opnds[1]); + asm.mov(opnds[0], opnd1); + } else { + asm.mov(opnds[0], opnds[1]); + } + }, + (Opnd::Mem(_), Opnd::Imm(value)) => { + if imm_num_bits(value) > 32 { let opnd1 = asm.load(opnds[1]); asm.mov(opnds[0], opnd1); } else { From b2d255ad3cb34262494df3c55352215dcbd4d881 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 26 Jul 2022 13:14:06 -0400 Subject: [PATCH 418/546] A64: Fix off by one in offset encoding for BL (https://github.com/Shopify/ruby/pull/344) * A64: Fix off by one in offset encoding for BL It's relative to the address of the instruction not the end of it. * A64: Fix off by one when encoding B It's relative to the start of the instruction not the end. 
* A64: Add some tests for boundary offsets --- yjit/src/asm/arm64/mod.rs | 32 ++++++++++++++++++++++++++++++-- yjit/src/backend/arm64/mod.rs | 22 ++++++++++------------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 0eba37ee15de99..d114f64a22c3ec 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -910,12 +910,40 @@ mod tests { #[test] fn test_b() { - check_bytes("00040014", |cb| b(cb, A64Opnd::new_imm(1024))); + check_bytes("ffffff15", |cb| b(cb, A64Opnd::new_imm((1 << 25) - 1))); + } + + #[test] + #[should_panic] + fn test_b_too_big() { + // There are 26 bits available + check_bytes("", |cb| b(cb, A64Opnd::new_imm(1 << 25))); + } + + #[test] + #[should_panic] + fn test_b_too_small() { + // There are 26 bits available + check_bytes("", |cb| b(cb, A64Opnd::new_imm(-(1 << 25) - 1))); } #[test] fn test_bl() { - check_bytes("00040094", |cb| bl(cb, A64Opnd::new_imm(1024))); + check_bytes("00000096", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25)))); + } + + #[test] + #[should_panic] + fn test_bl_too_big() { + // There are 26 bits available + check_bytes("", |cb| bl(cb, A64Opnd::new_imm(1 << 25))); + } + + #[test] + #[should_panic] + fn test_bl_too_small() { + // There are 26 bits available + check_bytes("", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25) - 1))); } #[test] diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 12cd267245b028..018dd79ae440bf 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -683,20 +683,17 @@ impl Assembler } }, Op::CCall => { - // The offset between the two instructions in bytes. Note - // that when we encode this into a bl instruction, we'll - // divide by 4 because it accepts the number of instructions - // to jump over. 
- let src_addr = cb.get_write_ptr().into_i64() + 4; + // The offset to the call target in bytes + let src_addr = cb.get_write_ptr().into_i64(); let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; let offset = dst_addr - src_addr; + // The offset in instruction count for BL's immediate + let offset = offset / 4; - // If the offset is short enough, then we'll use the branch - // link instruction. Otherwise, we'll move the destination - // and return address into appropriate registers and use the - // branch register instruction. + // Use BL if the offset is short enough to encode as an immediate. + // Otherwise, use BLR with a register. if b_offset_fits_bits(offset) { - bl(cb, A64Opnd::new_imm(offset / 4)); + bl(cb, A64Opnd::new_imm(offset)); } else { emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); blr(cb, Self::SCRATCH0); @@ -717,7 +714,7 @@ impl Assembler Op::Jmp => { match insn.target.unwrap() { Target::CodePtr(dst_ptr) => { - let src_addr = cb.get_write_ptr().into_i64() + 4; + let src_addr = cb.get_write_ptr().into_i64(); let dst_addr = dst_ptr.into_i64(); // The offset between the two instructions in bytes. @@ -725,13 +722,14 @@ impl Assembler // instruction, we'll divide by 4 because it accepts // the number of instructions to jump over. let offset = dst_addr - src_addr; + let offset = offset / 4; // If the offset is short enough, then we'll use the // branch instruction. Otherwise, we'll move the // destination into a register and use the branch // register instruction. 
if b_offset_fits_bits(offset) { - b(cb, A64Opnd::new_imm(offset / 4)); + b(cb, A64Opnd::new_imm(offset)); } else { emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); br(cb, Self::SCRATCH0); From 4b1ab009c4ceea2232d1c71a1af7bf1a04af2165 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Wed, 27 Jul 2022 17:42:52 +0100 Subject: [PATCH 419/546] Port the YJIT defined opcode; fix C_ARG_REGS (https://github.com/Shopify/ruby/pull/342) --- yjit/src/asm/x86_64/mod.rs | 3 --- yjit/src/backend/arm64/mod.rs | 4 ++-- yjit/src/backend/x86_64/mod.rs | 4 ++-- yjit/src/codegen.rs | 25 +++++++++---------------- 4 files changed, 13 insertions(+), 23 deletions(-) diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index d23279f2770d61..3f865b82a52fe4 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -264,9 +264,6 @@ pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType:: pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 }); pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 }); -// C argument registers on this platform -pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9]; - //=========================================================================== /// Shorthand for memory operand with base register and displacement diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 018dd79ae440bf..99cf08c09cdce2 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -193,7 +193,7 @@ impl Assembler } }, Op::CCall => { - assert!(opnds.len() < C_ARG_REGS.len()); + assert!(opnds.len() < C_ARG_OPNDS.len()); // For each of the operands we're going to first load them // into a register and then move them into the correct @@ -202,7 +202,7 @@ impl Assembler // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { let value 
= asm.load(opnd); - asm.mov(Opnd::Reg(C_ARG_REGREGS[idx]), value); + asm.mov(C_ARG_OPNDS[idx], value); } // Now we push the CCall without any arguments so that it diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index c1bcdca3e70b10..13bb106b970989 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -339,11 +339,11 @@ impl Assembler // C function call Op::CCall => { // Temporary - assert!(insn.opnds.len() < C_ARG_REGS.len()); + assert!(insn.opnds.len() < _C_ARG_OPNDS.len()); // For each operand for (idx, opnd) in insn.opnds.iter().enumerate() { - mov(cb, C_ARG_REGS[idx], insn.opnds[idx].into()); + mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), insn.opnds[idx].into()); } let ptr = insn.target.unwrap().unwrap_fun_ptr(); diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e29e570de16834..cb4b23e26ee834 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2177,11 +2177,12 @@ fn gen_setinstancevariable( KeepCompiling } +*/ fn gen_defined( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let op_type = jit_get_arg(jit, 0); @@ -2190,26 +2191,19 @@ fn gen_defined( // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let v_opnd = ctx.stack_pop(1); // Call vm_defined(ec, reg_cfp, op_type, obj, v) - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(op_type.into())); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], obj); - mov(cb, C_ARG_REGS[4], v_opnd); - call_ptr(cb, REG0, rb_vm_defined as *const u8); + let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]); // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { // val = pushval; // } - 
jit_mov_gc_ptr(jit, cb, REG1, pushval); - cmp(cb, AL, imm_opnd(0)); - mov(cb, RAX, uimm_opnd(Qnil.into())); - cmovnz(cb, RAX, REG1); + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); // Push the return value onto the stack let out_type = if pushval.special_const_p() { @@ -2218,11 +2212,12 @@ fn gen_defined( Type::Unknown }; let stack_ret = ctx.stack_push(out_type); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, out_value); KeepCompiling } +/* fn gen_checktype( jit: &mut JITState, ctx: &mut Context, @@ -5997,10 +5992,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), - /* - YARVINSN_expandarray => Some(gen_expandarray), + //YARVINSN_expandarray => Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), - */ YARVINSN_checkkeyword => Some(gen_checkkeyword), /* YARVINSN_concatstrings => Some(gen_concatstrings), From 6ab71a8598c6eece25975ca262eb880462e47b06 Mon Sep 17 00:00:00 2001 From: Zack Deveau Date: Wed, 27 Jul 2022 14:31:41 -0400 Subject: [PATCH 420/546] Port gen_checktype to the new IR assembler backend (https://github.com/Shopify/ruby/pull/343) --- bootstraptest/test_yjit.rb | 8 ++++++++ yjit/src/codegen.rs | 39 +++++++++++++++++--------------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 966a5f30021499..d82f3de95ea957 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3066,3 +3066,11 @@ def pt_inspect(pt) a.length } + +# checktype +assert_equal 'false', %q{ + def function() + [1, 2] in [Integer, String] + end + function() +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index cb4b23e26ee834..b34cc7409a1a8f 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2217,11 +2217,10 @@ fn gen_defined( KeepCompiling } -/* fn gen_checktype( jit: &mut JITState, 
ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let type_val = jit_get_arg(jit, 0).as_u32(); @@ -2235,42 +2234,37 @@ fn gen_checktype( match val_type.known_value_type() { Some(value_type) => { if value_type == type_val { - jit_putobject(jit, ctx, cb, Qtrue); + jit_putobject(jit, ctx, asm, Qtrue); return KeepCompiling; } else { - jit_putobject(jit, ctx, cb, Qfalse); + jit_putobject(jit, ctx, asm, Qfalse); return KeepCompiling; } }, _ => (), } - mov(cb, REG0, val); - mov(cb, REG1, uimm_opnd(Qfalse.as_u64())); - - let ret = cb.new_label("ret".to_string()); + let ret = asm.new_label("ret"); if !val_type.is_heap() { // if (SPECIAL_CONST_P(val)) { // Return Qfalse via REG1 if not on heap - test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_label(cb, ret); - cmp(cb, REG0, uimm_opnd(Qnil.as_u64())); - jbe_label(cb, ret); + asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(ret); + asm.cmp(val, Opnd::UImm(Qnil.into())); + asm.jbe(ret); } // Check type on object - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - and(cb, REG0, uimm_opnd(RUBY_T_MASK as u64)); - cmp(cb, REG0, uimm_opnd(type_val as u64)); - mov(cb, REG0, uimm_opnd(Qtrue.as_u64())); - // REG1 contains Qfalse from above - cmove(cb, REG1, REG0); + let object_type = asm.and( + Opnd::mem(64, val, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::UImm(RUBY_T_MASK.into())); + asm.cmp(object_type, Opnd::UImm(type_val.into())); + let ret_opnd = asm.csel_e(Opnd::UImm(Qfalse.into()), Opnd::UImm(Qtrue.into())); - cb.write_label(ret); + asm.write_label(ret); let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG1); - cb.link_labels(); + asm.mov(stack_ret, ret_opnd); KeepCompiling } else { @@ -2278,6 +2272,7 @@ fn gen_checktype( } } +/* fn gen_concatstrings( jit: &mut JITState, ctx: &mut Context, @@ -5979,7 +5974,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_duphash => Some(gen_duphash), YARVINSN_newarray => 
Some(gen_newarray), YARVINSN_duparray => Some(gen_duparray), - //YARVINSN_checktype => Some(gen_checktype), + YARVINSN_checktype => Some(gen_checktype), YARVINSN_opt_lt => Some(gen_opt_lt), /* YARVINSN_opt_le => Some(gen_opt_le), From 869b0ba6e00168d739830af766c3abb0dec01f12 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 28 Jul 2022 11:08:30 -0400 Subject: [PATCH 421/546] Minor cleanups (https://github.com/Shopify/ruby/pull/345) * Move allocation into Assembler::pos_marker We wanted to do this to begin with but didn't because we were confused about the lifetime parameter. It's actually talking about the lifetime of the references that the closure captures. Since all of our usages capture no references (they use `move`), it's fine to put a `+ 'static` here. * Use optional token syntax for calling convention macro * Explicitly request C ABI on ARM It looks like the Rust calling convention for functions are the same as the C ABI for now and it's unlikely to change, but it's easy for us to be explicit here. I also tried saying `extern "aapcs"` but that unfortunately doesn't work. 
--- yjit/src/backend/ir.rs | 4 ++-- yjit/src/codegen.rs | 4 ++-- yjit/src/core.rs | 8 ++++---- yjit/src/utils.rs | 6 ++---- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index dc0e450df4de73..ce821616937adc 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -796,9 +796,9 @@ impl Assembler } //pub fn pos_marker(&mut self, marker_fn: F) - pub fn pos_marker(&mut self, marker_fn: PosMarkerFn) + pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { - self.push_insn(Op::PosMarker, vec![], None, None, Some(marker_fn)); + self.push_insn(Op::PosMarker, vec![], None, None, Some(Box::new(marker_fn))); } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b34cc7409a1a8f..93d835d7f167e1 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -297,9 +297,9 @@ fn jit_prepare_routine_call( /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. 
fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) { - asm.pos_marker(Box::new(move |code_ptr| { + asm.pos_marker(move |code_ptr| { CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos); - })); + }); } /// Verify the ctx's types and mappings against the compile-time stack, self, diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 1afa5c537ac567..3b33360b90c6d0 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1814,10 +1814,10 @@ impl Assembler // so that we can move the closure below let branchref = branchref.clone(); - self.pos_marker(Box::new(move |code_ptr| { + self.pos_marker(move |code_ptr| { let mut branch = branchref.borrow_mut(); branch.start_addr = Some(code_ptr); - })); + }); } // Mark the end position of a patchable branch in the machine code @@ -1827,10 +1827,10 @@ impl Assembler // so that we can move the closure below let branchref = branchref.clone(); - self.pos_marker(Box::new(move |code_ptr| { + self.pos_marker(move |code_ptr| { let mut branch = branchref.borrow_mut(); branch.end_addr = Some(code_ptr); - })); + }); } } diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index 5f42ba1fdbd805..bea57e4fc24a5d 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -122,14 +122,12 @@ yjit_print_iseq(const rb_iseq_t *iseq) #[cfg(target_arch = "aarch64")] macro_rules! c_callable { - (fn $f:ident $args:tt -> $ret:ty $body:block) => { fn $f $args -> $ret $body }; - (fn $f:ident $args:tt $body:block) => { fn $f $args $body }; + (fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { extern "C" fn $f $args $(-> $ret)? $body }; } #[cfg(target_arch = "x86_64")] macro_rules! c_callable { - (fn $f:ident $args:tt -> $ret:ty $body:block) => { extern "sysv64" fn $f $args -> $ret $body }; - (fn $f:ident $args:tt $body:block) => { extern "sysv64" fn $f $args $body }; + (fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { extern "sysv64" fn $f $args $(-> $ret)? 
$body }; } pub(crate) use c_callable; From 1446e22aeba09c55227c56b9c2f6cb90facc9512 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 1 Aug 2022 10:16:01 -0700 Subject: [PATCH 422/546] Port setglobal to the new backend (https://github.com/Shopify/ruby/pull/347) --- yjit/src/codegen.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 93d835d7f167e1..940ffed3df4913 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5298,30 +5298,30 @@ fn gen_getglobal( KeepCompiling } -/* fn gen_setglobal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let gid = jit_get_arg(jit, 0); // Save the PC and SP because we might make a Ruby call for // Kernel#set_trace_var - jit_prepare_routine_call(jit, ctx, cb, REG0); - - mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64())); - - let val = ctx.stack_pop(1); - - mov(cb, C_ARG_REGS[1], val); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_gvar_set as *const u8); + asm.ccall( + rb_gvar_set as *const u8, + vec![ + gid.into(), + ctx.stack_pop(1), + ], + ); KeepCompiling } +/* fn gen_anytostring( jit: &mut JITState, ctx: &mut Context, @@ -6028,8 +6028,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_leave => Some(gen_leave), YARVINSN_getglobal => Some(gen_getglobal), - /* YARVINSN_setglobal => Some(gen_setglobal), + /* YARVINSN_anytostring => Some(gen_anytostring), YARVINSN_objtostring => Some(gen_objtostring), YARVINSN_intern => Some(gen_intern), From a674b8d8a13c9dbffb92dbcab7ff297a8b99591b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 1 Aug 2022 10:34:15 -0700 Subject: [PATCH 423/546] Port class variable instructions (https://github.com/Shopify/ruby/pull/346) --- yjit/src/codegen.rs | 50 +++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/yjit/src/codegen.rs 
b/yjit/src/codegen.rs index 940ffed3df4913..73d76759a621c1 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5525,26 +5525,29 @@ fn gen_getspecial( KeepCompiling } } +*/ fn gen_getclassvariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // rb_vm_getclassvariable can raise exceptions. - jit_prepare_routine_call(jit, ctx, cb, REG0); - - let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ); - mov(cb, C_ARG_REGS[0], cfp_iseq_opnd); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64())); - mov(cb, C_ARG_REGS[3], uimm_opnd(jit_get_arg(jit, 1).as_u64())); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_vm_getclassvariable as *const u8); + let val_opnd = asm.ccall( + rb_vm_getclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit_get_arg(jit, 0).as_u64()), + Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + ], + ); - let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, stack_top, RAX); + let top = ctx.stack_push(Type::Unknown); + asm.mov(top, val_opnd); KeepCompiling } @@ -5552,24 +5555,27 @@ fn gen_getclassvariable( fn gen_setclassvariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // rb_vm_setclassvariable can raise exceptions. 
- jit_prepare_routine_call(jit, ctx, cb, REG0); - - let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ); - mov(cb, C_ARG_REGS[0], cfp_iseq_opnd); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64())); - mov(cb, C_ARG_REGS[3], ctx.stack_pop(1)); - mov(cb, C_ARG_REGS[4], uimm_opnd(jit_get_arg(jit, 1).as_u64())); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_vm_setclassvariable as *const u8); + asm.ccall( + rb_vm_setclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit_get_arg(jit, 0).as_u64()), + ctx.stack_pop(1), + Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + ], + ); KeepCompiling } +/* fn gen_opt_getinlinecache( jit: &mut JITState, ctx: &mut Context, @@ -6035,9 +6041,9 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_intern => Some(gen_intern), YARVINSN_toregexp => Some(gen_toregexp), YARVINSN_getspecial => Some(gen_getspecial), + */ YARVINSN_getclassvariable => Some(gen_getclassvariable), YARVINSN_setclassvariable => Some(gen_setclassvariable), - */ // Unimplemented opcode, YJIT won't generate code for this yet _ => None, From 45da6974500070872a2b20fafe2b50bc1dce1052 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 1 Aug 2022 16:12:51 -0400 Subject: [PATCH 424/546] Push first pass at SSA IR sketch --- yjit/src/backend/ir.rs | 1 - yjit/src/backend/ir_ssa.rs | 1165 ++++++++++++++++++++++++++++++++++++ yjit/src/backend/mod.rs | 1 + yjit/src/lib.rs | 3 - 4 files changed, 1166 insertions(+), 4 deletions(-) create mode 100644 yjit/src/backend/ir_ssa.rs diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index ce821616937adc..45d4378eb4337d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -8,7 +8,6 @@ use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; use crate::core::{Context, Type, TempMapping}; -use 
crate::codegen::{JITState}; #[cfg(target_arch = "x86_64")] use crate::backend::x86_64::*; diff --git a/yjit/src/backend/ir_ssa.rs b/yjit/src/backend/ir_ssa.rs new file mode 100644 index 00000000000000..49974b90b75ace --- /dev/null +++ b/yjit/src/backend/ir_ssa.rs @@ -0,0 +1,1165 @@ +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_imports)] + +use std::fmt; +use std::convert::From; +use crate::cruby::{VALUE}; +use crate::virtualmem::{CodePtr}; +use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; +use crate::core::{Context, Type, TempMapping}; + +/* +#[cfg(target_arch = "x86_64")] +use crate::backend::x86_64::*; + +#[cfg(target_arch = "aarch64")] +use crate::backend::arm64::*; + + +pub const EC: Opnd = _EC; +pub const CFP: Opnd = _CFP; +pub const SP: Opnd = _SP; + +pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; +pub const C_RET_OPND: Opnd = _C_RET_OPND; +*/ + + + +// Dummy reg struct +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub struct Reg +{ + reg_no: u8, + num_bits: u8, +} + + + + + + + +/// Instruction opcodes +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Op +{ + // Add a comment into the IR at the point that this instruction is added. + // It won't have any impact on that actual compiled code. + Comment, + + // Add a label into the IR at the point that this instruction is added. + Label, + + // Mark a position in the generated code + PosMarker, + + // Bake a string directly into the instruction stream. + BakeString, + + // Add two operands together, and return the result as a new operand. This + // operand can then be used as the operand on another instruction. It + // accepts two operands, which can be of any type + // + // Under the hood when allocating registers, the IR will determine the most + // efficient way to get these values into memory. For example, if both + // operands are immediates, then it will load the first one into a register + // first with a mov instruction and then add them together. 
If one of them + // is a register, however, it will just perform a single add instruction. + Add, + + // This is the same as the OP_ADD instruction, except for subtraction. + Sub, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary AND operation. + And, + + // Perform the NOT operation on an individual operand, and return the result + // as a new operand. This operand can then be used as the operand on another + // instruction. + Not, + + // + // Low-level instructions + // + + // A low-level instruction that loads a value into a register. + Load, + + // A low-level instruction that loads a value into a register and + // sign-extends it to a 64-bit value. + LoadSExt, + + // Low-level instruction to store a value to memory. + Store, + + // Load effective address + Lea, + + // Load effective address relative to the current instruction pointer. It + // accepts a single signed immediate operand. + LeaLabel, + + // A low-level mov instruction. It accepts two operands. + Mov, + + // Bitwise AND test instruction + Test, + + // Compare two operands + Cmp, + + // Unconditional jump to a branch target + Jmp, + + // Unconditional jump which takes a reg/mem address operand + JmpOpnd, + + // Low-level conditional jump instructions + Jbe, + Je, + Jne, + Jz, + Jnz, + Jo, + + // Conditional select instructions + CSelZ, + CSelNZ, + CSelE, + CSelNE, + CSelL, + CSelLE, + CSelG, + CSelGE, + + // Push and pop registers to/from the C stack + CPush, + CPop, + CPopInto, + + // Push and pop all of the caller-save registers and the flags to/from the C + // stack + CPushAll, + CPopAll, + + // C function call with N arguments (variadic) + CCall, + + // C function return + CRet, + + // Atomically increment a counter + // Input: memory operand, increment value + // Produces no output + IncrCounter, + + // Trigger a debugger breakpoint + Breakpoint, + + /// Set up the frame stack as necessary per the architecture. 
+ FrameSetup, + + /// Tear down the frame stack as necessary per the architecture. + FrameTeardown, + + /// Take a specific register. Signal the register allocator to not use it. + LiveReg, +} + +/// Instruction idx in an assembler +/// This is used like a pointer +type InsnIdx = u32; + +/// Instruction operand index +type OpndIdx = u32; + +// Memory operand base +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum MemBase +{ + Reg(u8), + InsnOut(InsnIdx), +} + +// Memory location +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct Mem +{ + // Base register number or instruction index + pub(super) base: MemBase, + + // Offset relative to the base pointer + pub(super) disp: i32, + + // Size in bits + pub(super) num_bits: u8, +} + +impl fmt::Debug for Mem { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Mem{}[{:?}", self.num_bits, self.base)?; + if self.disp != 0 { + let sign = if self.disp > 0 { '+' } else { '-' }; + write!(fmt, " {sign} {}", self.disp)?; + } + + write!(fmt, "]") + } +} + +/// Operand to an IR instruction +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Opnd +{ + None, // For insns with no output + + // Immediate Ruby value, may be GC'd, movable + Value(VALUE), + + // Output of a preceding instruction in this block + InsnOut{ idx: InsnIdx, num_bits: u8 }, + + // Low-level operands, for lowering + Imm(i64), // Raw signed immediate + UImm(u64), // Raw unsigned immediate + Mem(Mem), // Memory location + Reg(Reg), // Machine register +} + +impl fmt::Debug for Opnd { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + use Opnd::*; + match self { + Self::None => write!(fmt, "None"), + Value(val) => write!(fmt, "Value({val:?})"), + InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"), + Imm(signed) => write!(fmt, "{signed:x}_i64"), + UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"), + // Say Mem and Reg only once + Mem(mem) => write!(fmt, "{mem:?}"), + Reg(reg) => write!(fmt, "{reg:?}"), + } + } +} + +impl 
Opnd +{ + /// Convenience constructor for memory operands + pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { + match base { + Opnd::Reg(base_reg) => { + assert!(base_reg.num_bits == 64); + Opnd::Mem(Mem { + base: MemBase::Reg(base_reg.reg_no), + disp: disp, + num_bits: num_bits, + }) + }, + + Opnd::InsnOut{idx, num_bits } => { + assert!(num_bits == 64); + Opnd::Mem(Mem { + base: MemBase::InsnOut(idx), + disp: disp, + num_bits: num_bits, + }) + }, + + _ => unreachable!("memory operand with non-register base") + } + } + + /// Constructor for constant pointer operand + pub fn const_ptr(ptr: *const u8) -> Self { + Opnd::UImm(ptr as u64) + } + + pub fn is_some(&self) -> bool { + match *self { + Opnd::None => false, + _ => true, + } + } + + /// Unwrap a register operand + pub fn unwrap_reg(&self) -> Reg { + match self { + Opnd::Reg(reg) => *reg, + _ => unreachable!("trying to unwrap {:?} into reg", self) + } + } + + /// Get the size in bits for register/memory operands + pub fn rm_num_bits(&self) -> u8 { + match *self { + Opnd::Reg(reg) => reg.num_bits, + Opnd::Mem(mem) => mem.num_bits, + Opnd::InsnOut{ num_bits, .. 
} => num_bits, + _ => unreachable!() + } + } +} + +impl From for Opnd { + fn from(value: usize) -> Self { + Opnd::UImm(value.try_into().unwrap()) + } +} + +impl From for Opnd { + fn from(value: u64) -> Self { + Opnd::UImm(value.try_into().unwrap()) + } +} + +impl From for Opnd { + fn from(value: i64) -> Self { + Opnd::Imm(value) + } +} + +impl From for Opnd { + fn from(value: i32) -> Self { + Opnd::Imm(value.try_into().unwrap()) + } +} + +impl From for Opnd { + fn from(value: u32) -> Self { + Opnd::UImm(value as u64) + } +} + +impl From for Opnd { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + Opnd::UImm(uimm as u64) + } +} + +/// Branch target (something that we can jump to) +/// for branch instructions +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum Target +{ + CodePtr(CodePtr), // Pointer to a piece of YJIT-generated code (e.g. side-exit) + FunPtr(*const u8), // Pointer to a C function + Label(usize), // A label within the generated code +} + +impl Target +{ + pub fn unwrap_fun_ptr(&self) -> *const u8 { + match self { + Target::FunPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into fun ptr", self) + } + } + + pub fn unwrap_label_idx(&self) -> usize { + match self { + Target::Label(idx) => *idx, + _ => unreachable!() + } + } +} + +impl From for Target { + fn from(code_ptr: CodePtr) -> Self { + Target::CodePtr(code_ptr) + } +} + +type PosMarkerFn = Box; + +/// YJIT IR instruction +pub struct Insn +{ + /// Previous and next instruction (doubly linked list) + pub(super) prev: Option, + pub(super) next: Option, + + /// Other instructions using this instruction's output + pub(super) uses: Vec<(InsnIdx, OpndIdx)>, + + // Opcode for the instruction + pub(super) op: Op, + + // Optional string for comments and labels + pub(super) text: Option, + + // List of input operands/values + pub(super) opnds: Vec, + + // Output operand for this instruction + pub(super) out: Opnd, + + // List of branch targets (branch instructions only) + 
pub(super) target: Option, + + // Callback to mark the position of this instruction + // in the generated code + pub(super) pos_marker: Option, +} + +/* +impl fmt::Debug for Insn { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{:?}(", self.op)?; + + // Print list of operands + let mut opnd_iter = self.opnds.iter(); + if let Some(first_opnd) = opnd_iter.next() { + write!(fmt, "{first_opnd:?}")?; + } + for opnd in opnd_iter { + write!(fmt, ", {opnd:?}")?; + } + write!(fmt, ")")?; + + // Print text, target, and pos if they are present + if let Some(text) = &self.text { + write!(fmt, " {text:?}")? + } + if let Some(target) = self.target { + write!(fmt, " target={target:?}")?; + } + + write!(fmt, " -> {:?}", self.out) + } +} +*/ + + + + + + +/// Object into which we assemble instructions to be +/// optimized and lowered +pub struct Assembler +{ + /// All instructions created for this assembler (out of order) + pub(super) insns: Vec, + + /// First and last instructions in the linked list + pub(super) first_insn: Option, + pub(super) last_insn: Option, + + /// Names of labels + pub(super) label_names: Vec, + + + + /* + /// FIXME: only compute the live ranges when doing register allocation? + /// + /// Parallel vec with insns + /// Index of the last insn using the output of this insn + //pub(super) live_ranges: Vec, + */ +} + + + + + + + +impl Assembler +{ + pub fn new() -> Assembler { + Assembler { + insns: Vec::default(), + first_insn: None, + last_insn: None, + label_names: Vec::default(), + } + } + + + + + + /// Append an instruction to the list + pub(super) fn push_insn( + &mut self, + op: Op, + opnds: Vec, + target: Option, + text: Option, + pos_marker: Option + ) -> Opnd + { + // Id of this instruction + let insn_idx = self.insns.len() as InsnIdx; + + let mut out_num_bits: u8 = 0; + + for (opnd_idx, opnd) in opnds.iter().enumerate() { + match *opnd { + Opnd::InsnOut{ num_bits, .. } | + Opnd::Mem(Mem { num_bits, .. 
}) | + Opnd::Reg(Reg { num_bits, .. }) => { + if out_num_bits == 0 { + out_num_bits = num_bits + } + else if out_num_bits != num_bits { + panic!("operands of incompatible sizes"); + } + } + _ => {} + } + + // Track which instructions this insn is using as operands + if let Opnd::InsnOut { idx, .. } = *opnd { + self.insns[idx as usize].uses.push((insn_idx, opnd_idx as OpndIdx)); + } + } + + if out_num_bits == 0 { + out_num_bits = 64; + } + + // Operand for the output of this instruction + let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; + + let insn = Insn { + prev: self.last_insn, + next: None, + uses: Vec::default(), + op, + text, + opnds, + out: out_opnd, + target, + pos_marker, + }; + + self.insns.push(insn); + + if let Some(last_insn_idx) = self.last_insn { + self.insns[last_insn_idx as usize].next = Some(insn_idx); + } + self.last_insn = Some(insn_idx); + self.first_insn = self.first_insn.or(Some(insn_idx)); + + // Return an operand for the output of this instruction + out_opnd + } + + /// Replace uses of this instruction by another operand + pub(super) fn replace_uses(&mut self, insn_idx: InsnIdx, replace_with: Opnd) + { + // We're going to clear the vector of uses + let uses = std::mem::take(&mut self.insns[insn_idx as usize].uses); + + // For each use of this instruction + for (use_idx, opnd_idx) in uses { + + // TODO: assert that this is indeed a use of this insn (sanity check) + + let use_insn = &mut self.insns[use_idx as usize]; + use_insn.opnds[opnd_idx as usize] = replace_with; + + // If replace_with is an insn, update its uses + if let Opnd::InsnOut { idx, .. 
} = replace_with { + let repl_insn = &mut self.insns[idx as usize]; + assert!(repl_insn.prev.is_some() || repl_insn.next.is_some()); + repl_insn.uses.push((use_idx, opnd_idx)); + } + } + } + + /// Remove a specific insn from the assembler + pub(super) fn remove_insn(&mut self, insn_idx: InsnIdx) + { + let prev = self.insns[insn_idx as usize].prev; + let next = self.insns[insn_idx as usize].next; + + match prev { + Some(prev_idx) => { + let prev_insn = &mut self.insns[prev_idx as usize]; + prev_insn.next = next; + } + None => { + assert!(self.first_insn == Some(insn_idx)); + self.first_insn = next; + } + }; + + match next { + Some(next_idx) => { + let next_insn = &mut self.insns[next_idx as usize]; + next_insn.prev = prev; + } + None => { + assert!(self.last_insn == Some(insn_idx)); + self.last_insn = prev; + } + }; + + // Note: we don't remove it from the vec because we do that + // only when we're done with the assembler + } + + + + // TODO: we need an insert_before() + // To insert an instruction before another instruction + + + + + + + // TODO: can we implement some kind of insn_iter()? + // could be useful for the emit passes + + + + + + + + // TODO: use push_insn for comment? + /* + /// Add a comment at the current position + pub fn comment(&mut self, text: &str) + { + let insn = Insn { + op: Op::Comment, + text: Some(text.to_owned()), + opnds: vec![], + out: Opnd::None, + target: None, + pos_marker: None, + }; + self.insns.push(insn); + self.live_ranges.push(self.insns.len()); + } + + /// Bake a string at the current position + pub fn bake_string(&mut self, text: &str) + { + let insn = Insn { + op: Op::BakeString, + text: Some(text.to_owned()), + opnds: vec![], + out: Opnd::None, + target: None, + pos_marker: None, + }; + self.insns.push(insn); + self.live_ranges.push(self.insns.len()); + } + */ + + + + + + + /// Load an address relative to the given label. 
+ #[must_use] + pub fn lea_label(&mut self, target: Target) -> Opnd { + self.push_insn(Op::LeaLabel, vec![], Some(target), None, None) + } + + /// Create a new label instance that we can jump to + pub fn new_label(&mut self, name: &str) -> Target + { + assert!(!name.contains(" "), "use underscores in label names, not spaces"); + + let label_idx = self.label_names.len(); + self.label_names.push(name.to_string()); + Target::Label(label_idx) + } + + + + + // TODO: use push_insn for this? + /* + /// Add a label at the current position + pub fn write_label(&mut self, label: Target) + { + assert!(label.unwrap_label_idx() < self.label_names.len()); + + let insn = Insn { + op: Op::Label, + text: None, + opnds: vec![], + out: Opnd::None, + target: Some(label), + pos_marker: None, + }; + self.insns.push(insn); + self.live_ranges.push(self.insns.len()); + } + */ + + + + + /* + /// Transform input instructions, consumes the input assembler + pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler + where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option, Option) + { + let mut asm = Assembler { + insns: Vec::default(), + live_ranges: Vec::default(), + label_names: self.label_names, + }; + + // Indices maps from the old instruction index to the new instruction + // index. + let mut indices: Vec = Vec::default(); + + // Map an operand to the next set of instructions by correcting previous + // InsnOut indices. 
+ fn map_opnd(opnd: Opnd, indices: &mut Vec) -> Opnd { + match opnd { + Opnd::InsnOut{ idx, num_bits } => { + Opnd::InsnOut{ idx: indices[idx], num_bits } + } + Opnd::Mem(Mem{ base: MemBase::InsnOut(idx), disp, num_bits, }) => { + Opnd::Mem(Mem{ base:MemBase::InsnOut(indices[idx]), disp, num_bits }) + } + _ => opnd + } + } + + for (index, insn) in self.insns.drain(..).enumerate() { + let opnds: Vec = insn.opnds.into_iter().map(|opnd| map_opnd(opnd, &mut indices)).collect(); + + // For each instruction, either handle it here or allow the map_insn + // callback to handle it. + match insn.op { + Op::Comment => { + asm.comment(insn.text.unwrap().as_str()); + }, + _ => { + map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text, insn.pos_marker); + } + }; + + // Here we're assuming that if we've pushed multiple instructions, + // the output that we're using is still the final instruction that + // was pushed. + indices.push(asm.insns.len() - 1); + } + + asm + } + */ + + + /* + /// Sets the out field on the various instructions that require allocated + /// registers because their output is used as the operand on a subsequent + /// instruction. This is our implementation of the linear scan algorithm. + pub(super) fn alloc_regs(mut self, regs: Vec) -> Assembler + { + //dbg!(&self); + + // First, create the pool of registers. + let mut pool: u32 = 0; + + // Mutate the pool bitmap to indicate that the register at that index + // has been allocated and is live. 
+ fn alloc_reg(pool: &mut u32, regs: &Vec) -> Reg { + for (index, reg) in regs.iter().enumerate() { + if (*pool & (1 << index)) == 0 { + *pool |= 1 << index; + return *reg; + } + } + + unreachable!("Register spill not supported"); + } + + // Allocate a specific register + fn take_reg(pool: &mut u32, regs: &Vec, reg: &Reg) -> Reg { + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + assert_eq!(*pool & (1 << reg_index), 0); + *pool |= 1 << reg_index; + } + + return *reg; + } + + // Mutate the pool bitmap to indicate that the given register is being + // returned as it is no longer used by the instruction that previously + // held it. + fn dealloc_reg(pool: &mut u32, regs: &Vec, reg: &Reg) { + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + *pool &= !(1 << reg_index); + } + } + + let live_ranges: Vec = std::mem::take(&mut self.live_ranges); + + let asm = self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { + // Check if this is the last instruction that uses an operand that + // spans more than one instruction. In that case, return the + // allocated register to the pool. + for opnd in &opnds { + match opnd { + Opnd::InsnOut{idx, .. } | + Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { + // Since we have an InsnOut, we know it spans more that one + // instruction. + let start_index = *idx; + assert!(start_index < index); + + // We're going to check if this is the last instruction that + // uses this operand. If it is, we can return the allocated + // register to the pool. 
+ if live_ranges[start_index] == index { + if let Opnd::Reg(reg) = asm.insns[start_index].out { + dealloc_reg(&mut pool, ®s, ®); + } else { + unreachable!("no register allocated for insn {:?}", op); + } + } + } + + _ => {} + } + } + + // C return values need to be mapped to the C return register + if op == Op::CCall { + assert_eq!(pool, 0, "register lives past C function call"); + } + + // If this instruction is used by another instruction, + // we need to allocate a register to it + let mut out_reg = Opnd::None; + if live_ranges[index] != index { + + // C return values need to be mapped to the C return register + if op == Op::CCall { + out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &C_RET_REG)) + } + + // If this instruction's first operand maps to a register and + // this is the last use of the register, reuse the register + // We do this to improve register allocation on x86 + // e.g. out = add(reg0, reg1) + // reg0 = add(reg0, reg1) + if opnds.len() > 0 { + if let Opnd::InsnOut{idx, ..} = opnds[0] { + if live_ranges[idx] == index { + if let Opnd::Reg(reg) = asm.insns[idx].out { + out_reg = Opnd::Reg(take_reg(&mut pool, ®s, ®)) + } + } + } + } + + // Allocate a new register for this instruction + if out_reg == Opnd::None { + out_reg = if op == Op::LiveReg { + // Allocate a specific register + let reg = opnds[0].unwrap_reg(); + Opnd::Reg(take_reg(&mut pool, ®s, ®)) + } else { + Opnd::Reg(alloc_reg(&mut pool, ®s)) + } + } + } + + // Replace InsnOut operands by their corresponding register + let reg_opnds: Vec = opnds.into_iter().map(|opnd| + match opnd { + Opnd::InsnOut{idx, ..} => asm.insns[idx].out, + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + let out_reg = asm.insns[idx].out.unwrap_reg(); + Opnd::Mem(Mem { + base: MemBase::Reg(out_reg.reg_no), + disp, + num_bits + }) + } + _ => opnd, + } + ).collect(); + + asm.push_insn(op, reg_opnds, target, text, pos_marker); + + // Set the output register for this instruction + let num_insns = 
asm.insns.len(); + let mut new_insn = &mut asm.insns[num_insns - 1]; + if let Opnd::Reg(reg) = out_reg { + let num_out_bits = new_insn.out.rm_num_bits(); + out_reg = Opnd::Reg(reg.sub_reg(num_out_bits)) + } + new_insn.out = out_reg; + }); + + assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); + asm + } + */ + + + + /* + /// Compile the instructions down to machine code + /// NOTE: should compile return a list of block labels to enable + /// compiling multiple blocks at a time? + pub fn compile(self, cb: &mut CodeBlock) -> Vec + { + let alloc_regs = Self::get_alloc_regs(); + self.compile_with_regs(cb, alloc_regs) + } + + /// Compile with a limited number of registers + pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> Vec + { + let mut alloc_regs = Self::get_alloc_regs(); + let alloc_regs = alloc_regs.drain(0..num_regs).collect(); + self.compile_with_regs(cb, alloc_regs) + } + */ +} + + + + + + + + + + + + + +/* +impl fmt::Debug for Assembler { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Assembler\n")?; + + for (idx, insn) in self.insns.iter().enumerate() { + write!(fmt, " {idx:03} {insn:?}\n")?; + } + + Ok(()) + } +} +*/ + +impl Assembler +{ + pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd + { + let target = Target::FunPtr(fptr); + self.push_insn(Op::CCall, opnds, Some(target), None, None) + } + + //pub fn pos_marker(&mut self, marker_fn: F) + pub fn pos_marker(&mut self, marker_fn: PosMarkerFn) + { + self.push_insn(Op::PosMarker, vec![], None, None, Some(marker_fn)); + } +} + +macro_rules! def_push_jcc { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self, target: Target) + { + self.push_insn($opcode, vec![], Some(target), None, None); + } + } + }; +} + +macro_rules! 
def_push_0_opnd { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + #[must_use] + pub fn $op_name(&mut self) -> Opnd + { + self.push_insn($opcode, vec![], None, None, None) + } + } + }; +} + +macro_rules! def_push_0_opnd_no_out { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self) + { + self.push_insn($opcode, vec![], None, None, None); + } + } + }; +} + +macro_rules! def_push_1_opnd { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + #[must_use] + pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd + { + self.push_insn($opcode, vec![opnd0], None, None, None) + } + } + }; +} + +macro_rules! def_push_1_opnd_no_out { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self, opnd0: Opnd) + { + self.push_insn($opcode, vec![opnd0], None, None, None); + } + } + }; +} + +macro_rules! def_push_2_opnd { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + #[must_use] + pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd + { + self.push_insn($opcode, vec![opnd0, opnd1], None, None, None) + } + } + }; +} + +macro_rules! 
def_push_2_opnd_no_out { + ($op_name:ident, $opcode:expr) => { + impl Assembler + { + pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) + { + self.push_insn($opcode, vec![opnd0, opnd1], None, None, None); + } + } + }; +} + +def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); +def_push_jcc!(jmp, Op::Jmp); +def_push_jcc!(je, Op::Je); +def_push_jcc!(jne, Op::Jne); +def_push_jcc!(jbe, Op::Jbe); +def_push_jcc!(jz, Op::Jz); +def_push_jcc!(jnz, Op::Jnz); +def_push_jcc!(jo, Op::Jo); +def_push_2_opnd!(add, Op::Add); +def_push_2_opnd!(sub, Op::Sub); +def_push_2_opnd!(and, Op::And); +def_push_1_opnd!(not, Op::Not); +def_push_1_opnd_no_out!(cpush, Op::CPush); +def_push_0_opnd!(cpop, Op::CPop); +def_push_1_opnd_no_out!(cpop_into, Op::CPopInto); +def_push_0_opnd_no_out!(cpush_all, Op::CPushAll); +def_push_0_opnd_no_out!(cpop_all, Op::CPopAll); +def_push_1_opnd_no_out!(cret, Op::CRet); +def_push_1_opnd!(load, Op::Load); +def_push_1_opnd!(load_sext, Op::LoadSExt); +def_push_1_opnd!(lea, Op::Lea); +def_push_1_opnd!(live_reg_opnd, Op::LiveReg); +def_push_2_opnd_no_out!(store, Op::Store); +def_push_2_opnd_no_out!(mov, Op::Mov); +def_push_2_opnd_no_out!(cmp, Op::Cmp); +def_push_2_opnd_no_out!(test, Op::Test); +def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); +def_push_2_opnd_no_out!(incr_counter, Op::IncrCounter); +def_push_2_opnd!(csel_z, Op::CSelZ); +def_push_2_opnd!(csel_nz, Op::CSelNZ); +def_push_2_opnd!(csel_e, Op::CSelE); +def_push_2_opnd!(csel_ne, Op::CSelNE); +def_push_2_opnd!(csel_l, Op::CSelL); +def_push_2_opnd!(csel_le, Op::CSelLE); +def_push_2_opnd!(csel_g, Op::CSelG); +def_push_2_opnd!(csel_ge, Op::CSelGE); +def_push_0_opnd_no_out!(frame_setup, Op::FrameSetup); +def_push_0_opnd_no_out!(frame_teardown, Op::FrameTeardown); + +#[cfg(test)] +mod tests +{ + use super::*; + + #[test] + fn test_push_insn() + { + let mut asm = Assembler::new(); + let v0 = asm.add(1.into(), 2.into()); + let v1 = asm.add(v0, 3.into()); + } + + #[test] + fn test_replace_insn() + { + let mut asm = 
Assembler::new(); + let v0 = asm.add(1.into(), 2.into()); + let v1 = asm.add(v0, 3.into()); + + if let Opnd::InsnOut{ idx, ..} = v0 { + asm.replace_uses(idx, 3.into()); + asm.remove_insn(idx); + } + else + { + panic!(); + } + + // Nobody is using v1, but we should still be able to "replace" and remove it + if let Opnd::InsnOut{ idx, ..} = v1 { + asm.replace_uses(idx, 6.into()); + asm.remove_insn(idx); + } + else + { + panic!(); + } + + assert!(asm.first_insn.is_none()); + assert!(asm.last_insn.is_none()); + } + + #[test] + fn test_replace_insn_with_insn() + { + let mut asm = Assembler::new(); + let v0 = asm.add(1.into(), 2.into()); + let v1 = asm.add(v0, 3.into()); + let v2 = asm.add(v0, 4.into()); + + if let Opnd::InsnOut{ idx, ..} = v0 { + let v3 = asm.load(4.into()); + asm.replace_uses(idx, v3); + asm.remove_insn(idx); + } + else + { + panic!(); + } + } +} \ No newline at end of file diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 47946950946438..790df0d0326b0e 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -5,4 +5,5 @@ pub mod x86_64; pub mod arm64; pub mod ir; +pub mod ir_ssa; mod tests; diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index e92186da7de5c4..9b19c7d6a0ad65 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -3,13 +3,10 @@ #![allow(clippy::too_many_arguments)] // :shrug: #![allow(clippy::identity_op)] // Sometimes we do it for style - // Temporary while switching to the new backend #![allow(dead_code)] #![allow(unused)] - - mod asm; mod backend; mod codegen; From 16307adf8f0cde3a9383e6c7e39fc718ebc89b42 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 1 Aug 2022 20:06:53 -0700 Subject: [PATCH 425/546] Port only ATTRSET of opt_send_without_block (https://github.com/Shopify/ruby/pull/351) --- bootstraptest/test_yjit.rb | 13 ++++++ yjit/src/codegen.rs | 85 +++++++++++++++++++++++--------------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/bootstraptest/test_yjit.rb 
b/bootstraptest/test_yjit.rb index d82f3de95ea957..2f8c6a8f180812 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3074,3 +3074,16 @@ def function() end function() } + +# opt_send_without_block (VM_METHOD_TYPE_ATTRSET) +assert_equal 'foo', %q{ + class Foo + attr_writer :foo + + def foo() + self.foo = "foo" + end + end + foo = Foo.new + foo.foo +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 73d76759a621c1..ee528006991b98 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1881,6 +1881,7 @@ pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2; // up to 5 different classes pub const SEND_MAX_DEPTH: i32 = 5; +*/ // Codegen for setting an instance variable. // Preconditions: @@ -1890,13 +1891,13 @@ pub const SEND_MAX_DEPTH: i32 = 5; fn gen_set_ivar( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, recv: VALUE, ivar_name: ID, ) -> CodegenStatus { // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let val_opnd = ctx.stack_pop(1); @@ -1905,17 +1906,22 @@ fn gen_set_ivar( let ivar_index: u32 = unsafe { rb_obj_ensure_iv_index_mapping(recv, ivar_name) }; // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value - mov(cb, C_ARG_REGS[0], recv_opnd); - mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index.into())); - mov(cb, C_ARG_REGS[2], val_opnd); - call_ptr(cb, REG0, rb_vm_set_ivar_idx as *const u8); + let val = asm.ccall( + rb_vm_set_ivar_idx as *const u8, + vec![ + recv_opnd, + Opnd::Imm(ivar_index.into()), + val_opnd, + ], + ); let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, RAX); + asm.mov(out_opnd, val); KeepCompiling } +/* // Codegen for getting an instance variable. 
// Preconditions: // - receiver is in REG0 @@ -3863,6 +3869,7 @@ fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option bool { @@ -3872,6 +3879,7 @@ fn c_method_tracing_currently_enabled(jit: &JITState) -> bool { } } +/* // Similar to args_kw_argv_to_hash. It is called at runtime from within the // generated assembly to build a Ruby hash of the passed keyword arguments. The // keys are the Symbol objects associated with the keywords and the values are @@ -4798,11 +4806,12 @@ fn gen_struct_aset( jump_to_next_insn(jit, ctx, cb, ocb); EndBlock } +*/ fn gen_send_general( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cd: *const rb_call_data, block: Option, @@ -4824,24 +4833,24 @@ fn gen_send_general( // Don't JIT calls with keyword splat if flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(cb, send_kw_splat); + gen_counter_incr!(asm, send_kw_splat); return CantCompile; } // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. 
if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(cb, send_args_splat); + gen_counter_incr!(asm, send_args_splat); return CantCompile; } if flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(cb, send_block_arg); + gen_counter_incr!(asm, send_block_arg); return CantCompile; } // Defer compilation so we can specialize on class of receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -4854,6 +4863,8 @@ fn gen_send_general( // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); let recv_opnd = StackOpnd(argc.try_into().unwrap()); + // TODO: Resurrect this once jit_guard_known_klass is implemented for getivar + /* mov(cb, REG0, recv); jit_guard_known_klass( jit, @@ -4865,7 +4876,7 @@ fn gen_send_general( comptime_recv, SEND_MAX_DEPTH, side_exit, - ); + ); */ // Do method lookup let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; @@ -4891,7 +4902,7 @@ fn gen_send_general( if flags & VM_CALL_FCALL == 0 { // otherwise we need an ancestry check to ensure the receiver is vaild to be called // as protected - jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit); + return CantCompile; // jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit); } } _ => { @@ -4908,25 +4919,26 @@ fn gen_send_general( let def_type = unsafe { get_cme_def_type(cme) }; match def_type { VM_METHOD_TYPE_ISEQ => { - return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc); + return CantCompile; // return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc); } VM_METHOD_TYPE_CFUNC => { + return CantCompile; /* return gen_send_cfunc( jit, ctx, - cb, + asm, ocb, ci, cme, block, argc, &comptime_recv_klass, - ); + ); */ } VM_METHOD_TYPE_IVAR => { if argc != 0 { // Argument count mismatch. Getters take no arguments. 
- gen_counter_incr!(cb, send_getter_arity); + gen_counter_incr!(asm, send_getter_arity); return CantCompile; } @@ -4940,10 +4952,11 @@ fn gen_send_general( // attr_accessor is invalidated and we exit at the closest // instruction boundary which is always outside of the body of // the attr_accessor code. - gen_counter_incr!(cb, send_cfunc_tracing); + gen_counter_incr!(asm, send_cfunc_tracing); return CantCompile; } + return CantCompile; /* mov(cb, REG0, recv); let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; @@ -4958,31 +4971,32 @@ fn gen_send_general( recv_opnd, side_exit, ); + */ } VM_METHOD_TYPE_ATTRSET => { if flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(cb, send_attrset_kwargs); + gen_counter_incr!(asm, send_attrset_kwargs); return CantCompile; } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } { - gen_counter_incr!(cb, send_ivar_set_method); + gen_counter_incr!(asm, send_ivar_set_method); return CantCompile; } else if c_method_tracing_currently_enabled(jit) { // Can't generate code for firing c_call and c_return events // See :attr-tracing: - gen_counter_incr!(cb, send_cfunc_tracing); + gen_counter_incr!(asm, send_cfunc_tracing); return CantCompile; } else { let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; - return gen_set_ivar(jit, ctx, cb, comptime_recv, ivar_name); + return gen_set_ivar(jit, ctx, asm, comptime_recv, ivar_name); } } // Block method, e.g. 
define_method(:foo) { :my_block } VM_METHOD_TYPE_BMETHOD => { - gen_counter_incr!(cb, send_bmethod); + gen_counter_incr!(asm, send_bmethod); return CantCompile; } VM_METHOD_TYPE_ZSUPER => { - gen_counter_incr!(cb, send_zsuper_method); + gen_counter_incr!(asm, send_zsuper_method); return CantCompile; } VM_METHOD_TYPE_ALIAS => { @@ -4991,15 +5005,16 @@ fn gen_send_general( continue; } VM_METHOD_TYPE_UNDEF => { - gen_counter_incr!(cb, send_undef_method); + gen_counter_incr!(asm, send_undef_method); return CantCompile; } VM_METHOD_TYPE_NOTIMPLEMENTED => { - gen_counter_incr!(cb, send_not_implemented_method); + gen_counter_incr!(asm, send_not_implemented_method); return CantCompile; } // Send family of methods, e.g. call/apply VM_METHOD_TYPE_OPTIMIZED => { + return CantCompile; /* let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; match opt_type { OPTIMIZED_METHOD_TYPE_SEND => { @@ -5042,13 +5057,14 @@ fn gen_send_general( panic!("unknown optimized method type!") } } + */ } VM_METHOD_TYPE_MISSING => { - gen_counter_incr!(cb, send_missing_method); + gen_counter_incr!(asm, send_missing_method); return CantCompile; } VM_METHOD_TYPE_REFINED => { - gen_counter_incr!(cb, send_refined_method); + gen_counter_incr!(asm, send_refined_method); return CantCompile; } _ => { @@ -5061,23 +5077,24 @@ fn gen_send_general( fn gen_opt_send_without_block( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd = jit_get_arg(jit, 0).as_ptr(); - gen_send_general(jit, ctx, cb, ocb, cd, None) + gen_send_general(jit, ctx, asm, ocb, cd, None) } +/* fn gen_send( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd = jit_get_arg(jit, 0).as_ptr(); let block = jit_get_arg(jit, 1).as_optional_ptr(); - return gen_send_general(jit, ctx, cb, ocb, cd, block); + return gen_send_general(jit, ctx, asm, ocb, cd, block); } fn 
gen_invokesuper( @@ -6028,7 +6045,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), //YARVINSN_getblockparam => Some(gen_getblockparam), - //YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), + YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), //YARVINSN_send => Some(gen_send), //YARVINSN_invokesuper => Some(gen_invokesuper), YARVINSN_leave => Some(gen_leave), From 330c9e98506d421778c8f2581a23ba44e4663e06 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 1 Aug 2022 20:39:31 -0700 Subject: [PATCH 426/546] Port anytostring, intern, and toregexp (https://github.com/Shopify/ruby/pull/348) * Port anytostring, intern, and toregexp * Port getspecial to the new backend (#349) PR: https://github.com/Shopify/ruby/pull/349 --- bootstraptest/test_yjit.rb | 16 +++++ yjit/src/codegen.rs | 122 ++++++++++++++++++------------------- 2 files changed, 77 insertions(+), 61 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 2f8c6a8f180812..b8374746f76f15 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3087,3 +3087,19 @@ def foo() foo = Foo.new foo.foo } + +# anytostring, intern +assert_equal 'true', %q{ + def foo() + :"#{true}" + end + foo() +} + +# toregexp +assert_equal '/true/', %q{ + def foo() + /#{true}/ + end + foo().inspect +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ee528006991b98..36cdd55573fb00 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5338,31 +5338,28 @@ fn gen_setglobal( KeepCompiling } -/* fn gen_anytostring( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Save the PC and SP since we might call #to_s - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let str = ctx.stack_pop(1); let val = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], str); - mov(cb, 
C_ARG_REGS[1], val); - - call_ptr(cb, REG0, rb_obj_as_string_result as *const u8); + let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]); // Push the return value let stack_ret = ctx.stack_push(Type::TString); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } +/* fn gen_objtostring( jit: &mut JITState, ctx: &mut Context, @@ -5399,25 +5396,23 @@ fn gen_objtostring( gen_send_general(jit, ctx, cb, ocb, cd, None) } } +*/ fn gen_intern( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let str = ctx.stack_pop(1); - - mov(cb, C_ARG_REGS[0], str); - - call_ptr(cb, REG0, rb_str_intern as *const u8); + let sym = asm.ccall(rb_str_intern as *const u8, vec![str]); // Push the return value let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, sym); KeepCompiling } @@ -5425,7 +5420,7 @@ fn gen_intern( fn gen_toregexp( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let opt = jit_get_arg(jit, 0).as_i64(); @@ -5433,34 +5428,43 @@ fn gen_toregexp( // Save the PC and SP because this allocates an object and could // raise an exception. 
- jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize))); ctx.stack_pop(cnt); - mov(cb, C_ARG_REGS[0], imm_opnd(0)); - mov(cb, C_ARG_REGS[1], imm_opnd(cnt.try_into().unwrap())); - lea(cb, C_ARG_REGS[2], values_ptr); - call_ptr(cb, REG0, rb_ary_tmp_new_from_values as *const u8); + let ary = asm.ccall( + rb_ary_tmp_new_from_values as *const u8, + vec![ + Opnd::Imm(0), + Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + values_ptr, + ] + ); // Save the array so we can clear it later - push(cb, RAX); - push(cb, RAX); // Alignment - mov(cb, C_ARG_REGS[0], RAX); - mov(cb, C_ARG_REGS[1], imm_opnd(opt)); - call_ptr(cb, REG0, rb_reg_new_ary as *const u8); + asm.cpush(ary); + asm.cpush(ary); // Alignment + + let val = asm.ccall( + rb_reg_new_ary as *const u8, + vec![ + ary, + Opnd::Imm(opt), + ] + ); // The actual regex is in RAX now. Pop the temp array from // rb_ary_tmp_new_from_values into C arg regs so we can clear it - pop(cb, REG1); // Alignment - pop(cb, C_ARG_REGS[0]); + let ary = asm.cpop(); // Alignment + asm.cpop_into(ary); // The value we want to push on the stack is in RAX right now let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); // Clear the temp array. 
- call_ptr(cb, REG0, rb_ary_clear as *const u8); + asm.ccall(rb_ary_clear as *const u8, vec![ary]); KeepCompiling } @@ -5468,7 +5472,7 @@ fn gen_toregexp( fn gen_getspecial( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // This takes two arguments, key and type @@ -5484,65 +5488,63 @@ fn gen_getspecial( // Fetch a "special" backref based on a char encoded by shifting by 1 // Can raise if matchdata uninitialized - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_backref_get() - add_comment(cb, "rb_backref_get"); - call_ptr(cb, REG0, rb_backref_get as *const u8); - mov(cb, C_ARG_REGS[0], RAX); + asm.comment("rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); let rt_u8: u8 = (rtype >> 1).try_into().unwrap(); - match rt_u8.into() { + let val = match rt_u8.into() { '&' => { - add_comment(cb, "rb_reg_last_match"); - call_ptr(cb, REG0, rb_reg_last_match as *const u8); + asm.comment("rb_reg_last_match"); + asm.ccall(rb_reg_last_match as *const u8, vec![backref]) } '`' => { - add_comment(cb, "rb_reg_match_pre"); - call_ptr(cb, REG0, rb_reg_match_pre as *const u8); + asm.comment("rb_reg_match_pre"); + asm.ccall(rb_reg_match_pre as *const u8, vec![backref]) } '\'' => { - add_comment(cb, "rb_reg_match_post"); - call_ptr(cb, REG0, rb_reg_match_post as *const u8); + asm.comment("rb_reg_match_post"); + asm.ccall(rb_reg_match_post as *const u8, vec![backref]) } '+' => { - add_comment(cb, "rb_reg_match_last"); - call_ptr(cb, REG0, rb_reg_match_last as *const u8); + asm.comment("rb_reg_match_last"); + asm.ccall(rb_reg_match_last as *const u8, vec![backref]) } _ => panic!("invalid back-ref"), - } + }; let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } else { // Fetch the N-th match from the last backref based on type shifted by 1 // Can raise if matchdata 
uninitialized - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_backref_get() - add_comment(cb, "rb_backref_get"); - call_ptr(cb, REG0, rb_backref_get as *const u8); + asm.comment("rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); // rb_reg_nth_match((int)(type >> 1), backref); - add_comment(cb, "rb_reg_nth_match"); - mov( - cb, - C_ARG_REGS[0], - imm_opnd((rtype >> 1).try_into().unwrap()), + asm.comment("rb_reg_nth_match"); + let val = asm.ccall( + rb_reg_nth_match as *const u8, + vec![ + Opnd::Imm((rtype >> 1).try_into().unwrap()), + backref, + ] ); - mov(cb, C_ARG_REGS[1], RAX); - call_ptr(cb, REG0, rb_reg_nth_match as *const u8); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } } -*/ fn gen_getclassvariable( jit: &mut JITState, @@ -6052,13 +6054,11 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_getglobal => Some(gen_getglobal), YARVINSN_setglobal => Some(gen_setglobal), - /* YARVINSN_anytostring => Some(gen_anytostring), - YARVINSN_objtostring => Some(gen_objtostring), + //YARVINSN_objtostring => Some(gen_objtostring), YARVINSN_intern => Some(gen_intern), YARVINSN_toregexp => Some(gen_toregexp), YARVINSN_getspecial => Some(gen_getspecial), - */ YARVINSN_getclassvariable => Some(gen_getclassvariable), YARVINSN_setclassvariable => Some(gen_setclassvariable), From dea42385440c7abc332d8fda04dbec0f33364baa Mon Sep 17 00:00:00 2001 From: Zack Deveau Date: Tue, 2 Aug 2022 13:09:51 -0400 Subject: [PATCH 427/546] Port gen_concatstring to new backend IR (https://github.com/Shopify/ruby/pull/350) * Port gen_concatstring to new backend IR * Update yjit/src/codegen.rs Co-authored-by: Maxime Chevalier-Boisvert --- bootstraptest/test_yjit.rb | 8 ++++++++ yjit/src/codegen.rs | 19 ++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 
b8374746f76f15..2409306106e10a 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3103,3 +3103,11 @@ def foo() end foo().inspect } + +# concatstrings +assert_equal '9001', %q{ + def foo() + "#{9001}" + end + foo() +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 36cdd55573fb00..d75484a5dfe547 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2278,32 +2278,33 @@ fn gen_checktype( } } -/* + fn gen_concatstrings( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let n = jit_get_arg(jit, 0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize())); // call rb_str_concat_literals(long n, const VALUE *strings); - mov(cb, C_ARG_REGS[0], imm_opnd(n.into())); - lea(cb, C_ARG_REGS[1], values_ptr); - call_ptr(cb, REG0, rb_str_concat_literals as *const u8); + let return_value = asm.ccall( + rb_str_concat_literals as *const u8, + vec![Opnd::UImm(n.into()), values_ptr] + ); ctx.stack_pop(n.as_usize()); let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, return_value); KeepCompiling } -*/ + fn guard_two_fixnums(ctx: &mut Context, asm: &mut Assembler, side_exit: CodePtr) { // Get the stack operand types @@ -6015,8 +6016,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_expandarray => Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), YARVINSN_checkkeyword => Some(gen_checkkeyword), - /* YARVINSN_concatstrings => Some(gen_concatstrings), + /* YARVINSN_getinstancevariable => Some(gen_getinstancevariable), YARVINSN_setinstancevariable => Some(gen_setinstancevariable), From a75a6f7d7a1a2f876c76d1c0f3f56781221c3f68 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 2 Aug 2022 14:27:50 -0400 Subject: [PATCH 428/546] Remove empty 
lines --- yjit/src/codegen.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d75484a5dfe547..ef7ccf9a658900 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2278,7 +2278,6 @@ fn gen_checktype( } } - fn gen_concatstrings( jit: &mut JITState, ctx: &mut Context, @@ -2305,7 +2304,6 @@ fn gen_concatstrings( KeepCompiling } - fn guard_two_fixnums(ctx: &mut Context, asm: &mut Assembler, side_exit: CodePtr) { // Get the stack operand types let arg1_type = ctx.get_opnd_type(StackOpnd(0)); From 0823260546f5fd749c3e1e9afadc29f4c6072ef1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 2 Aug 2022 13:44:17 -0400 Subject: [PATCH 429/546] Implement iterators and double-linked list for IR SSA --- yjit/src/backend/ir_ssa.rs | 248 +++++++++++++++++++++++++------------ 1 file changed, 172 insertions(+), 76 deletions(-) diff --git a/yjit/src/backend/ir_ssa.rs b/yjit/src/backend/ir_ssa.rs index 49974b90b75ace..cd7f03c4faf442 100644 --- a/yjit/src/backend/ir_ssa.rs +++ b/yjit/src/backend/ir_ssa.rs @@ -378,10 +378,6 @@ type PosMarkerFn = Box; /// YJIT IR instruction pub struct Insn { - /// Previous and next instruction (doubly linked list) - pub(super) prev: Option, - pub(super) next: Option, - /// Other instructions using this instruction's output pub(super) uses: Vec<(InsnIdx, OpndIdx)>, @@ -405,6 +401,128 @@ pub struct Insn pub(super) pos_marker: Option, } +impl Insn { + fn new(op: Op, out: Opnd) -> Self { + Self { + uses: Vec::new(), + op, + text: None, + opnds: Vec::default(), + out, + target: None, + pos_marker: None, + } + } +} + +/// A container for an instruction within a doubly-linked list. +struct InsnNode { + insn: Insn, + prev_idx: Option, + next_idx: Option +} + +impl InsnNode { + fn new(insn: Insn, prev_idx: Option) -> Self { + Self { insn, prev_idx, next_idx: None } + } +} + +/// A doubly-linked list containing instructions. 
+pub(super) struct InsnList { + insns: Vec, + first_idx: Option, + last_idx: Option +} + +impl InsnList { + fn new() -> Self { + Self { insns: Vec::default(), first_idx: None, last_idx: None } + } + + /// Returns the next instruction index that will be generated + fn next_idx(&self) -> InsnIdx { + self.insns.len() as InsnIdx + } + + /// Return a mutable reference to the instruction for the given index + fn get_ref_mut(&mut self, idx: InsnIdx) -> &mut Insn { + &mut self.insns[idx as usize].insn + } + + /// Push a new instruction onto the end of the list + fn push(&mut self, insn: Insn) -> InsnIdx { + let insn_idx = self.next_idx(); + + // Push the new node onto the list + self.insns.push(InsnNode::new(insn, self.last_idx)); + + // Update the first index if it's not already set + self.first_idx = self.first_idx.or(Some(insn_idx)); + + // Update the last node's next_idx field if necessary + if let Some(last_idx) = self.last_idx { + self.insns[last_idx as usize].next_idx = Some(insn_idx); + } + + // Update the last index + self.last_idx = Some(insn_idx); + + insn_idx + } + + /// Remove an instruction from the list at a given index + fn remove(&mut self, insn_idx: InsnIdx) { + let prev_idx = self.insns[insn_idx as usize].prev_idx; + let next_idx = self.insns[insn_idx as usize].next_idx; + + // Update the previous node's next_idx field if necessary + if let Some(prev_idx) = prev_idx { + self.insns[prev_idx as usize].next_idx = next_idx; + } else { + assert_eq!(self.first_idx, Some(insn_idx)); + self.first_idx = next_idx; + } + + // Update the next node's prev_idx field if necessary + if let Some(next_idx) = next_idx { + self.insns[next_idx as usize].prev_idx = prev_idx; + } else { + assert_eq!(self.last_idx, Some(insn_idx)); + self.last_idx = prev_idx; + } + } +} + +/// An iterator that will walk through the list of instructions in order +/// according to the linked list. 
+pub(super) struct InsnListIterator<'a> { + insn_list: &'a InsnList, + insn_idx: Option +} + +impl<'a> Iterator for InsnListIterator<'a> { + type Item = &'a Insn; + + /// Return an option containing the next instruction in the list. + fn next(&mut self) -> Option { + self.insn_idx.map(|idx| { + let node = &self.insn_list.insns[idx as usize]; + self.insn_idx = node.next_idx; + &node.insn + }) + } +} + +impl<'a> IntoIterator for &'a InsnList { + type Item = &'a Insn; + type IntoIter = InsnListIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + InsnListIterator { insn_list: self, insn_idx: self.first_idx } + } +} + /* impl fmt::Debug for Insn { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { @@ -442,18 +560,12 @@ impl fmt::Debug for Insn { /// optimized and lowered pub struct Assembler { - /// All instructions created for this assembler (out of order) - pub(super) insns: Vec, - - /// First and last instructions in the linked list - pub(super) first_insn: Option, - pub(super) last_insn: Option, + /// The list of instructions created by this assembler + pub(super) insn_list: InsnList, /// Names of labels pub(super) label_names: Vec, - - /* /// FIXME: only compute the live ranges when doing register allocation? 
/// @@ -471,19 +583,10 @@ pub struct Assembler impl Assembler { - pub fn new() -> Assembler { - Assembler { - insns: Vec::default(), - first_insn: None, - last_insn: None, - label_names: Vec::default(), - } + pub fn new() -> Self { + Self { insn_list: InsnList::new(), label_names: Vec::default() } } - - - - /// Append an instruction to the list pub(super) fn push_insn( &mut self, @@ -494,9 +597,7 @@ impl Assembler pos_marker: Option ) -> Opnd { - // Id of this instruction - let insn_idx = self.insns.len() as InsnIdx; - + let insn_idx = self.insn_list.next_idx(); let mut out_num_bits: u8 = 0; for (opnd_idx, opnd) in opnds.iter().enumerate() { @@ -516,7 +617,7 @@ impl Assembler // Track which instructions this insn is using as operands if let Opnd::InsnOut { idx, .. } = *opnd { - self.insns[idx as usize].uses.push((insn_idx, opnd_idx as OpndIdx)); + self.insn_list.get_ref_mut(idx).uses.push((insn_idx, opnd_idx as OpndIdx)); } } @@ -527,9 +628,7 @@ impl Assembler // Operand for the output of this instruction let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; - let insn = Insn { - prev: self.last_insn, - next: None, + self.insn_list.push(Insn { uses: Vec::default(), op, text, @@ -537,15 +636,7 @@ impl Assembler out: out_opnd, target, pos_marker, - }; - - self.insns.push(insn); - - if let Some(last_insn_idx) = self.last_insn { - self.insns[last_insn_idx as usize].next = Some(insn_idx); - } - self.last_insn = Some(insn_idx); - self.first_insn = self.first_insn.or(Some(insn_idx)); + }); // Return an operand for the output of this instruction out_opnd @@ -555,21 +646,21 @@ impl Assembler pub(super) fn replace_uses(&mut self, insn_idx: InsnIdx, replace_with: Opnd) { // We're going to clear the vector of uses - let uses = std::mem::take(&mut self.insns[insn_idx as usize].uses); + let uses = std::mem::take(&mut self.insn_list.get_ref_mut(insn_idx).uses); // For each use of this instruction for (use_idx, opnd_idx) in uses { // TODO: assert that this is 
indeed a use of this insn (sanity check) - let use_insn = &mut self.insns[use_idx as usize]; + let use_insn = self.insn_list.get_ref_mut(use_idx); use_insn.opnds[opnd_idx as usize] = replace_with; // If replace_with is an insn, update its uses if let Opnd::InsnOut { idx, .. } = replace_with { - let repl_insn = &mut self.insns[idx as usize]; - assert!(repl_insn.prev.is_some() || repl_insn.next.is_some()); - repl_insn.uses.push((use_idx, opnd_idx)); + let repl_insn = &mut self.insn_list.insns[idx as usize]; + assert!(repl_insn.prev_idx.is_some() || repl_insn.next_idx.is_some()); + repl_insn.insn.uses.push((use_idx, opnd_idx)); } } } @@ -577,33 +668,9 @@ impl Assembler /// Remove a specific insn from the assembler pub(super) fn remove_insn(&mut self, insn_idx: InsnIdx) { - let prev = self.insns[insn_idx as usize].prev; - let next = self.insns[insn_idx as usize].next; - - match prev { - Some(prev_idx) => { - let prev_insn = &mut self.insns[prev_idx as usize]; - prev_insn.next = next; - } - None => { - assert!(self.first_insn == Some(insn_idx)); - self.first_insn = next; - } - }; - - match next { - Some(next_idx) => { - let next_insn = &mut self.insns[next_idx as usize]; - next_insn.prev = prev; - } - None => { - assert!(self.last_insn == Some(insn_idx)); - self.last_insn = prev; - } - }; - // Note: we don't remove it from the vec because we do that // only when we're done with the assembler + self.insn_list.remove(insn_idx); } @@ -1118,11 +1185,11 @@ mod tests fn test_replace_insn() { let mut asm = Assembler::new(); - let v0 = asm.add(1.into(), 2.into()); - let v1 = asm.add(v0, 3.into()); + let v0 = asm.add(1_u64.into(), 2_u64.into()); + let v1 = asm.add(v0, 3_u64.into()); if let Opnd::InsnOut{ idx, ..} = v0 { - asm.replace_uses(idx, 3.into()); + asm.replace_uses(idx, 3_u64.into()); asm.remove_insn(idx); } else @@ -1132,7 +1199,7 @@ mod tests // Nobody is using v1, but we should still be able to "replace" and remove it if let Opnd::InsnOut{ idx, ..} = v1 { - 
asm.replace_uses(idx, 6.into()); + asm.replace_uses(idx, 6_u64.into()); asm.remove_insn(idx); } else @@ -1140,8 +1207,8 @@ mod tests panic!(); } - assert!(asm.first_insn.is_none()); - assert!(asm.last_insn.is_none()); + assert!(asm.insn_list.first_idx.is_none()); + assert!(asm.insn_list.last_idx.is_none()); } #[test] @@ -1162,4 +1229,33 @@ mod tests panic!(); } } -} \ No newline at end of file + + #[test] + fn test_insn_list_push_and_remove() { + let mut insn_list = InsnList::new(); + + let insn_idx = insn_list.push(Insn::new(Op::Load, Opnd::None)); + insn_list.remove(insn_idx); + + assert_eq!(insn_list.first_idx, None); + assert_eq!(insn_list.last_idx, None); + } + + #[test] + fn test_insn_list_iterator() { + let mut insn_list = InsnList::new(); + + let first_insn_idx = insn_list.push(Insn::new(Op::Add, Opnd::None)); + let second_insn_idx = insn_list.push(Insn::new(Op::Sub, Opnd::None)); + let third_insn_idx = insn_list.push(Insn::new(Op::Load, Opnd::None)); + + for (insn_idx, insn) in insn_list.into_iter().enumerate() { + match insn_idx { + 0 => assert_eq!(insn.op, Op::Add), + 1 => assert_eq!(insn.op, Op::Sub), + 2 => assert_eq!(insn.op, Op::Load), + _ => panic!("Unexpected instruction index") + }; + } + } +} From ca68ccdaddb6930f2d6dc9172a8653bcfb340afd Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 2 Aug 2022 15:12:04 -0400 Subject: [PATCH 430/546] Fix C call reg alloc bug reported by Noah & Kokubun --- yjit/src/backend/ir.rs | 4 ++-- yjit/src/backend/tests.rs | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 45d4378eb4337d..f634fb7678be0e 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -633,7 +633,7 @@ impl Assembler let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); if let Some(reg_index) = reg_index { - assert_eq!(*pool & (1 << reg_index), 0); + assert_eq!(*pool & (1 << reg_index), 0, "register already 
allocated"); *pool |= 1 << reg_index; } @@ -702,7 +702,7 @@ impl Assembler // We do this to improve register allocation on x86 // e.g. out = add(reg0, reg1) // reg0 = add(reg0, reg1) - if opnds.len() > 0 { + else if opnds.len() > 0 { if let Opnd::InsnOut{idx, ..} = opnds[0] { if live_ranges[idx] == index { if let Opnd::Reg(reg) = asm.insns[idx].out { diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index dc56bd4abb9edd..a31e16071bdf75 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -195,9 +195,7 @@ fn test_base_insn_out() fn test_c_call() { c_callable! { - fn dummy_c_fun(v0: usize, v1: usize) - { - } + fn dummy_c_fun(v0: usize, v1: usize) {} } let (mut asm, mut cb) = setup_asm(); @@ -213,6 +211,16 @@ fn test_c_call() asm.compile_with_num_regs(&mut cb, 1); } +#[test] +fn test_alloc_ccall_regs() { + let mut asm = Assembler::new(); + let out1 = asm.ccall(0 as *const u8, vec![]); + let out2 = asm.ccall(0 as *const u8, vec![out1]); + asm.mov(EC, out2); + let mut cb = CodeBlock::new_dummy(1024); + asm.compile_with_regs(&mut cb, Assembler::get_alloc_regs()); +} + #[test] fn test_lea_ret() { From 9db2ca723cac60c2d65865a4851c13cac58ff6a3 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 2 Aug 2022 15:36:27 -0400 Subject: [PATCH 431/546] Add 1 more allocatable reg on arm --- .cirrus.yml | 11 +++++++++++ yjit/src/backend/arm64/mod.rs | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index ef91abd3b26e05..839b4a6c17707e 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -153,4 +153,15 @@ yjit_task: bootstraptest/test_struct.rb \ bootstraptest/test_yjit_new_backend.rb \ bootstraptest/test_yjit_rust_port.rb + + # These are the btests we can't run yet on arm: + #bootstraptest/test_block.rb (missing opt_send) + #bootstraptest/test_insns.rb (missing opt_send) + #bootstraptest/test_literal.rb (displacement bug) + #bootstraptest/test_syntax.rb (missing opt_send) + 
#bootstraptest/test_thread.rb (deadlock) + #bootstraptest/test_yjit.rb (multiple bugs) + #bootstraptest/test_yjit_30k_ifelse.rb (missing opt_send) + #bootstraptest/test_yjit_30k_methods.rb (missing opt_send) + # full_build_script: make -j diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 99cf08c09cdce2..e0e889c16c902e 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -66,7 +66,7 @@ impl Assembler /// Note: we intentionally exclude C_RET_REG (X0) from this list /// because of the way it's used in gen_leave() and gen_leave_exit() pub fn get_alloc_regs() -> Vec { - vec![X11_REG, X12_REG] + vec![X11_REG, X12_REG, X13_REG] } /// Get a list of all of the caller-saved registers From a95422a69167baba0e4d086b234ad5316d3c39fe Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 4 Aug 2022 10:31:16 -0400 Subject: [PATCH 432/546] Binary OR instruction for the IR (https://github.com/Shopify/ruby/pull/355) --- yjit/src/backend/arm64/mod.rs | 14 +++++++++++++- yjit/src/backend/ir.rs | 5 +++++ yjit/src/backend/x86_64/mod.rs | 6 +++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index e0e889c16c902e..9dc49a7686a5c2 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -175,7 +175,7 @@ impl Assembler } } }, - Op::And => { + Op::And | Op::Or => { match (opnds[0], opnds[1]) { (Opnd::Reg(_), Opnd::Reg(_)) => { asm.and(opnds[0], opnds[1]); @@ -567,6 +567,9 @@ impl Assembler Op::And => { and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, + Op::Or => { + orr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, Op::Not => { mvn(cb, insn.out.into(), insn.opnds[0].into()); }, @@ -886,6 +889,15 @@ mod tests { asm.compile_with_num_regs(&mut cb, 1); } + #[test] + fn test_emit_or() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + 
asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + #[test] fn test_emit_test() { let (mut asm, mut cb) = setup_asm(); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index f634fb7678be0e..99a084ff026e3d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -57,6 +57,10 @@ pub enum Op // binary AND operation. And, + // This is the same as the OP_ADD instruction, except that it performs the + // binary OR operation. + Or, + // Perform the NOT operation on an individual operand, and return the result // as a new operand. This operand can then be used as the operand on another // instruction. @@ -899,6 +903,7 @@ def_push_jcc!(jo, Op::Jo); def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); +def_push_2_opnd!(or, Op::Or); def_push_1_opnd!(not, Op::Not); def_push_1_opnd_no_out!(cpush, Op::CPush); def_push_0_opnd!(cpop, Op::CPop); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 13bb106b970989..7074c8980b847a 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -113,7 +113,7 @@ impl Assembler }; match op { - Op::Add | Op::Sub | Op::And | Op::Cmp => { + Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or => { let (opnd0, opnd1) = match (opnds[0], opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { (asm.load(opnds[0]), asm.load(opnds[1])) @@ -271,6 +271,10 @@ impl Assembler and(cb, insn.opnds[0].into(), insn.opnds[1].into()) }, + Op::Or => { + or(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Not => { not(cb, insn.opnds[0].into()) }, From fe172aac0465160ec5a02c687ab1dc6ade2c090a Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 4 Aug 2022 09:02:09 -0700 Subject: [PATCH 433/546] Convert getinstancevariable to new backend IR (https://github.com/Shopify/ruby/pull/352) * Convert getinstancevariable to new backend IR * Support mem-based mem * Use more into() * Add tests for getivar * 
Just load obj_opnd to a register * Apply another into() * Flip the nil-out condition * Fix duplicated counts of side_exit --- yjit/src/backend/ir.rs | 9 +- yjit/src/codegen.rs | 315 ++++++++++++++++++++--------------------- 2 files changed, 160 insertions(+), 164 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 99a084ff026e3d..41842c97045509 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -340,7 +340,14 @@ impl Target pub fn unwrap_label_idx(&self) -> usize { match self { Target::Label(idx) => *idx, - _ => unreachable!() + _ => unreachable!("trying to unwrap {:?} into label", self) + } + } + + pub fn unwrap_code_ptr(&self) -> CodePtr { + match self { + Target::CodePtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into code ptr", self) } } } diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ef7ccf9a658900..f08b073e34f40b 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -213,7 +213,7 @@ macro_rules! gen_counter_incr { let counter_opnd = Opnd::mem(64, ptr_reg, 0); // Increment and store the updated value - $asm.incr_counter(counter_opnd, 1.into()); + $asm.incr_counter(counter_opnd, Opnd::UImm(1)); } }; } @@ -236,11 +236,14 @@ macro_rules! 
counted_exit { let ocb = $ocb.unwrap(); let code_ptr = ocb.get_write_ptr(); + let mut ocb_asm = Assembler::new(); + // Increment the counter - gen_counter_incr!(ocb, $counter_name); + gen_counter_incr!(ocb_asm, $counter_name); // Jump to the existing side exit - jmp_ptr(ocb, $existing_side_exit); + ocb_asm.jmp($existing_side_exit.into()); + ocb_asm.compile(ocb); // Pointer to the side-exit code code_ptr @@ -1805,40 +1808,39 @@ fn gen_checkkeyword( KeepCompiling } -/* fn gen_jnz_to_target0( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, ) { match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jnz_ptr(cb, target0), + BranchShape::Default => asm.jnz(target0.into()), } } fn gen_jz_to_target0( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, ) { match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jz_ptr(cb, target0), + BranchShape::Default => asm.jz(Target::CodePtr(target0)), } } fn gen_jbe_to_target0( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, ) { match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jbe_ptr(cb, target0), + BranchShape::Default => asm.jbe(Target::CodePtr(target0)), } } @@ -1848,7 +1850,7 @@ fn jit_chain_guard( jcc: JCCKinds, jit: &JITState, ctx: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, depth_limit: i32, side_exit: CodePtr, @@ -1867,15 +1869,16 @@ fn jit_chain_guard( idx: jit.insn_idx, }; - gen_branch(jit, ctx, cb, ocb, bid, &deeper, None, None, target0_gen_fn); + gen_branch(jit, ctx, asm, ocb, bid, &deeper, None, None, target0_gen_fn); } else { - target0_gen_fn(cb, side_exit, None, BranchShape::Default); + target0_gen_fn(asm, side_exit, None, BranchShape::Default); } } // up to 5 different classes, and embedded or not 
for each pub const GET_IVAR_MAX_DEPTH: i32 = 10; +/* // hashes and arrays pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2; @@ -1921,26 +1924,33 @@ fn gen_set_ivar( KeepCompiling } -/* + + // Codegen for getting an instance variable. // Preconditions: -// - receiver is in REG0 // - receiver has the same class as CLASS_OF(comptime_receiver) // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled fn gen_get_ivar( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, max_chain_depth: i32, comptime_receiver: VALUE, ivar_name: ID, - reg0_opnd: InsnOpnd, + recv: Opnd, + recv_opnd: InsnOpnd, side_exit: CodePtr, ) -> CodegenStatus { let comptime_val_klass = comptime_receiver.class_of(); let starting_context = *ctx; // make a copy for use with jit_chain_guard + // If recv isn't already a register, load it. + let recv = match recv { + Opnd::Reg(_) => recv, + _ => asm.load(recv), + }; + // Check if the comptime class uses a custom allocator let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; let uses_custom_allocator = match custom_allocator { @@ -1961,46 +1971,26 @@ fn gen_get_ivar( if !receiver_t_object || uses_custom_allocator { // General case. Call rb_ivar_get(). // VALUE rb_ivar_get(VALUE obj, ID id) - add_comment(cb, "call rb_ivar_get()"); + asm.comment("call rb_ivar_get()"); // The function could raise exceptions. - jit_prepare_routine_call(jit, ctx, cb, REG1); + jit_prepare_routine_call(jit, ctx, asm); - mov(cb, C_ARG_REGS[0], REG0); - mov(cb, C_ARG_REGS[1], uimm_opnd(ivar_name)); - call_ptr(cb, REG1, rb_ivar_get as *const u8); + let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]); - if reg0_opnd != SelfOpnd { + if recv_opnd != SelfOpnd { ctx.stack_pop(1); } + // Push the ivar on the stack let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, RAX); + asm.mov(out_opnd, ivar_val); // Jump to next instruction. 
This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } - /* - // FIXME: - // This check was added because of a failure in a test involving the - // Nokogiri Document class where we see a T_DATA that still has the default - // allocator. - // Aaron Patterson argues that this is a bug in the C extension, because - // people could call .allocate() on the class and still get a T_OBJECT - // For now I added an extra dynamic check that the receiver is T_OBJECT - // so we can safely pass all the tests in Shopify Core. - // - // Guard that the receiver is T_OBJECT - // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK) - add_comment(cb, "guard receiver is T_OBJECT"); - mov(cb, REG1, member_opnd(REG0, struct RBasic, flags)); - and(cb, REG1, imm_opnd(RUBY_T_MASK)); - cmp(cb, REG1, imm_opnd(T_OBJECT)); - jit_chain_guard(JCC_JNE, jit, &starting_context, cb, ocb, max_chain_depth, side_exit); - */ - // FIXME: Mapping the index could fail when there is too many ivar names. If we're // compiling for a branch stub that can cause the exception to be thrown from the // wrong PC. 
@@ -2008,16 +1998,16 @@ fn gen_get_ivar( unsafe { rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name) }.as_usize(); // Pop receiver if it's on the temp stack - if reg0_opnd != SelfOpnd { + if recv_opnd != SelfOpnd { ctx.stack_pop(1); } if USE_RVARGC != 0 { // Check that the ivar table is big enough // Check that the slot is inside the ivar table (num_slots > index) - let num_slots = mem_opnd(32, REG0, ROBJECT_OFFSET_NUMIV); - cmp(cb, num_slots, uimm_opnd(ivar_index as u64)); - jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range)); + let num_slots = Opnd::mem(32, recv, ROBJECT_OFFSET_NUMIV); + asm.cmp(num_slots, Opnd::UImm(ivar_index as u64)); + asm.jbe(counted_exit!(ocb, side_exit, getivar_idx_out_of_range).into()); } // Compile time self is embedded and the ivar index lands within the object @@ -2027,15 +2017,15 @@ fn gen_get_ivar( // Guard that self is embedded // TODO: BT and JC is shorter - add_comment(cb, "guard embedded getivar"); - let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS); - test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64)); + asm.comment("guard embedded getivar"); + let flags_opnd = Opnd::mem(64, recv, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(ROBJECT_EMBED as u64)); let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); jit_chain_guard( JCC_JZ, jit, &starting_context, - cb, + asm, ocb, max_chain_depth, side_exit, @@ -2043,76 +2033,71 @@ fn gen_get_ivar( // Load the variable let offs = ROBJECT_OFFSET_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = mem_opnd(64, REG0, offs); - mov(cb, REG1, ivar_opnd); + let ivar_opnd = Opnd::mem(64, recv, offs); // Guard that the variable is not Qundef - cmp(cb, REG1, uimm_opnd(Qundef.into())); - mov(cb, REG0, uimm_opnd(Qnil.into())); - cmove(cb, REG1, REG0); + asm.cmp(ivar_opnd, Qundef.into()); + let out_val = asm.csel_e(Qnil.into(), ivar_opnd); // Push the ivar on the stack let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, 
out_opnd, REG1); + asm.mov(out_opnd, out_val); } else { // Compile time value is *not* embedded. // Guard that value is *not* embedded // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - add_comment(cb, "guard extended getivar"); - let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS); - test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64)); - let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); + asm.comment("guard extended getivar"); + let flags_opnd = Opnd::mem(64, recv, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(ROBJECT_EMBED as u64)); + let megamorphic_side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); jit_chain_guard( JCC_JNZ, jit, &starting_context, - cb, + asm, ocb, max_chain_depth, - side_exit, + megamorphic_side_exit, ); if USE_RVARGC == 0 { // Check that the extended table is big enough // Check that the slot is inside the extended table (num_slots > index) - let num_slots = mem_opnd(32, REG0, ROBJECT_OFFSET_NUMIV); - cmp(cb, num_slots, uimm_opnd(ivar_index as u64)); - jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range)); + let num_slots = Opnd::mem(32, recv, ROBJECT_OFFSET_NUMIV); + asm.cmp(num_slots, Opnd::UImm(ivar_index as u64)); + asm.jbe(Target::CodePtr(counted_exit!(ocb, side_exit, getivar_idx_out_of_range))); } // Get a pointer to the extended table - let tbl_opnd = mem_opnd(64, REG0, ROBJECT_OFFSET_AS_HEAP_IVPTR); - mov(cb, REG0, tbl_opnd); + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR)); // Read the ivar from the extended table - let ivar_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE * ivar_index) as i32); - mov(cb, REG0, ivar_opnd); + let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); // Check that the ivar is not Qundef - cmp(cb, REG0, uimm_opnd(Qundef.into())); - mov(cb, REG1, uimm_opnd(Qnil.into())); - cmove(cb, REG0, REG1); + asm.cmp(ivar_opnd, Qundef.into()); + let out_val = asm.csel_ne(ivar_opnd, 
Qnil.into()); // Push the ivar on the stack let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, REG0); + asm.mov(out_opnd, out_val); } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } fn gen_getinstancevariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2125,14 +2110,14 @@ fn gen_getinstancevariable( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the receiver has the same class as the one from compile time. - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF)); - + let self_asm_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, comptime_val_klass, + self_asm_opnd, SelfOpnd, comptime_val, GET_IVAR_MAX_DEPTH, @@ -2142,16 +2127,18 @@ fn gen_getinstancevariable( gen_get_ivar( jit, ctx, - cb, + asm, ocb, GET_IVAR_MAX_DEPTH, comptime_val, ivar_name, + self_asm_opnd, SelfOpnd, side_exit, ) } +/* fn gen_setinstancevariable( jit: &mut JITState, ctx: &mut Context, @@ -2488,13 +2475,13 @@ fn gen_equality_specialized( mov(cb, C_ARG_REGS[1], b_opnd); // Guard that a is a String - mov(cb, REG0, C_ARG_REGS[0]); jit_guard_known_klass( jit, ctx, cb, ocb, unsafe { rb_cString }, + C_ARG_REGS[0], StackOpnd(1), comptime_a, SEND_MAX_DEPTH, @@ -2511,7 +2498,6 @@ fn gen_equality_specialized( // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) let btype = ctx.get_opnd_type(StackOpnd(0)); if btype.known_value_type() != Some(RUBY_T_STRING) { - mov(cb, REG0, C_ARG_REGS[1]); // Note: any T_STRING is valid here, but we check for a ::String for simplicity // To pass a mutable 
static variable (rb_cString) requires an unsafe block jit_guard_known_klass( @@ -2520,6 +2506,7 @@ fn gen_equality_specialized( cb, ocb, unsafe { rb_cString }, + C_ARG_REGS[1], StackOpnd(0), comptime_b, SEND_MAX_DEPTH, @@ -2673,13 +2660,13 @@ fn gen_opt_aref( let recv_opnd = ctx.stack_opnd(1); // Guard that the receiver is a hash - mov(cb, REG0, recv_opnd); jit_guard_known_klass( jit, ctx, cb, ocb, unsafe { rb_cHash }, + recv_opnd, StackOpnd(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, @@ -2735,13 +2722,13 @@ fn gen_opt_aset( let side_exit = get_side_exit(jit, ocb, ctx); // Guard receiver is an Array - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, cb, ocb, unsafe { rb_cArray }, + recv, StackOpnd(2), comptime_recv, SEND_MAX_DEPTH, @@ -2749,13 +2736,13 @@ fn gen_opt_aset( ); // Guard key is a fixnum - mov(cb, REG0, key); jit_guard_known_klass( jit, ctx, cb, ocb, unsafe { rb_cInteger }, + key, StackOpnd(1), comptime_key, SEND_MAX_DEPTH, @@ -2788,13 +2775,13 @@ fn gen_opt_aset( let side_exit = get_side_exit(jit, ocb, ctx); // Guard receiver is a Hash - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, cb, ocb, unsafe { rb_cHash }, + recv, StackOpnd(2), comptime_recv, SEND_MAX_DEPTH, @@ -3395,20 +3382,19 @@ fn gen_jump( EndBlock } -/* /// Guard that self or a stack operand has the same class as `known_klass`, using /// `sample_instance` to speculate about the shape of the runtime value. /// FIXNUM and on-heap integers are treated as if they have distinct classes, and /// the guard generated for one will fail for the other. /// /// Recompile as contingency if possible, or take side exit a last resort. 
- fn jit_guard_known_klass( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, known_klass: VALUE, + obj_opnd: Opnd, insn_opnd: InsnOpnd, sample_instance: VALUE, max_chain_depth: i32, @@ -3425,28 +3411,28 @@ fn jit_guard_known_klass( assert!(!val_type.is_heap()); assert!(val_type.is_unknown()); - add_comment(cb, "guard object is nil"); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard object is nil"); + asm.cmp(obj_opnd, Qnil.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::Nil); } else if unsafe { known_klass == rb_cTrueClass } { assert!(!val_type.is_heap()); assert!(val_type.is_unknown()); - add_comment(cb, "guard object is true"); - cmp(cb, REG0, imm_opnd(Qtrue.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard object is true"); + asm.cmp(obj_opnd, Qtrue.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::True); } else if unsafe { known_klass == rb_cFalseClass } { assert!(!val_type.is_heap()); assert!(val_type.is_unknown()); - add_comment(cb, "guard object is false"); + asm.comment("guard object is false"); assert!(Qfalse.as_i32() == 0); - test(cb, REG0, REG0); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.test(obj_opnd, obj_opnd); + jit_chain_guard(JCC_JNZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::False); } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() { @@ -3454,32 +3440,35 @@ fn jit_guard_known_klass( // BIGNUM can be handled by the general else case below assert!(val_type.is_unknown()); - add_comment(cb, "guard object is fixnum"); - test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64)); - jit_chain_guard(JCC_JZ, 
jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard object is fixnum"); + asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + jit_chain_guard(JCC_JZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() { assert!(!val_type.is_heap()); // We will guard STATIC vs DYNAMIC as though they were separate classes // DYNAMIC symbols can be handled by the general else case below - assert!(val_type.is_unknown()); - - add_comment(cb, "guard object is static symbol"); - assert!(RUBY_SPECIAL_SHIFT == 8); - cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); + if val_type != Type::ImmSymbol || !val_type.is_imm() { + assert!(val_type.is_unknown()); + + asm.comment("guard object is static symbol"); + assert!(RUBY_SPECIAL_SHIFT == 8); + asm.cmp(obj_opnd, Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); + } } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() { assert!(!val_type.is_heap()); - assert!(val_type.is_unknown()); - - // We will guard flonum vs heap float as though they were separate classes - add_comment(cb, "guard object is flonum"); - mov(cb, REG1, REG0); - and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64)); - cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); + if val_type != Type::Flonum || !val_type.is_imm() { + assert!(val_type.is_unknown()); + + // We will guard flonum vs heap float as though they were separate classes + asm.comment("guard object is flonum"); + asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + 
asm.cmp(obj_opnd, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); + } } else if unsafe { FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON as usize)) != VALUE(0) && sample_instance == rb_attr_get(known_klass, id__attached__ as ID) @@ -3494,35 +3483,42 @@ fn jit_guard_known_klass( // that its singleton class is empty, so we can't avoid the memory // access. As an example, `Object.new.singleton_class` is an object in // this situation. - add_comment(cb, "guard known object with singleton class"); - // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object. - jit_mov_gc_ptr(jit, cb, REG1, sample_instance); - cmp(cb, REG0, REG1); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard known object with singleton class"); + asm.cmp(obj_opnd, sample_instance.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); + } else if val_type == Type::CString && unsafe { known_klass == rb_cString } { + // guard elided because the context says we've already checked + unsafe { + assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") + }; } else { assert!(!val_type.is_imm()); // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. 
if !val_type.is_heap() { - add_comment(cb, "guard not immediate"); + asm.comment("guard not immediate"); assert!(Qfalse.as_i32() < Qnil.as_i32()); - test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JBE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.test(obj_opnd, Opnd::Imm(RUBY_IMMEDIATE_MASK as i64)); + jit_chain_guard(JCC_JNZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); + asm.cmp(obj_opnd, Qnil.into()); + jit_chain_guard(JCC_JBE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); } - let klass_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS); + // If obj_opnd isn't already a register, load it. + let obj_opnd = match obj_opnd { + Opnd::Reg(_) => obj_opnd, + _ => asm.load(obj_opnd), + }; + let klass_opnd = Opnd::mem(64, obj_opnd, RUBY_OFFSET_RBASIC_KLASS); // Bail if receiver class is different from known_klass // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class. - add_comment(cb, "guard known class"); - jit_mov_gc_ptr(jit, cb, REG1, known_klass); - cmp(cb, klass_opnd, REG1); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard known class"); + asm.cmp(klass_opnd, known_klass.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); if known_klass == unsafe { rb_cString } { ctx.upgrade_opnd_type(insn_opnd, Type::CString); @@ -3530,6 +3526,7 @@ fn jit_guard_known_klass( } } +/* // Generate ancestry guard for protected callee. // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee). fn jit_protected_callee_ancestry_guard( @@ -3774,25 +3771,18 @@ fn jit_rb_str_concat( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the argument is of class String at runtime. 
- let insn_opnd = StackOpnd(0); - let arg_opnd = ctx.stack_opnd(0); - mov(cb, REG0, arg_opnd); - let arg_type = ctx.get_opnd_type(insn_opnd); - - if arg_type != Type::CString && arg_type != Type::TString { - if !arg_type.is_heap() { - add_comment(cb, "guard arg not immediate"); - test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); - jnz_ptr(cb, side_exit); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jbe_ptr(cb, side_exit); - - ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); - } - guard_object_is_string(cb, REG0, REG1, side_exit); - // We know this has type T_STRING, but not necessarily that it's a ::String - ctx.upgrade_opnd_type(insn_opnd, Type::TString); - } + jit_guard_known_klass( + jit, + ctx, + cb, + ocb, + unsafe { rb_cString }, + ctx.stack_opnd(0), + StackOpnd(0), + comptime_arg, + SEND_MAX_DEPTH, + side_exit, + ); let concat_arg = ctx.stack_pop(1); let recv = ctx.stack_pop(1); @@ -4682,7 +4672,7 @@ fn gen_send_iseq( gen_branch( jit, ctx, - cb, + asm, ocb, return_block, &return_ctx, @@ -4864,13 +4854,13 @@ fn gen_send_general( let recv_opnd = StackOpnd(argc.try_into().unwrap()); // TODO: Resurrect this once jit_guard_known_klass is implemented for getivar /* - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, cb, ocb, comptime_recv_klass, + recv, recv_opnd, comptime_recv, SEND_MAX_DEPTH, @@ -5257,14 +5247,13 @@ fn gen_leave( // Only the return value should be on the stack assert!(ctx.get_stack_size() == 1); - // FIXME - /* // Create a side-exit to fall back to the interpreter - //let side_exit = get_side_exit(jit, ocb, ctx); + let side_exit = get_side_exit(jit, ocb, ctx); + let mut ocb_asm = Assembler::new(); // Check for interrupts - //gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt)); - */ + gen_check_ints(asm, counted_exit!(ocb, side_exit, leave_se_interrupt)); + ocb_asm.compile(ocb.unwrap()); // Pop the current frame (ec->cfp++) // Note: the return PC is already in the previous CFP @@ -5376,13 +5365,13 @@ fn gen_objtostring( 
if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } { let side_exit = get_side_exit(jit, ocb, ctx); - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, cb, ocb, comptime_recv.class_of(), + recv, StackOpnd(0), comptime_recv, SEND_MAX_DEPTH, @@ -6015,10 +6004,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_defined => Some(gen_defined), YARVINSN_checkkeyword => Some(gen_checkkeyword), YARVINSN_concatstrings => Some(gen_concatstrings), - /* YARVINSN_getinstancevariable => Some(gen_getinstancevariable), - YARVINSN_setinstancevariable => Some(gen_setinstancevariable), + //YARVINSN_setinstancevariable => Some(gen_setinstancevariable), + /* YARVINSN_opt_eq => Some(gen_opt_eq), YARVINSN_opt_neq => Some(gen_opt_neq), YARVINSN_opt_aref => Some(gen_opt_aref), From 24ddc07d6ee02620b8be7b4defd903897bb97845 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Thu, 4 Aug 2022 10:12:25 -0700 Subject: [PATCH 434/546] Fix live_ranges idx calculation (https://github.com/Shopify/ruby/pull/353) forward_pass adjusts the indexes of our opnds to reflect the new instructions as they are generated in the forward pass. However, we were using the old live_ranges array, for which the new indexes are incorrect. This caused us to previously generate an IR which contained unnecessary trivial load instructions (ex. mov rax, rax), because it was looking at the wrong lifespans. Presumably this could also cause bugs because the lifespan of the incorrectly considered operand idx could be short. We've added an assert which would have failed on the previous trivial case (but not necessarily all cases). 
Co-authored-by: Matthew Draper --- yjit/src/backend/ir.rs | 7 ++++--- yjit/src/backend/x86_64/mod.rs | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 41842c97045509..066e9dd4ce4677 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -567,7 +567,7 @@ impl Assembler /// Transform input instructions, consumes the input assembler pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler - where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option, Option) + where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option, Option, Vec) { let mut asm = Assembler { insns: Vec::default(), @@ -594,6 +594,7 @@ impl Assembler } for (index, insn) in self.insns.drain(..).enumerate() { + let original_opnds = insn.opnds.clone(); let opnds: Vec = insn.opnds.into_iter().map(|opnd| map_opnd(opnd, &mut indices)).collect(); // For each instruction, either handle it here or allow the map_insn @@ -603,7 +604,7 @@ impl Assembler asm.comment(insn.text.unwrap().as_str()); }, _ => { - map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text, insn.pos_marker); + map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text, insn.pos_marker, original_opnds); } }; @@ -664,7 +665,7 @@ impl Assembler let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - let asm = self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { + let asm = self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_insns| { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. 
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 7074c8980b847a..c3885be811365a 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -94,7 +94,7 @@ impl Assembler { let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { + self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { // Load heap object operands into registers because most // instructions can't directly work with 64-bit constants let opnds = match op { @@ -134,7 +134,17 @@ impl Assembler } }, // Instruction output whose live range spans beyond this instruction - (Opnd::InsnOut { idx, .. }, _) => { + (Opnd::InsnOut { .. }, _) => { + let idx = match original_opnds[0] { + Opnd::InsnOut { idx, .. } => { + idx + }, + _ => panic!("nooooo") + }; + + // Our input must be from a previous instruction! + assert!(idx < index); + if live_ranges[idx] > index { (asm.load(opnds[0]), opnds[1]) } else { From c91a44cba49c8164982f241c00dc7c6418a4757b Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 4 Aug 2022 14:29:18 -0400 Subject: [PATCH 435/546] Fix forward_pass usage in arm backend following John's PR --- yjit/src/backend/arm64/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 9dc49a7686a5c2..4b0b0340885413 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -143,7 +143,7 @@ impl Assembler } } - self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { + self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { // Load all Value operands into registers that aren't already a part // of Load instructions. 
let opnds = match op { From 4539c21367cf63e34afe4f14656779d33883647b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 4 Aug 2022 11:47:53 -0700 Subject: [PATCH 436/546] Port gen_send_cfunc to the new backend (https://github.com/Shopify/ruby/pull/357) * Port gen_send_cfunc to the new backend * Remove an obsoleted test * Add more cfunc tests * Use csel_e instead and more into() Co-authored-by: Maxime Chevalier-Boisvert * Add a missing lea for build_kwargs * Split cfunc test cases Co-authored-by: Maxime Chevalier-Boisvert --- bootstraptest/test_yjit.rb | 32 ++++++++ yjit/src/codegen.rs | 162 ++++++++++++++++--------------------- 2 files changed, 103 insertions(+), 91 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 2409306106e10a..16eda7fa84ecc0 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3111,3 +3111,35 @@ def foo() end foo() } + +# opt_send_without_block (VM_METHOD_TYPE_CFUNC) +assert_equal 'nil', %q{ + def foo + nil.inspect # argc: 0 + end + foo +} +assert_equal '4', %q{ + def foo + 2.pow(2) # argc: 1 + end + foo +} +assert_equal 'aba', %q{ + def foo + "abc".tr("c", "a") # argc: 2 + end + foo +} +assert_equal 'true', %q{ + def foo + respond_to?(:inspect) # argc: -1 + end + foo +} +assert_equal '["a", "b"]', %q{ + def foo + "a\nb".lines(chomp: true) # kwargs + end + foo +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index f08b073e34f40b..c89cfc366f8197 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1881,10 +1881,10 @@ pub const GET_IVAR_MAX_DEPTH: i32 = 10; /* // hashes and arrays pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2; +*/ // up to 5 different classes pub const SEND_MAX_DEPTH: i32 = 5; -*/ // Codegen for setting an instance variable. 
// Preconditions: @@ -2221,7 +2221,7 @@ fn gen_checktype( // Only three types are emitted by compile.c at the moment if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val { let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val = ctx.stack_pop(1); + let val = asm.load(ctx.stack_pop(1)); // Check if we know from type information match val_type.known_value_type() { @@ -2253,7 +2253,7 @@ fn gen_checktype( Opnd::mem(64, val, RUBY_OFFSET_RBASIC_FLAGS), Opnd::UImm(RUBY_T_MASK.into())); asm.cmp(object_type, Opnd::UImm(type_val.into())); - let ret_opnd = asm.csel_e(Opnd::UImm(Qfalse.into()), Opnd::UImm(Qtrue.into())); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); asm.write_label(ret); let stack_ret = ctx.stack_push(Type::UnknownImm); @@ -3851,6 +3851,7 @@ fn jit_thread_s_current( mov(cb, stack_ret, REG0); true } +*/ // Check if we know how to codegen for a particular cfunc method fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option { @@ -3858,7 +3859,6 @@ fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option bool { @@ -3868,7 +3868,6 @@ fn c_method_tracing_currently_enabled(jit: &JITState) -> bool { } } -/* // Similar to args_kw_argv_to_hash. It is called at runtime from within the // generated assembly to build a Ruby hash of the passed keyword arguments. 
The // keys are the Symbol objects associated with the keywords and the values are @@ -3889,7 +3888,7 @@ unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> V fn gen_send_cfunc( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -3902,7 +3901,7 @@ fn gen_send_cfunc( // If the function expects a Ruby array of arguments if cfunc_argc < 0 && cfunc_argc != -1 { - gen_counter_incr!(cb, send_cfunc_ruby_array_varg); + gen_counter_incr!(asm, send_cfunc_ruby_array_varg); return CantCompile; } @@ -3923,19 +3922,19 @@ fn gen_send_cfunc( // If the argument count doesn't match if cfunc_argc >= 0 && cfunc_argc != passed_argc { - gen_counter_incr!(cb, send_cfunc_argc_mismatch); + gen_counter_incr!(asm, send_cfunc_argc_mismatch); return CantCompile; } // Don't JIT functions that need C stack arguments for now - if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_REGS.len() as i32) { - gen_counter_incr!(cb, send_cfunc_toomany_args); + if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { + gen_counter_incr!(asm, send_cfunc_toomany_args); return CantCompile; } if c_method_tracing_currently_enabled(jit) { // Don't JIT if tracing c_call or c_return - gen_counter_incr!(cb, send_cfunc_tracing); + gen_counter_incr!(asm, send_cfunc_tracing); return CantCompile; } @@ -3943,6 +3942,7 @@ fn gen_send_cfunc( if kw_arg.is_null() { let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def }); if let Some(known_cfunc_codegen) = codegen_p { + return CantCompile; /* let start_pos = cb.get_write_ptr().raw_ptr() as usize; if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) { let written_bytes = cb.get_write_ptr().raw_ptr() as usize - start_pos; @@ -3955,6 +3955,7 @@ fn gen_send_cfunc( jump_to_next_insn(jit, ctx, cb, ocb); return EndBlock; } + */ } } @@ -3962,57 +3963,49 @@ fn gen_send_cfunc( let side_exit = 
get_side_exit(jit, ocb, ctx); // Check for interrupts - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); // Stack overflow check // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t) - add_comment(cb, "stack overflow check"); - lea( - cb, - REG0, - ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize), - ); - cmp(cb, REG_CFP, REG0); - jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow)); + asm.comment("stack overflow check"); + let stack_limit = asm.lea(ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize)); + asm.cmp(CFP, stack_limit); + asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into()); // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); // Store incremented PC into current control frame in case callee raises. - jit_save_pc(jit, cb, REG0); + jit_save_pc(jit, asm); if let Some(block_iseq) = block { // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases // with cfp->block_code. - jit_mov_gc_ptr(jit, cb, REG0, VALUE(block_iseq as usize)); - let block_code_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE); - mov(cb, block_code_opnd, REG0); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), Opnd::UImm(block_iseq as u64)); } // Increment the stack pointer by 3 (in the callee) // sp += 3 - lea(cb, REG0, ctx.sp_opnd((SIZEOF_VALUE as isize) * 3)); + let sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * 3)); // Write method entry at sp[-3] // sp[-3] = me; // Put compile time cme into REG1. It's assumed to be valid because we are notified when // any cme we depend on become outdated. See yjit_method_lookup_change(). 
- jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize)); - mov(cb, mem_opnd(64, REG0, 8 * -3), REG1); + asm.mov(Opnd::mem(64, sp, 8 * -3), Opnd::UImm(cme as u64)); // Write block handler at sp[-2] // sp[-2] = block_handler; if let Some(_block_iseq) = block { // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); - let cfp_self = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF); - lea(cb, REG1, cfp_self); - or(cb, REG1, imm_opnd(1)); - mov(cb, mem_opnd(64, REG0, 8 * -2), REG1); + let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + let block_handler = asm.or(cfp_self, Opnd::Imm(1)); + asm.mov(Opnd::mem(64, sp, 8 * -2), block_handler); } else { - let dst_opnd = mem_opnd(64, REG0, 8 * -2); - mov(cb, dst_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into())); + let dst_opnd = Opnd::mem(64, sp, 8 * -2); + asm.mov(dst_opnd, Opnd::UImm(VM_BLOCK_HANDLER_NONE.into())); } // Write env flags at sp[-1] @@ -4021,11 +4014,12 @@ fn gen_send_cfunc( if !kw_arg.is_null() { frame_type |= VM_FRAME_FLAG_CFRAME_KW } - mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into())); + asm.mov(Opnd::mem(64, sp, 8 * -1), Opnd::UImm(frame_type.into())); // Allocate a new CFP (ec->cfp--) - let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - sub(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let new_cfp = asm.sub(ec_cfp_opnd, Opnd::UImm(RUBY_SIZEOF_CONTROL_FRAME as u64)); + asm.store(ec_cfp_opnd, new_cfp); // Setup the new frame // *cfp = (const struct rb_control_frame_struct) { @@ -4039,22 +4033,15 @@ fn gen_send_cfunc( // }; // Can we re-use ec_cfp_opnd from above? 
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - mov(cb, REG1, ec_cfp_opnd); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_PC), imm_opnd(0)); - - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SP), REG0); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_ISEQ), imm_opnd(0)); - mov( - cb, - mem_opnd(64, REG1, RUBY_OFFSET_CFP_BLOCK_CODE), - imm_opnd(0), - ); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_BP), REG0); - sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64)); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_EP), REG0); - mov(cb, REG0, recv); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SELF), REG0); + let ec_cfp_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_PC), Opnd::Imm(0)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_SP), sp); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_ISEQ), Opnd::Imm(0)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_BLOCK_CODE), Opnd::Imm(0)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_BP), sp); + let ep = asm.sub(sp, Opnd::UImm(SIZEOF_VALUE as u64)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_EP), ep); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_SELF), recv); /* // Verify that we are calling the right function @@ -4070,71 +4057,66 @@ fn gen_send_cfunc( if !kw_arg.is_null() { // Build a hash from all kwargs passed - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], VALUE(ci as usize)); - lea(cb, C_ARG_REGS[1], ctx.sp_opnd(0)); - call_ptr(cb, REG0, build_kwhash as *const u8); + asm.comment("build_kwhash"); + let sp = asm.lea(ctx.sp_opnd(0)); + let kwargs = asm.ccall(build_kwhash as *const u8, vec![Opnd::UImm(ci as u64), sp]); // Replace the stack location at the start of kwargs with the new hash let stack_opnd = ctx.stack_opnd(argc - passed_argc); - mov(cb, stack_opnd, RAX); + asm.mov(stack_opnd, kwargs); } - // Copy SP into RAX because REG_SP will get overwritten - lea(cb, RAX, ctx.sp_opnd(0)); + // Copy SP because REG_SP will get 
overwritten + let sp = asm.lea(ctx.sp_opnd(0)); // Pop the C function arguments from the stack (in the caller) ctx.stack_pop((argc + 1).try_into().unwrap()); // Write interpreter SP into CFP. // Needed in case the callee yields to the block. - gen_save_sp(cb, ctx); + gen_save_sp(jit, asm, ctx); // Non-variadic method - if cfunc_argc >= 0 { + let args = if cfunc_argc >= 0 { // Copy the arguments from the stack to the C argument registers // self is the 0th argument and is at index argc from the stack top - for i in 0..=passed_argc as usize { - let stack_opnd = mem_opnd(64, RAX, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32); - let c_arg_reg = C_ARG_REGS[i]; - mov(cb, c_arg_reg, stack_opnd); - } + (0..=passed_argc).map(|i| + Opnd::mem(64, sp, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32) + ).collect() } - // Variadic method - if cfunc_argc == -1 { + else if cfunc_argc == -1 { // The method gets a pointer to the first argument // rb_f_puts(int argc, VALUE *argv, VALUE recv) - mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc.into())); - lea( - cb, - C_ARG_REGS[1], - mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE_I32), - ); - mov( - cb, - C_ARG_REGS[2], - mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE_I32), - ); + vec![ + Opnd::Imm(passed_argc.into()), + asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)), + Opnd::mem(64, sp, -(argc + 1) * SIZEOF_VALUE_I32), + ] } + else { + panic!("unexpected cfunc_args: {}", cfunc_argc) + }; // Call the C function // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]); // cfunc comes from compile-time cme->def, which we assume to be stable. // Invalidation logic is in yjit_method_lookup_change() - add_comment(cb, "call C function"); - call_ptr(cb, REG0, unsafe { get_mct_func(cfunc) }); + asm.comment("call C function"); + let ret = asm.ccall(unsafe { get_mct_func(cfunc) }, args); // Record code position for TracePoint patching. See full_cfunc_return(). 
- record_global_inval_patch(cb, CodegenGlobals::get_outline_full_cfunc_return_pos()); + record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos()); // Push the return value on the Ruby stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret); // Pop the stack frame (ec->cfp++) // Can we reuse ec_cfp_opnd from above? - let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - add(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let new_cfp = asm.add(ec_cfp_opnd, Opnd::UImm(RUBY_SIZEOF_CONTROL_FRAME as u64)); + asm.store(ec_cfp_opnd, new_cfp); // cfunc calls may corrupt types ctx.clear_local_types(); @@ -4144,10 +4126,11 @@ fn gen_send_cfunc( // Jump (fall through) to the call continuation block // We do this to end the current block after the call - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } +/* fn gen_return_branch( cb: &mut CodeBlock, target0: CodePtr, @@ -4852,12 +4835,10 @@ fn gen_send_general( // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); let recv_opnd = StackOpnd(argc.try_into().unwrap()); - // TODO: Resurrect this once jit_guard_known_klass is implemented for getivar - /* jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, comptime_recv_klass, recv, @@ -4865,7 +4846,7 @@ fn gen_send_general( comptime_recv, SEND_MAX_DEPTH, side_exit, - ); */ + ); // Do method lookup let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; @@ -4911,7 +4892,6 @@ fn gen_send_general( return CantCompile; // return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc); } VM_METHOD_TYPE_CFUNC => { - return CantCompile; /* return gen_send_cfunc( jit, ctx, @@ -4922,7 +4902,7 @@ fn gen_send_general( block, argc, &comptime_recv_klass, - ); */ + ); } VM_METHOD_TYPE_IVAR => { if argc != 0 { From ddee4d3af8859d30e3714ac544828d5b76027093 Mon 
Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 3 Aug 2022 18:25:01 -0400 Subject: [PATCH 437/546] Opnd::Value fixes (https://github.com/Shopify/ruby/pull/354) * Fix asm.load(VALUE) - `>` didn't track that the value is a value - `Iterator::map` doesn't evaluate the closure you give it until you call `collect`. Use a for loop instead so we put the gc offsets into the compiled block properly. * x64: Mov(mem, VALUE) should load the value first Tripped in codegen for putobject now that we are actually feeding `Opnd::Value` into the backend. * x64 split: Canonicallize VALUE loads * Update yjit/src/backend/x86_64/mod.rs --- yjit/src/backend/ir.rs | 3 +-- yjit/src/backend/x86_64/mod.rs | 20 +++++++++++--------- yjit/src/codegen.rs | 4 +++- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 066e9dd4ce4677..a23b27dda26921 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -313,8 +313,7 @@ impl From for Opnd { impl From for Opnd { fn from(value: VALUE) -> Self { - let VALUE(uimm) = value; - Opnd::UImm(uimm as u64) + Opnd::Value(value) } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index c3885be811365a..d1f1698b2fdb24 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -95,20 +95,22 @@ impl Assembler let live_ranges: Vec = std::mem::take(&mut self.live_ranges); self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { - // Load heap object operands into registers because most - // instructions can't directly work with 64-bit constants + // Load VALUEs into registers because + // - Most instructions can't be encoded with 64-bit immediates. + // - We look for Op::Load specifically when emiting to keep GC'ed + // VALUEs alive. This is a sort of canonicalization. 
let opnds = match op { - Op::Load | Op::Mov => opnds, + Op::Load => opnds, _ => opnds.into_iter().map(|opnd| { if let Opnd::Value(value) = opnd { - if !value.special_const_p() { - asm.load(opnd) - } else { - opnd + // Since mov(mem64, imm32) sign extends, as_i64() makes sure we split + // when the extended value is different. + if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { + return asm.load(opnd); } - } else { - opnd } + + opnd }).collect() }; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index c89cfc366f8197..903e899888d981 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -849,7 +849,9 @@ pub fn gen_single_block( let mut block = jit.block.borrow_mut(); // Add the GC offsets to the block - gc_offsets.iter().map(|offs| { block.add_gc_obj_offset(*offs) }); + for offset in gc_offsets { + block.add_gc_obj_offset(offset) + } // Mark the end position of the block block.set_end_addr(cb.get_write_ptr()); From d63b3e43bd0a2dac5447855e3fae2c3de3499e53 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 4 Aug 2022 16:47:35 -0400 Subject: [PATCH 438/546] Enable test_thread.rb on arm, now working --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 839b4a6c17707e..18f8ae5b255110 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -151,6 +151,7 @@ yjit_task: bootstraptest/test_proc.rb \ bootstraptest/test_string.rb \ bootstraptest/test_struct.rb \ + bootstraptest/test_thread.rb \ bootstraptest/test_yjit_new_backend.rb \ bootstraptest/test_yjit_rust_port.rb @@ -159,7 +160,6 @@ yjit_task: #bootstraptest/test_insns.rb (missing opt_send) #bootstraptest/test_literal.rb (displacement bug) #bootstraptest/test_syntax.rb (missing opt_send) - #bootstraptest/test_thread.rb (deadlock) #bootstraptest/test_yjit.rb (multiple bugs) #bootstraptest/test_yjit_30k_ifelse.rb (missing opt_send) #bootstraptest/test_yjit_30k_methods.rb (missing opt_send) From 
b024b18f56df7e18bc744905f6d2e3888fc3900f Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 4 Aug 2022 16:46:36 -0400 Subject: [PATCH 439/546] Fix block invalidation with new backend. Enable more btests on x86 (https://github.com/Shopify/ruby/pull/359) --- yjit/src/core.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 3b33360b90c6d0..1bc3d738ef4edf 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -2021,7 +2021,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // machine code that some other thread is running. let block = blockref.borrow(); - let cb = CodegenGlobals::get_inline_cb(); + let mut cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); verify_blockid(block.blockid); @@ -2069,11 +2069,13 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region // Patch in a jump to block.entry_exit. 
+ let cur_pos = cb.get_write_ptr(); cb.set_write_ptr(block_start); - //jmp_ptr(cb, block_entry_exit); - todo!("jmp_ptr with new assembler"); + let mut asm = Assembler::new(); + asm.jmp(block_entry_exit.into()); + asm.compile(&mut cb); assert!( cb.get_write_ptr() < block_end, @@ -2154,13 +2156,12 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // change this in the future when we support optional parameters because // they enter the function with a non-zero PC if block.blockid.idx == 0 { + // TODO: + // We could reset the exec counter to zero in rb_iseq_reset_jit_func() + // so that we eventually compile a new entry point when useful unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) }; } - // TODO: - // May want to recompile a new entry point (for interpreter entry blocks) - // This isn't necessary for correctness - // FIXME: // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub. From 74527a764d42ebf9e7f780915dc9d73e760ed168 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 4 Aug 2022 13:58:07 -0700 Subject: [PATCH 440/546] Port send-only insns and write tests (https://github.com/Shopify/ruby/pull/360) --- yjit/src/codegen.rs | 76 ++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 903e899888d981..2862016061ade9 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1149,8 +1149,7 @@ fn gen_opt_plus( KeepCompiling } else { - todo!("opt_plus send path"); - //gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } @@ -2387,8 +2386,7 @@ fn gen_fixnum_cmp( KeepCompiling } else { - todo!("compare send path not yet implemented"); - //gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } @@ -2401,34 +2399,34 @@ fn gen_opt_lt( gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_l) } -/* fn gen_opt_le( jit: 
&mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovle) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_le) } fn gen_opt_ge( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovge) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_ge) } fn gen_opt_gt( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovg) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_g) } +/* // Implements specialized equality for either two fixnum or two strings // Returns true if code was generated, otherwise false fn gen_equality_specialized( @@ -2951,27 +2949,29 @@ fn gen_opt_minus( gen_opt_send_without_block(jit, ctx, cb, ocb) } } +*/ fn gen_opt_mult( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_div( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } +/* fn gen_opt_mod( jit: &mut JITState, ctx: &mut Context, @@ -3023,47 +3023,49 @@ fn gen_opt_mod( gen_opt_send_without_block(jit, ctx, cb, ocb) } } +*/ fn gen_opt_ltlt( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn 
gen_opt_nil_p( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_empty_p( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_succ( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } +/* fn gen_opt_str_freeze( jit: &mut JITState, ctx: &mut Context, @@ -3103,47 +3105,48 @@ fn gen_opt_str_uminus( KeepCompiling } +*/ fn gen_opt_not( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_size( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_length( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_regexpmatch2( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + 
return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_case_dispatch( _jit: &mut JITState, ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Normally this instruction would lookup the key in a hash and jump to an @@ -3158,7 +3161,6 @@ fn gen_opt_case_dispatch( KeepCompiling // continue with the next instruction } -*/ fn gen_branchif_branch( asm: &mut Assembler, @@ -5056,7 +5058,6 @@ fn gen_opt_send_without_block( gen_send_general(jit, ctx, asm, ocb, cd, None) } -/* fn gen_send( jit: &mut JITState, ctx: &mut Context, @@ -5068,6 +5069,7 @@ fn gen_send( return gen_send_general(jit, ctx, asm, ocb, cd, block); } +/* fn gen_invokesuper( jit: &mut JITState, ctx: &mut Context, @@ -5971,10 +5973,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_duparray => Some(gen_duparray), YARVINSN_checktype => Some(gen_checktype), YARVINSN_opt_lt => Some(gen_opt_lt), - /* YARVINSN_opt_le => Some(gen_opt_le), YARVINSN_opt_gt => Some(gen_opt_gt), YARVINSN_opt_ge => Some(gen_opt_ge), + /* YARVINSN_opt_mod => Some(gen_opt_mod), YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), @@ -5994,6 +5996,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_neq => Some(gen_opt_neq), YARVINSN_opt_aref => Some(gen_opt_aref), YARVINSN_opt_aset => Some(gen_opt_aset), + */ YARVINSN_opt_mult => Some(gen_opt_mult), YARVINSN_opt_div => Some(gen_opt_div), YARVINSN_opt_ltlt => Some(gen_opt_ltlt), @@ -6004,12 +6007,13 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_size => Some(gen_opt_size), YARVINSN_opt_length => Some(gen_opt_length), YARVINSN_opt_regexpmatch2 => Some(gen_opt_regexpmatch2), + /* YARVINSN_opt_getinlinecache => Some(gen_opt_getinlinecache), YARVINSN_invokebuiltin => Some(gen_invokebuiltin), YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate), YARVINSN_opt_invokebuiltin_delegate_leave => 
Some(gen_opt_invokebuiltin_delegate), - YARVINSN_opt_case_dispatch => Some(gen_opt_case_dispatch), */ + YARVINSN_opt_case_dispatch => Some(gen_opt_case_dispatch), YARVINSN_branchif => Some(gen_branchif), YARVINSN_branchunless => Some(gen_branchunless), YARVINSN_branchnil => Some(gen_branchnil), From d131b410259bef45757e4bec02c9c62c88033fc8 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Fri, 5 Aug 2022 15:26:07 +0100 Subject: [PATCH 441/546] Fix to float guard in jit_guard_known_klass to use the correct output operand. (https://github.com/Shopify/ruby/pull/365) --- yjit/src/codegen.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 2862016061ade9..ca5958723ec1f0 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3468,8 +3468,8 @@ fn jit_guard_known_klass( // We will guard flonum vs heap float as though they were separate classes asm.comment("guard object is flonum"); - asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); - asm.cmp(obj_opnd, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); } From e24037267f5912814dff463bccc618deacefd529 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 5 Aug 2022 11:05:20 -0400 Subject: [PATCH 442/546] Add Opnd::None error message to x86 backend as well --- yjit/src/backend/arm64/mod.rs | 5 ++++- yjit/src/backend/x86_64/mod.rs | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 4b0b0340885413..778f1b69926597 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -50,8 +50,11 @@ impl From for A64Opnd { panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") }, 
Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), - Opnd::None => panic!("attempted to lower an Opnd::None"), Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), + Opnd::None => panic!( + "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." + ), + } } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index d1f1698b2fdb24..696846d2a846ec 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -40,8 +40,6 @@ impl From for X86Opnd { Opnd::InsnOut{..} => panic!("InsnOut operand made it past register allocation"), - Opnd::None => X86Opnd::None, - Opnd::UImm(val) => uimm_opnd(val), Opnd::Imm(val) => imm_opnd(val), Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64), @@ -60,6 +58,10 @@ impl From for X86Opnd { mem_opnd(num_bits, X86Opnd::Reg(reg), disp) } + Opnd::None => panic!( + "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." 
+ ), + _ => panic!("unsupported x86 operand type") } } From dcb6fc16e54b83f9653bbab68ec1770b301952a0 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 5 Aug 2022 08:21:52 -0700 Subject: [PATCH 443/546] Port opt_mod to the new backend IR (https://github.com/Shopify/ruby/pull/363) --- yjit/src/codegen.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ca5958723ec1f0..33524c160f2a82 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2971,16 +2971,15 @@ fn gen_opt_div( gen_opt_send_without_block(jit, ctx, asm, ocb) } -/* fn gen_opt_mod( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2997,33 +2996,29 @@ fn gen_opt_mod( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], arg0); - mov(cb, C_ARG_REGS[1], arg1); - // Check for arg0 % 0 - cmp(cb, C_ARG_REGS[1], imm_opnd(VALUE::fixnum_from_usize(0).as_i64())); - je_ptr(cb, side_exit); + asm.cmp(arg1, Opnd::Imm(VALUE::fixnum_from_usize(0).as_i64())); + asm.je(side_exit.into()); // Call rb_fix_mod_fix(VALUE recv, VALUE obj) - call_ptr(cb, REG0, rb_fix_mod_fix as *const u8); + let ret = asm.ccall(rb_fix_mod_fix as *const u8, vec![arg0, arg1]); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, 
asm, ocb) } } -*/ fn gen_opt_ltlt( jit: &mut JITState, @@ -5976,11 +5971,9 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_le => Some(gen_opt_le), YARVINSN_opt_gt => Some(gen_opt_gt), YARVINSN_opt_ge => Some(gen_opt_ge), - /* YARVINSN_opt_mod => Some(gen_opt_mod), - YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), - YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), - */ + //YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), + //YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), From a55a3f8ad1104870d7a92f6d296325a415ed6910 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 5 Aug 2022 08:47:09 -0700 Subject: [PATCH 444/546] Port opt_minus, opt_or, and opt_and to the new IR (https://github.com/Shopify/ruby/pull/364) * Port opt_minus, opt_or, and opt_and to the new IR * Fix the Op::Or issue with push_insn * Prefer asm.store for clarity --- yjit/src/backend/arm64/mod.rs | 6 ++--- yjit/src/codegen.rs | 47 ++++++++++++++++------------------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 778f1b69926597..2cddf557562154 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -181,17 +181,17 @@ impl Assembler Op::And | Op::Or => { match (opnds[0], opnds[1]) { (Opnd::Reg(_), Opnd::Reg(_)) => { - asm.and(opnds[0], opnds[1]); + asm.push_insn(op, vec![opnds[0], opnds[1]], target, text, pos_marker); }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { let opnd1 = split_bitmask_immediate(asm, other_opnd); - asm.and(reg_opnd, opnd1); + asm.push_insn(op, vec![reg_opnd, opnd1], target, text, pos_marker); }, _ => { let opnd0 = asm.load(opnds[0]); let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.and(opnd0, opnd1); + asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); } 
} }, diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 33524c160f2a82..33de061095f59d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2809,16 +2809,17 @@ fn gen_opt_aset( gen_opt_send_without_block(jit, ctx, cb, ocb) } } +*/ fn gen_opt_and( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2835,36 +2836,35 @@ fn gen_opt_and( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Do the bitwise and arg0 & arg1 - mov(cb, REG0, arg0); - and(cb, REG0, arg1); + let val = asm.and(arg0, arg1); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.store(dst, val); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_or( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2881,36 +2881,35 @@ fn gen_opt_or( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Do the bitwise or arg0 | arg1 - mov(cb, REG0, arg0); - or(cb, REG0, arg1); + let val = asm.or(arg0, arg1); // Push the 
output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.store(dst, val); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_minus( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2927,29 +2926,27 @@ fn gen_opt_minus( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Subtract arg0 - arg1 and test for overflow - mov(cb, REG0, arg0); - sub(cb, REG0, arg1); - jo_ptr(cb, side_exit); - add(cb, REG0, imm_opnd(1)); + let val_untag = asm.sub(arg0, arg1); + asm.jo(side_exit.into()); + let val = asm.add(val_untag, Opnd::Imm(1)); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.store(dst, val); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } -*/ fn gen_opt_mult( jit: &mut JITState, @@ -5957,11 +5954,9 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), YARVINSN_opt_plus => Some(gen_opt_plus), - /* YARVINSN_opt_minus => Some(gen_opt_minus), YARVINSN_opt_and => Some(gen_opt_and), YARVINSN_opt_or => Some(gen_opt_or), - */ YARVINSN_newhash => Some(gen_newhash), YARVINSN_duphash => Some(gen_duphash), YARVINSN_newarray => Some(gen_newarray), From 7908eabf6f639cf7ec84714418b6b1aa28825af7 Mon Sep 17 
00:00:00 2001 From: Takashi Kokubun Date: Fri, 5 Aug 2022 08:58:08 -0700 Subject: [PATCH 445/546] Port setivar to the new backend IR (https://github.com/Shopify/ruby/pull/362) * Port setivar to the new backend IR * Add a few more setivar test cases * Prefer const_ptr Co-authored-by: Maxime Chevalier-Boisvert --- yjit/src/codegen.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 33de061095f59d..58bc0367565ab5 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2139,11 +2139,10 @@ fn gen_getinstancevariable( ) } -/* fn gen_setinstancevariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let id = jit_get_arg(jit, 0); @@ -2151,27 +2150,25 @@ fn gen_setinstancevariable( // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let val_opnd = ctx.stack_pop(1); // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), + asm.ccall( + rb_vm_setinstancevariable as *const u8, + vec![ + Opnd::const_ptr(jit.iseq as *const u8), + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + Opnd::UImm(id.into()), + val_opnd, + Opnd::const_ptr(ic as *const u8), + ] ); - mov(cb, C_ARG_REGS[3], val_opnd); - mov(cb, C_ARG_REGS[2], uimm_opnd(id.into())); - mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic as *const u8)); - let iseq = VALUE(jit.iseq as usize); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], iseq); - call_ptr(cb, REG0, rb_vm_setinstancevariable as *const u8); KeepCompiling } -*/ fn gen_defined( jit: &mut JITState, @@ -5977,7 +5974,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_checkkeyword => Some(gen_checkkeyword), YARVINSN_concatstrings => 
Some(gen_concatstrings), YARVINSN_getinstancevariable => Some(gen_getinstancevariable), - //YARVINSN_setinstancevariable => Some(gen_setinstancevariable), + YARVINSN_setinstancevariable => Some(gen_setinstancevariable), /* YARVINSN_opt_eq => Some(gen_opt_eq), From 0a680912e960c66b9894cd8b04ecdc819a531727 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Fri, 5 Aug 2022 17:03:06 +0100 Subject: [PATCH 446/546] x86 TEST should do a load for mem opnds first (https://github.com/Shopify/ruby/pull/368) --- yjit/src/backend/x86_64/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 696846d2a846ec..8d45230e913d94 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -117,7 +117,7 @@ impl Assembler }; match op { - Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or => { + Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test => { let (opnd0, opnd1) = match (opnds[0], opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { (asm.load(opnds[0]), asm.load(opnds[1])) From db22a560a49b4ad82430b6bec7feaf2a4a3bfe19 Mon Sep 17 00:00:00 2001 From: Zack Deveau Date: Fri, 5 Aug 2022 09:18:34 -0400 Subject: [PATCH 447/546] Port gen_opt_str_freeze to new backend IR (https://github.com/Shopify/ruby/pull/366) --- yjit/src/codegen.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 58bc0367565ab5..87bb43caa218e0 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3054,11 +3054,11 @@ fn gen_opt_succ( gen_opt_send_without_block(jit, ctx, asm, ocb) } -/* + fn gen_opt_str_freeze( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { @@ -3066,15 +3066,15 @@ fn gen_opt_str_freeze( } let str = jit_get_arg(jit, 0); - jit_mov_gc_ptr(jit, cb, REG0, str); // 
Push the return value onto the stack let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, str.into()); KeepCompiling } +/* fn gen_opt_str_uminus( jit: &mut JITState, ctx: &mut Context, @@ -5964,7 +5964,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_gt => Some(gen_opt_gt), YARVINSN_opt_ge => Some(gen_opt_ge), YARVINSN_opt_mod => Some(gen_opt_mod), - //YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), + YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), //YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), From e06c86fada92cf81cdc8e5cf4f0792ae701eb2e0 Mon Sep 17 00:00:00 2001 From: Zack Deveau Date: Fri, 5 Aug 2022 13:32:16 -0400 Subject: [PATCH 448/546] Port opt_str_uminus to new backend IR (https://github.com/Shopify/ruby/pull/370) --- yjit/src/codegen.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 87bb43caa218e0..8b724a1fc9cb03 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3074,11 +3074,10 @@ fn gen_opt_str_freeze( KeepCompiling } -/* fn gen_opt_str_uminus( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { @@ -3086,15 +3085,13 @@ fn gen_opt_str_uminus( } let str = jit_get_arg(jit, 0); - jit_mov_gc_ptr(jit, cb, REG0, str); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, str.into()); KeepCompiling } -*/ fn gen_opt_not( jit: &mut JITState, @@ -5965,7 +5962,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_ge => Some(gen_opt_ge), YARVINSN_opt_mod => Some(gen_opt_mod), YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), - //YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), + 
YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), From 2b85295d2845ee7ebefe04bcdda48970694ff0ad Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 5 Aug 2022 10:47:35 -0700 Subject: [PATCH 449/546] Port objtostring to the new backend (https://github.com/Shopify/ruby/pull/369) --- bootstraptest/test_yjit.rb | 4 ++-- yjit/src/codegen.rs | 16 +++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 16eda7fa84ecc0..826e0066fa1ac9 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3096,7 +3096,7 @@ def foo() foo() } -# toregexp +# toregexp, objtostring assert_equal '/true/', %q{ def foo() /#{true}/ @@ -3104,7 +3104,7 @@ def foo() foo().inspect } -# concatstrings +# concatstrings, objtostring assert_equal '9001', %q{ def foo() "#{9001}" diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 8b724a1fc9cb03..d054697e53a1fc 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2274,7 +2274,7 @@ fn gen_concatstrings( // Save the PC and SP because we are allocating jit_prepare_routine_call(jit, ctx, asm); - let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize())); + let values_ptr = asm.lea(ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize()))); // call rb_str_concat_literals(long n, const VALUE *strings); let return_value = asm.ccall( @@ -5317,15 +5317,14 @@ fn gen_anytostring( KeepCompiling } -/* fn gen_objtostring( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -5338,7 +5337,7 @@ fn gen_objtostring( jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, comptime_recv.class_of(), recv, @@ 
-5351,10 +5350,9 @@ fn gen_objtostring( KeepCompiling } else { let cd = jit_get_arg(jit, 0).as_ptr(); - gen_send_general(jit, ctx, cb, ocb, cd, None) + gen_send_general(jit, ctx, asm, ocb, cd, None) } } -*/ fn gen_intern( jit: &mut JITState, @@ -5388,7 +5386,7 @@ fn gen_toregexp( // raise an exception. jit_prepare_routine_call(jit, ctx, asm); - let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize))); + let values_ptr = asm.lea(ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize)))); ctx.stack_pop(cnt); let ary = asm.ccall( @@ -6011,7 +6009,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_getglobal => Some(gen_getglobal), YARVINSN_setglobal => Some(gen_setglobal), YARVINSN_anytostring => Some(gen_anytostring), - //YARVINSN_objtostring => Some(gen_objtostring), + YARVINSN_objtostring => Some(gen_objtostring), YARVINSN_intern => Some(gen_intern), YARVINSN_toregexp => Some(gen_toregexp), YARVINSN_getspecial => Some(gen_getspecial), From ffdd09e22af4966f3d8f8ada7f9ee829ba9b9212 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 5 Aug 2022 11:13:01 -0700 Subject: [PATCH 450/546] Port opt_eq and opt_neq to the new backend (https://github.com/Shopify/ruby/pull/371) * Port opt_eq and opt_neq to the new backend * Just use into() outside Co-authored-by: Alan Wu * Use C_RET_OPND to share the register * Revert "Use C_RET_OPND to share the register" This reverts commit 99381765d0008ff0f03ea97c6c8db608a2298e2b. 
Co-authored-by: Alan Wu --- yjit/src/codegen.rs | 67 +++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d054697e53a1fc..884a8344c27837 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2423,13 +2423,12 @@ fn gen_opt_gt( gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_g) } -/* // Implements specialized equality for either two fixnum or two strings // Returns true if code was generated, otherwise false fn gen_equality_specialized( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, side_exit: CodePtr, ) -> bool { @@ -2445,19 +2444,16 @@ fn gen_equality_specialized( return false; } - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); - mov(cb, REG0, a_opnd); - cmp(cb, REG0, b_opnd); + asm.cmp(a_opnd, b_opnd); - mov(cb, REG0, imm_opnd(Qfalse.into())); - mov(cb, REG1, imm_opnd(Qtrue.into())); - cmove(cb, REG0, REG1); + let val = asm.csel_ne(Opnd::Imm(Qfalse.into()), Opnd::Imm(Qtrue.into())); // Push the output on the stack ctx.stack_pop(2); let dst = ctx.stack_push(Type::UnknownImm); - mov(cb, dst, REG0); + asm.mov(dst, val); true } else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } @@ -2467,30 +2463,26 @@ fn gen_equality_specialized( return false; } - // Load a and b in preparation for call later - mov(cb, C_ARG_REGS[0], a_opnd); - mov(cb, C_ARG_REGS[1], b_opnd); - // Guard that a is a String jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cString }, - C_ARG_REGS[0], + a_opnd, StackOpnd(1), comptime_a, SEND_MAX_DEPTH, side_exit, ); - let ret = cb.new_label("ret".to_string()); + let equal = asm.new_label("equal"); + let ret = asm.new_label("ret"); // If they are equal by identity, return true - cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]); - mov(cb, RAX, imm_opnd(Qtrue.into())); - je_label(cb, ret); + asm.cmp(a_opnd, 
b_opnd); + asm.je(equal); // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) let btype = ctx.get_opnd_type(StackOpnd(0)); @@ -2500,10 +2492,10 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cString }, - C_ARG_REGS[1], + b_opnd, StackOpnd(0), comptime_b, SEND_MAX_DEPTH, @@ -2512,14 +2504,18 @@ fn gen_equality_specialized( } // Call rb_str_eql_internal(a, b) - call_ptr(cb, REG0, rb_str_eql_internal as *const u8); + let val = asm.ccall(rb_str_eql_internal as *const u8, vec![a_opnd, b_opnd]); // Push the output on the stack - cb.write_label(ret); ctx.stack_pop(2); let dst = ctx.stack_push(Type::UnknownImm); - mov(cb, dst, RAX); - cb.link_labels(); + asm.mov(dst, val); + asm.jmp(ret); + + asm.write_label(equal); + asm.mov(dst, Qtrue.into()); + + asm.write_label(ret); true } else { @@ -2530,38 +2526,39 @@ fn gen_equality_specialized( fn gen_opt_eq( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize base on a runtime receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); - if gen_equality_specialized(jit, ctx, cb, ocb, side_exit) { - jump_to_next_insn(jit, ctx, cb, ocb); + if gen_equality_specialized(jit, ctx, asm, ocb, side_exit) { + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_neq( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // opt_neq is passed two rb_call_data as arguments: // first for ==, second for != let cd = jit_get_arg(jit, 1).as_ptr(); - return gen_send_general(jit, ctx, cb, 
ocb, cd, None); + return gen_send_general(jit, ctx, asm, ocb, cd, None); } +/* fn gen_opt_aref( jit: &mut JITState, ctx: &mut Context, @@ -5971,12 +5968,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_getinstancevariable => Some(gen_getinstancevariable), YARVINSN_setinstancevariable => Some(gen_setinstancevariable), - /* YARVINSN_opt_eq => Some(gen_opt_eq), YARVINSN_opt_neq => Some(gen_opt_neq), - YARVINSN_opt_aref => Some(gen_opt_aref), - YARVINSN_opt_aset => Some(gen_opt_aset), - */ + //YARVINSN_opt_aref => Some(gen_opt_aref), + //YARVINSN_opt_aset => Some(gen_opt_aset), YARVINSN_opt_mult => Some(gen_opt_mult), YARVINSN_opt_div => Some(gen_opt_div), YARVINSN_opt_ltlt => Some(gen_opt_ltlt), From 2429635bc724c953bd653a9202c5e1768f61d254 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 5 Aug 2022 11:37:51 -0700 Subject: [PATCH 451/546] Port send to the new backend and test it (https://github.com/Shopify/ruby/pull/373) --- yjit/src/codegen.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 884a8344c27837..76874cf7588fcd 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5997,7 +5997,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), //YARVINSN_getblockparam => Some(gen_getblockparam), YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), - //YARVINSN_send => Some(gen_send), + YARVINSN_send => Some(gen_send), //YARVINSN_invokesuper => Some(gen_invokesuper), YARVINSN_leave => Some(gen_leave), From 8fffff536db7d603c6caef80d11c0926d59b1001 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 5 Aug 2022 14:41:55 -0400 Subject: [PATCH 452/546] More concise csel with Into --- yjit/src/codegen.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 76874cf7588fcd..78120a599590ef 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2448,7 
+2448,7 @@ fn gen_equality_specialized( asm.cmp(a_opnd, b_opnd); - let val = asm.csel_ne(Opnd::Imm(Qfalse.into()), Opnd::Imm(Qtrue.into())); + let val = asm.csel_ne(Qfalse.into(), Qtrue.into()); // Push the output on the stack ctx.stack_pop(2); From 8278d722907dc134e9a3436d5542d7dc168d8925 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 5 Aug 2022 16:52:23 -0400 Subject: [PATCH 453/546] Left and right shift for IR (https://github.com/Shopify/ruby/pull/374) * Left and right shift for IR * Update yjit/src/backend/x86_64/mod.rs Co-authored-by: Alan Wu Co-authored-by: Maxime Chevalier-Boisvert --- yjit/src/asm/arm64/inst/sbfm.rs | 33 +++++++++++++++++++++++-- yjit/src/asm/arm64/mod.rs | 21 ++++++++++++++++ yjit/src/backend/arm64/mod.rs | 36 +++++++++++++++++++++++++++ yjit/src/backend/ir.rs | 12 +++++++++ yjit/src/backend/x86_64/mod.rs | 43 +++++++++++++++++++++++++++++++++ 5 files changed, 143 insertions(+), 2 deletions(-) diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs index 4fbb567ed05bcb..6f69e58043c2a1 100644 --- a/yjit/src/asm/arm64/inst/sbfm.rs +++ b/yjit/src/asm/arm64/inst/sbfm.rs @@ -31,6 +31,18 @@ pub struct SBFM { } impl SBFM { + /// ASR + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en + pub fn asr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + let (imms, n) = if num_bits == 64 { + (0b111111, true) + } else { + (0b011111, false) + }; + + Self { rd, rn, immr: shift, imms, n, sf: num_bits.into() } + } + /// SXTW /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en pub fn sxtw(rd: u8, rn: u8) -> Self { @@ -44,13 +56,16 @@ const FAMILY: u32 = 0b1001; impl From for u32 { /// Convert an instruction into a 32-bit value. 
fn from(inst: SBFM) -> Self { + let immr = (inst.immr as u32) & ((1 << 6) - 1); + let imms = (inst.imms as u32) & ((1 << 6) - 1); + 0 | ((inst.sf as u32) << 31) | (FAMILY << 25) | (1 << 24) | ((inst.n as u32) << 22) - | ((inst.immr as u32) << 16) - | ((inst.imms as u32) << 10) + | (immr << 16) + | (imms << 10) | ((inst.rn as u32) << 5) | inst.rd as u32 } @@ -68,6 +83,20 @@ impl From for [u8; 4] { mod tests { use super::*; + #[test] + fn test_asr_32_bits() { + let inst = SBFM::asr(0, 1, 2, 32); + let result: u32 = inst.into(); + assert_eq!(0x13027c20, result); + } + + #[test] + fn test_asr_64_bits() { + let inst = SBFM::asr(10, 11, 5, 64); + let result: u32 = inst.into(); + assert_eq!(0x9345fd6a, result); + } + #[test] fn test_sxtw() { let inst = SBFM::sxtw(0, 1); diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index d114f64a22c3ec..68be36c2569ad8 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -166,6 +166,22 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// ASR - arithmetic shift right rn by shift, put the result in rd, don't update +/// flags +pub fn asr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(shift)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(shift, 6), "The shift operand must be 6 bits or less."); + + SBFM::asr(rd.reg_no, rn.reg_no, shift.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to asr instruction."), + }; + + cb.write_bytes(&bytes); +} + /// Whether or not the offset between two instructions fits into the branch with /// or without link instruction. If it doesn't, then we have to load the value /// into a register first. 
@@ -903,6 +919,11 @@ mod tests { check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7))); } + #[test] + fn test_asr() { + check_bytes("b4fe4a93", |cb| asr(cb, X20, X21, A64Opnd::new_uimm(10))); + } + #[test] fn test_bcond() { check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400))); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 2cddf557562154..2e8c2068af3bd0 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -576,6 +576,15 @@ impl Assembler Op::Not => { mvn(cb, insn.out.into(), insn.opnds[0].into()); }, + Op::RShift => { + asr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::URShift => { + lsr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::LShift => { + lsl(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, Op::Store => { // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to @@ -901,6 +910,33 @@ mod tests { asm.compile_with_num_regs(&mut cb, 1); } + #[test] + fn test_emit_lshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_rshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_urshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + #[test] fn test_emit_test() { let (mut asm, mut cb) = setup_asm(); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index a23b27dda26921..5eee61b2281e3b 100644 --- 
a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -66,6 +66,15 @@ pub enum Op // instruction. Not, + /// Shift a value right by a certain amount (signed). + RShift, + + /// Shift a value right by a certain amount (unsigned). + URShift, + + /// Shift a value left by a certain amount. + LShift, + // // Low-level instructions // @@ -912,6 +921,9 @@ def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); def_push_2_opnd!(or, Op::Or); def_push_1_opnd!(not, Op::Not); +def_push_2_opnd!(lshift, Op::LShift); +def_push_2_opnd!(rshift, Op::RShift); +def_push_2_opnd!(urshift, Op::URShift); def_push_1_opnd_no_out!(cpush, Op::CPush); def_push_0_opnd!(cpop, Op::CPop); def_push_1_opnd_no_out!(cpop_into, Op::CPopInto); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 8d45230e913d94..4ba849b2399ff4 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -164,6 +164,37 @@ impl Assembler asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); }, + // These instructions modify their input operand in-place, so we + // may need to load the input value to preserve it + Op::LShift | Op::RShift | Op::URShift => { + let (opnd0, opnd1) = match (opnds[0], opnds[1]) { + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { .. }, _) => { + let idx = match original_opnds[0] { + Opnd::InsnOut { idx, .. } => { + idx + }, + _ => unreachable!() + }; + + // Our input must be from a previous instruction! 
+ assert!(idx < index); + + if live_ranges[idx] > index { + (asm.load(opnds[0]), opnds[1]) + } else { + (opnds[0], opnds[1]) + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + (asm.load(opnds[0]), opnds[1]) + }, + _ => (opnds[0], opnds[1]) + }; + + asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + }, Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { let new_opnds = opnds.into_iter().map(|opnd| { @@ -293,6 +324,18 @@ impl Assembler not(cb, insn.opnds[0].into()) }, + Op::LShift => { + shl(cb, insn.opnds[0].into(), insn.opnds[1].into()) + }, + + Op::RShift => { + sar(cb, insn.opnds[0].into(), insn.opnds[1].into()) + }, + + Op::URShift => { + shr(cb, insn.opnds[0].into(), insn.opnds[1].into()) + }, + Op::Store => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), // This assumes only load instructions can contain references to GC'd Value operands From 46822ee13f4fb8d590c3bd6c66aab71566d68dfe Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Fri, 5 Aug 2022 17:09:22 -0400 Subject: [PATCH 454/546] Enable btests now passing on arm (https://github.com/Shopify/ruby/pull/378) --- .cirrus.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 18f8ae5b255110..31fa448d5a5efa 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -129,6 +129,7 @@ yjit_task: RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. 
-I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" \ bootstraptest/test_attr.rb \ bootstraptest/test_autoload.rb \ + bootstraptest/test_block.rb \ bootstraptest/test_class.rb \ bootstraptest/test_constant_cache.rb \ bootstraptest/test_env.rb \ @@ -151,15 +152,14 @@ yjit_task: bootstraptest/test_proc.rb \ bootstraptest/test_string.rb \ bootstraptest/test_struct.rb \ + bootstraptest/test_syntax.rb \ bootstraptest/test_thread.rb \ bootstraptest/test_yjit_new_backend.rb \ bootstraptest/test_yjit_rust_port.rb # These are the btests we can't run yet on arm: - #bootstraptest/test_block.rb (missing opt_send) - #bootstraptest/test_insns.rb (missing opt_send) + #bootstraptest/test_insns.rb (illegal instruction) #bootstraptest/test_literal.rb (displacement bug) - #bootstraptest/test_syntax.rb (missing opt_send) #bootstraptest/test_yjit.rb (multiple bugs) #bootstraptest/test_yjit_30k_ifelse.rb (missing opt_send) #bootstraptest/test_yjit_30k_methods.rb (missing opt_send) From 726a4519550fd9191833e482122e3c7bf9a468be Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 8 Aug 2022 08:27:54 -0700 Subject: [PATCH 455/546] Port invokebuiltin* insns to the new backend IR (https://github.com/Shopify/ruby/pull/375) * Port invokebuiltin* insns to the new backend IR * Fix the C_ARG_OPNDS check boundary --- bootstraptest/test_yjit.rb | 22 ++++++++++++ yjit/src/backend/arm64/mod.rs | 2 +- yjit/src/backend/x86_64/mod.rs | 2 +- yjit/src/codegen.rs | 65 +++++++++++++--------------------- 4 files changed, 49 insertions(+), 42 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 826e0066fa1ac9..0f9150e38849ae 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3143,3 +3143,25 @@ def foo end foo } + +# invokebuiltin +assert_equal '123', %q{ + def foo(obj) + obj.foo = 123 + end + + struct = Struct.new(:foo) + obj = struct.new + foo(obj) +} + +# invokebuiltin_delegate +assert_equal '.', %q{ + def foo(path) + 
Dir.open(path).path + end + foo(".") +} + +# opt_invokebuiltin_delegate_leave +assert_equal '[0]', %q{"\x00".unpack("c")} diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 2e8c2068af3bd0..d6e8e831c7a45b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -196,7 +196,7 @@ impl Assembler } }, Op::CCall => { - assert!(opnds.len() < C_ARG_OPNDS.len()); + assert!(opnds.len() <= C_ARG_OPNDS.len()); // For each of the operands we're going to first load them // into a register and then move them into the correct diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4ba849b2399ff4..b0802b31870075 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -400,7 +400,7 @@ impl Assembler // C function call Op::CCall => { // Temporary - assert!(insn.opnds.len() < _C_ARG_OPNDS.len()); + assert!(insn.opnds.len() <= _C_ARG_OPNDS.len()); // For each operand for (idx, opnd) in insn.opnds.iter().enumerate() { diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 78120a599590ef..7f955124452ce4 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5818,45 +5818,40 @@ fn gen_getblockparam( KeepCompiling } +*/ fn gen_invokebuiltin( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc"); // ec, self, and arguments - if bf_argc + 2 > C_ARG_REGS.len() { + if bf_argc + 2 > C_ARG_OPNDS.len() { return CantCompile; } // If the calls don't allocate, do they need up to date PC, SP? - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) 
- mov(cb, C_ARG_REGS[0], REG_EC); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals for i in 0..bf_argc { let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32); - let c_arg_reg = C_ARG_REGS[2 + i]; - mov(cb, c_arg_reg, stack_opnd); + args.push(stack_opnd); } - call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8); + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value ctx.stack_pop(bf_argc); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } @@ -5867,7 +5862,7 @@ fn gen_invokebuiltin( fn gen_opt_invokebuiltin_delegate( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); @@ -5875,44 +5870,36 @@ fn gen_opt_invokebuiltin_delegate( let start_index = jit_get_arg(jit, 1).as_i32(); // ec, self, and arguments - if bf_argc + 2 > (C_ARG_REGS.len() as i32) { + if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) { return CantCompile; } // If the calls don't allocate, do they need up to date PC, SP? - jit_prepare_routine_call(jit, ctx, cb, REG0); - - if bf_argc > 0 { - // Load environment pointer EP from CFP - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - } + jit_prepare_routine_call(jit, ctx, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) 
- mov(cb, C_ARG_REGS[0], REG_EC); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals - for i in 0..bf_argc { - let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; - let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; - let local_opnd = mem_opnd(64, REG0, offs * (SIZEOF_VALUE as i32)); - let offs: usize = (i + 2) as usize; - let c_arg_reg = C_ARG_REGS[offs]; - mov(cb, c_arg_reg, local_opnd); + if bf_argc > 0 { + // Load environment pointer EP from CFP + let ep = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + + for i in 0..bf_argc { + let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; + let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; + let local_opnd = Opnd::mem(64, ep, offs * (SIZEOF_VALUE as i32)); + args.push(local_opnd); + } } - call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8); + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } -*/ /// Maps a YARV opcode to a code generation function (if supported) fn get_gen_fn(opcode: VALUE) -> Option { @@ -5982,12 +5969,10 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_size => Some(gen_opt_size), YARVINSN_opt_length => Some(gen_opt_length), YARVINSN_opt_regexpmatch2 => Some(gen_opt_regexpmatch2), - /* - YARVINSN_opt_getinlinecache => Some(gen_opt_getinlinecache), + //YARVINSN_opt_getinlinecache => Some(gen_opt_getinlinecache), YARVINSN_invokebuiltin => Some(gen_invokebuiltin), YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate), YARVINSN_opt_invokebuiltin_delegate_leave => Some(gen_opt_invokebuiltin_delegate), - */ YARVINSN_opt_case_dispatch => Some(gen_opt_case_dispatch), 
YARVINSN_branchif => Some(gen_branchif), YARVINSN_branchunless => Some(gen_branchunless), From a3757842752e78a5f53b5dfcdcf9601c037f8c76 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Mon, 8 Aug 2022 14:49:46 -0400 Subject: [PATCH 456/546] Use new assembler to support global invalidation on A64 Previously, we patched in an x64 JMP even on A64, which resulted in invalid machine code. Use the new assembler to generate a jump instead. Add an assert to make sure patches don't step on each other since it's less clear cut on A64, where the size of the jump varies depending on its placement relative to the target. Fixes a lot of tests that use `set_trace_func` in `test_insns.rb`. PR: https://github.com/Shopify/ruby/pull/379 --- yjit/src/invariants.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index 6329c70f873981..9cdef0d8bb95d2 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -528,8 +528,6 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { return; } - use crate::asm::x86_64::jmp_ptr; - // Stop other ractors since we are going to patch machine code. with_vm_lock(src_loc!(), || { // Make it so all live block versions are no longer valid branch targets @@ -561,13 +559,18 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { // Apply patches let old_pos = cb.get_write_pos(); - let patches = CodegenGlobals::take_global_inval_patches(); + let mut patches = CodegenGlobals::take_global_inval_patches(); + patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr()); + let mut last_patch_end = std::ptr::null(); for patch in &patches { - cb.set_write_ptr(patch.inline_patch_pos); - jmp_ptr(cb, patch.outlined_target_pos); + assert!(last_patch_end <= patch.inline_patch_pos.raw_ptr(), "patches should not overlap"); - // FIXME: Can't easily check we actually wrote out the JMP at the moment. 
- // assert!(!cb.has_dropped_bytes(), "patches should have space and jump offsets should fit in JMP rel32"); + let mut asm = crate::backend::ir::Assembler::new(); + asm.jmp(patch.outlined_target_pos.into()); + + cb.set_write_ptr(patch.inline_patch_pos); + asm.compile(cb); + last_patch_end = cb.get_write_ptr().raw_ptr(); } cb.set_pos(old_pos); From a5ea577cc6fc43c5bd5beaef3f6175832a55268a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 8 Aug 2022 17:37:03 -0400 Subject: [PATCH 457/546] Update flags for data processing on ARM (https://github.com/Shopify/ruby/pull/380) * Update flags for data processing on ARM * Update yjit/src/backend/arm64/mod.rs Co-authored-by: Maxime Chevalier-Boisvert --- yjit/src/backend/arm64/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index d6e8e831c7a45b..db2a30aec05c8c 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -550,7 +550,7 @@ impl Assembler } }, Op::Add => { - add(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + adds(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, Op::FrameSetup => { stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); @@ -565,7 +565,7 @@ impl Assembler ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, Op::Sub => { - sub(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + subs(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, Op::And => { and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); From 85d6d76e41b0c2cec64e3726d8218467954f5ee6 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 8 Aug 2022 18:42:42 -0400 Subject: [PATCH 458/546] Enable test_insns.rb on arm --- .cirrus.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 31fa448d5a5efa..f91b72be64d905 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ 
-141,6 +141,7 @@ yjit_task: bootstraptest/test_flow.rb \ bootstraptest/test_fork.rb \ bootstraptest/test_gc.rb \ + bootstraptest/test_insns.rb \ bootstraptest/test_io.rb \ bootstraptest/test_jump.rb \ bootstraptest/test_literal_suffix.rb \ @@ -158,7 +159,9 @@ yjit_task: bootstraptest/test_yjit_rust_port.rb # These are the btests we can't run yet on arm: - #bootstraptest/test_insns.rb (illegal instruction) + #bootstraptest/test_block.rb (missing opt_send) + #bootstraptest/test_insns.rb (missing opt_send) + #bootstraptest/test_ractor.rb ? (untested) #bootstraptest/test_literal.rb (displacement bug) #bootstraptest/test_yjit.rb (multiple bugs) #bootstraptest/test_yjit_30k_ifelse.rb (missing opt_send) From b8846dd2f8042fc13a0f5ae17e2e2a6f400074dd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 9 Aug 2022 10:27:21 -0400 Subject: [PATCH 459/546] Load mem displacement when necessary on AArch64 (https://github.com/Shopify/ruby/pull/382) * LDR instruction for AArch64 * Split loads in arm64_split when memory address displacements do not fit --- yjit/src/asm/arm64/inst/load_literal.rs | 6 +- yjit/src/asm/arm64/inst/load_register.rs | 108 ++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/mod.rs | 46 +++++-- yjit/src/backend/arm64/mod.rs | 152 +++++++++++++++++------ 5 files changed, 261 insertions(+), 53 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/load_register.rs diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs index a49130c3eb0562..d2a5d57eea81e2 100644 --- a/yjit/src/asm/arm64/inst/load_literal.rs +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -39,7 +39,7 @@ pub struct LoadLiteral { impl LoadLiteral { /// LDR (load literal) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en - pub fn ldr(rt: u8, imm19: i32, num_bits: u8) -> Self { + pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self { Self { rt, imm19, 
opc: num_bits.into() } } } @@ -75,14 +75,14 @@ mod tests { #[test] fn test_ldr_positive() { - let inst = LoadLiteral::ldr(0, 5, 64); + let inst = LoadLiteral::ldr_literal(0, 5, 64); let result: u32 = inst.into(); assert_eq!(0x580000a0, result); } #[test] fn test_ldr_negative() { - let inst = LoadLiteral::ldr(0, -5, 64); + let inst = LoadLiteral::ldr_literal(0, -5, 64); let result: u32 = inst.into(); assert_eq!(0x58ffff60, result); } diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs new file mode 100644 index 00000000000000..3426b9ba5f9be0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_register.rs @@ -0,0 +1,108 @@ +/// Whether or not to shift the register. +enum S { + Shift = 1, + NoShift = 0 +} + +/// The option for this instruction. +enum Option { + UXTW = 0b010, + LSL = 0b011, + SXTW = 0b110, + SXTX = 0b111 +} + +/// The size of the operands of this instruction. +enum Size { + Size32 = 0b10, + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 1 1 1 0 | +/// | size. rm.............. option.. S rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadRegister { + /// The number of the register to load the value into. 
+ rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// Whether or not to shift the value of the register. + s: S, + + /// The option associated with this instruction that controls the shift. + option: Option, + + /// The number of the offset register. + rm: u8, + + /// The size of the operands. + size: Size +} + +impl LoadRegister { + /// LDR + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en + pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadRegister) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b11 << 21) + | ((inst.rm as u32) << 16) + | ((inst.option as u32) << 13) + | ((inst.s as u32) << 12) + | (0b10 << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LoadRegister) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr() { + let inst = LoadRegister::ldr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8626820, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 42df2d137a7741..ab41464013dc50 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -10,6 +10,7 @@ mod conditional; mod data_imm; mod data_reg; mod load_literal; +mod load_register; mod load_store; mod logical_imm; mod logical_reg; @@ -30,6 +31,7 @@ pub use conditional::Conditional; pub use data_imm::DataImm; pub use data_reg::DataReg; pub use load_literal::LoadLiteral; +pub use load_register::LoadRegister; pub use load_store::LoadStore; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 68be36c2569ad8..93b44dba4b5bc4 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -374,11 +374,26 @@ pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// LDR - load a memory address into a register with a register offset +pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn, rm) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LoadRegister::ldr(rt.reg_no, rn.reg_no, rm.reg_no, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction.") + }; + + cb.write_bytes(&bytes); +} + /// LDR - load a PC-relative memory address into a register -pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { +pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { 
let bytes: [u8; 4] = match rt { A64Opnd::Reg(rt) => { - LoadLiteral::ldr(rt.reg_no, rn, rt.num_bits).into() + LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into() }, _ => panic!("Invalid operand combination to ldr instruction."), }; @@ -386,12 +401,18 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { cb.write_bytes(&bytes); } +/// Whether or not a memory address displacement fits into the maximum number of +/// bits such that it can be used without loading it into a register first. +pub fn mem_disp_fits_bits(disp: i32) -> bool { + imm_fits_bits(disp.into(), 9) +} + /// LDR (post-index) - load a register from memory, update the base pointer after loading it pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -406,7 +427,7 @@ pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -426,7 +447,7 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { }, (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + assert!(mem_disp_fits_bits(rn.disp), 
"Expected displacement to be 9 bits or less"); LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -441,7 +462,7 @@ pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() }, @@ -670,7 +691,7 @@ pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -685,7 +706,7 @@ pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -700,7 +721,7 @@ pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + assert!(mem_disp_fits_bits(rn.disp), "Expected 
displacement to be 9 bits or less"); LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -1024,7 +1045,12 @@ mod tests { #[test] fn test_ldr() { - check_bytes("40010058", |cb| ldr(cb, X0, 10)); + check_bytes("6a696cf8", |cb| ldr(cb, X10, X11, X12)); + } + + #[test] + fn test_ldr_literal() { + check_bytes("40010058", |cb| ldr_literal(cb, X0, 10)); } #[test] diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index db2a30aec05c8c..196523bf74cbb2 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -84,13 +84,66 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { + /// When we're attempting to load a memory address into a register, the + /// displacement must fit into the maximum number of bits for an Op::Add + /// immediate. If it doesn't, we have to load the displacement into a + /// register first. + fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(Mem { base, disp, num_bits }) => { + if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { + asm.lea(opnd) + } else { + let disp = asm.load(Opnd::Imm(disp.into())); + let reg = match base { + MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), + MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits } + }; + + asm.add(reg, disp) + } + }, + _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") + } + } + + /// When you're storing a register into a memory location or loading a + /// memory location into a register, the displacement from the base + /// register of the memory location must fit into 9 bits. If it doesn't, + /// then we need to load that memory address into a register first. 
+ fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(mem) => { + if mem_disp_fits_bits(mem.disp) { + opnd + } else { + let base = split_lea_operand(asm, opnd); + Opnd::mem(64, base, 0) + } + }, + _ => unreachable!("Can only split memory addresses.") + } + } + + /// Any memory operands you're sending into an Op::Load instruction need + /// to be split in case their displacement doesn't fit into 9 bits. + fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(_) => { + let split_opnd = split_memory_address(asm, opnd); + asm.load(split_opnd) + }, + _ => asm.load(opnd) + } + } + /// Operands that take the place of bitmask immediates must follow a /// certain encoding. In this function we ensure that those operands /// do follow that encoding, and if they don't then we load them first. fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - Opnd::Mem(_) => asm.load(opnd), + Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => { if imm <= 0 { asm.load(opnd) @@ -116,7 +169,8 @@ impl Assembler fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnd), + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(_) => asm.load(opnd), Opnd::UImm(uimm) => { if ShiftedImmediate::try_from(uimm).is_ok() { opnd @@ -128,24 +182,6 @@ impl Assembler } } - /// When you're storing a register into a memory location, the - /// displacement from the base register of the memory location must fit - /// into 9 bits. If it doesn't, then we need to load that memory address - /// into a register first. 
- fn split_store(asm: &mut Assembler, opnd: Opnd) -> Opnd { - match opnd { - Opnd::Mem(mem) => { - if imm_fits_bits(mem.disp.into(), 9) { - opnd - } else { - let base = asm.lea(opnd); - Opnd::mem(64, base, 0) - } - }, - _ => unreachable!("Can only store memory addresses.") - } - } - self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { // Load all Value operands into registers that aren't already a part // of Load instructions. @@ -172,7 +208,7 @@ impl Assembler asm.add(reg_opnd, opnd1); }, _ => { - let opnd0 = asm.load(opnds[0]); + let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_shifted_immediate(asm, opnds[1]); asm.add(opnd0, opnd1); } @@ -189,7 +225,7 @@ impl Assembler asm.push_insn(op, vec![reg_opnd, opnd1], target, text, pos_marker); }, _ => { - let opnd0 = asm.load(opnds[0]); + let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_bitmask_immediate(asm, opnds[1]); asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); } @@ -204,7 +240,7 @@ impl Assembler // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { - let value = asm.load(opnd); + let value = split_load_operand(asm, opnd); asm.mov(C_ARG_OPNDS[idx], value); } @@ -215,16 +251,15 @@ impl Assembler Op::Cmp => { let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. 
} => opnds[0], - _ => asm.load(opnds[0]) + _ => split_load_operand(asm, opnds[0]) }; let opnd1 = split_shifted_immediate(asm, opnds[1]); - - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.cmp(opnd0, opnd1); }, Op::CRet => { if opnds[0] != Opnd::Reg(C_RET_REG) { - let value = asm.load(opnds[0]); + let value = split_load_operand(asm, opnds[0]); asm.mov(C_RET_OPND, value); } asm.cret(C_RET_OPND); @@ -234,7 +269,7 @@ impl Assembler let new_opnds = opnds.into_iter().map(|opnd| { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - _ => asm.load(opnd) + _ => split_load_operand(asm, opnd) } }).collect(); @@ -243,7 +278,7 @@ impl Assembler Op::IncrCounter => { // We'll use LDADD later which only works with registers // ... Load pointer into register - let counter_addr = asm.lea(opnds[0]); + let counter_addr = split_lea_operand(asm, opnds[0]); // Load immediates into a register let addend = match opnds[1] { @@ -255,12 +290,15 @@ impl Assembler }, Op::JmpOpnd => { if let Opnd::Mem(_) = opnds[0] { - let opnd0 = asm.load(opnds[0]); + let opnd0 = split_load_operand(asm, opnds[0]); asm.jmp_opnd(opnd0); } else { asm.jmp_opnd(opnds[0]); } }, + Op::Load => { + split_load_operand(asm, opnds[0]); + }, Op::LoadSExt => { match opnds[0] { // We only want to sign extend if the operand is a @@ -295,7 +333,7 @@ impl Assembler // we'll use the normal mov instruction. match opnds[0] { Opnd::Mem(_) => { - let opnd0 = split_store(asm, opnds[0]); + let opnd0 = split_memory_address(asm, opnds[0]); asm.store(opnd0, value); }, Opnd::Reg(_) => { @@ -308,7 +346,7 @@ impl Assembler // The value that is being negated must be in a register, so // if we get anything else we need to load it first. let opnd0 = match opnds[0] { - Opnd::Mem(_) => asm.load(opnds[0]), + Opnd::Mem(_) => split_load_operand(asm, opnds[0]), _ => opnds[0] }; @@ -318,13 +356,13 @@ impl Assembler // The displacement for the STUR instruction can't be more // than 9 bits long. 
If it's longer, we need to load the // memory address into a register first. - let opnd0 = split_store(asm, opnds[0]); + let opnd0 = split_memory_address(asm, opnds[0]); // The value being stored must be in a register, so if it's // not already one we'll load it first. let opnd1 = match opnds[1] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], - _ => asm.load(opnds[1]) + _ => split_load_operand(asm, opnds[1]) }; asm.store(opnd0, opnd1); @@ -332,19 +370,18 @@ impl Assembler Op::Sub => { let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => asm.load(opnds[0]) + _ => split_load_operand(asm, opnds[0]) }; let opnd1 = split_shifted_immediate(asm, opnds[1]); - - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.sub(opnd0, opnd1); }, Op::Test => { // The value being tested must be in a register, so if it's // not already one we'll load it first. let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => asm.load(opnds[0]) + _ => split_load_operand(asm, opnds[0]) }; // The second value must be either a register or an @@ -352,7 +389,6 @@ impl Assembler // immediate. If it's not one of those, we'll need to load // it first. let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.test(opnd0, opnd1); }, _ => { @@ -611,7 +647,7 @@ impl Assembler // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. 
- ldr(cb, insn.out.into(), 2); + ldr_literal(cb, insn.out.into(), 2); b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); @@ -901,6 +937,42 @@ mod tests { asm.compile_with_num_regs(&mut cb, 1); } + #[test] + fn test_emit_load_mem_disp_fits_into_load() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 0)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that two instructions were written: LDUR and STUR. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 10)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: ADD, LDUR, and STUR. + assert_eq!(12, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_does_not_fit_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR.
+ assert_eq!(16, cb.get_write_pos()); + } + #[test] fn test_emit_or() { let (mut asm, mut cb) = setup_asm(); From cd7506593ad6bd92dd330ba969e76ff8a1f5ee31 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 9 Aug 2022 10:28:19 -0400 Subject: [PATCH 460/546] Fix tests on yjit_backend_ir for AArch64 backend (https://github.com/Shopify/ruby/pull/383) --- yjit/src/backend/arm64/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 196523bf74cbb2..234339ca4e3b3a 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -882,8 +882,8 @@ mod tests { asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_regs(&mut cb, vec![X3_REG]); - let insns = cb.get_ptr(0).raw_ptr() as *const u32; - assert_eq!(0x8b010003, unsafe { *insns }); + // Assert that only 2 instructions were written. + assert_eq!(8, cb.get_write_pos()); } #[test] From 32d9eef5626118b5848cdbe9fe8f17da043d781c Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 9 Aug 2022 10:32:24 -0400 Subject: [PATCH 461/546] Enable test_literal.rb and test_ractor.rb on arm --- .cirrus.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.cirrus.yml b/.cirrus.yml index f91b72be64d905..1d7056c1d228ff 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -144,12 +144,14 @@ yjit_task: bootstraptest/test_insns.rb \ bootstraptest/test_io.rb \ bootstraptest/test_jump.rb \ + bootstraptest/test_literal.rb \ bootstraptest/test_literal_suffix.rb \ bootstraptest/test_load.rb \ bootstraptest/test_marshal.rb \ bootstraptest/test_massign.rb \ bootstraptest/test_method.rb \ bootstraptest/test_objectspace.rb \ + bootstraptest/test_ractor.rb \ bootstraptest/test_proc.rb \ bootstraptest/test_string.rb \ bootstraptest/test_struct.rb \ From 668b99b43b26868e3cddc41de2025841522acc3b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 9 Aug 2022 09:16:27 -0700 Subject: [PATCH 462/546] Port 
gen_send_iseq to the new backend IR (https://github.com/Shopify/ruby/pull/381) * Port gen_send_iseq to the new backend IR * Replace occurrences of 8 by SIZEOF_VALUE Co-authored-by: Maxime Chevalier-Boisvert Co-authored-by: Maxime Chevalier-Boisvert --- .cirrus.yml | 10 +-- bootstraptest/test_yjit.rb | 33 +++++++++ yjit/src/codegen.rs | 145 ++++++++++++++++--------------------- 3 files changed, 98 insertions(+), 90 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 1d7056c1d228ff..f7432526557d41 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -157,16 +157,12 @@ yjit_task: bootstraptest/test_struct.rb \ bootstraptest/test_syntax.rb \ bootstraptest/test_thread.rb \ + bootstraptest/test_yjit_30k_ifelse.rb \ + bootstraptest/test_yjit_30k_methods.rb \ bootstraptest/test_yjit_new_backend.rb \ bootstraptest/test_yjit_rust_port.rb # These are the btests we can't run yet on arm: - #bootstraptest/test_block.rb (missing opt_send) - #bootstraptest/test_insns.rb (missing opt_send) - #bootstraptest/test_ractor.rb ? 
(untested) - #bootstraptest/test_literal.rb (displacement bug) - #bootstraptest/test_yjit.rb (multiple bugs) - #bootstraptest/test_yjit_30k_ifelse.rb (missing opt_send) - #bootstraptest/test_yjit_30k_methods.rb (missing opt_send) + #bootstraptest/test_yjit.rb (out of executable memory not handled) # full_build_script: make -j diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 0f9150e38849ae..833d1393f0de79 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3165,3 +3165,36 @@ def foo(path) # opt_invokebuiltin_delegate_leave assert_equal '[0]', %q{"\x00".unpack("c")} + +# opt_send_without_block (VM_METHOD_TYPE_ISEQ) +assert_equal '1', %q{ + def foo = 1 + def bar = foo + bar +} +assert_equal '[1, 2, 3]', %q{ + def foo(a, b) = [1, a, b] + def bar = foo(2, 3) + bar +} +assert_equal '[1, 2, 3, 4, 5, 6]', %q{ + def foo(a, b, c:, d:, e: 0, f: 6) = [a, b, c, d, e, f] + def bar = foo(1, 2, c: 3, d: 4, e: 5) + bar +} +assert_equal '[1, 2, 3, 4]', %q{ + def foo(a, b = 2) = [a, b] + def bar = foo(1) + foo(3, 4) + bar +} + +assert_equal '1', %q{ + def foo(a) = a + def bar = foo(1) { 2 } + bar +} +assert_equal '[1, 2]', %q{ + def foo(a, &block) = [a, block.call] + def bar = foo(1) { 2 } + bar +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 7f955124452ce4..b8f4b842855170 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3981,7 +3981,7 @@ fn gen_send_cfunc( // sp[-3] = me; // Put compile time cme into REG1. It's assumed to be valid because we are notified when // any cme we depend on become outdated. See yjit_method_lookup_change(). 
- asm.mov(Opnd::mem(64, sp, 8 * -3), Opnd::UImm(cme as u64)); + asm.mov(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -3), Opnd::UImm(cme as u64)); // Write block handler at sp[-2] // sp[-2] = block_handler; @@ -3989,9 +3989,9 @@ fn gen_send_cfunc( // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); let block_handler = asm.or(cfp_self, Opnd::Imm(1)); - asm.mov(Opnd::mem(64, sp, 8 * -2), block_handler); + asm.mov(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), block_handler); } else { - let dst_opnd = Opnd::mem(64, sp, 8 * -2); + let dst_opnd = Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2); asm.mov(dst_opnd, Opnd::UImm(VM_BLOCK_HANDLER_NONE.into())); } @@ -4001,7 +4001,7 @@ fn gen_send_cfunc( if !kw_arg.is_null() { frame_type |= VM_FRAME_FLAG_CFRAME_KW } - asm.mov(Opnd::mem(64, sp, 8 * -1), Opnd::UImm(frame_type.into())); + asm.mov(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), Opnd::UImm(frame_type.into())); // Allocate a new CFP (ec->cfp--) let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); @@ -4117,9 +4117,8 @@ fn gen_send_cfunc( EndBlock } -/* fn gen_return_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, @@ -4127,8 +4126,7 @@ fn gen_return_branch( match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), BranchShape::Default => { - mov(cb, REG0, code_ptr_opnd(target0)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), Opnd::const_ptr(target0.raw_ptr())); } } } @@ -4136,7 +4134,7 @@ fn gen_return_branch( fn gen_send_iseq( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -4156,7 +4154,7 @@ fn gen_send_iseq( if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 { // We can't handle tailcalls - gen_counter_incr!(cb, 
send_iseq_tailcall); + gen_counter_incr!(asm, send_iseq_tailcall); return CantCompile; } @@ -4167,7 +4165,7 @@ fn gen_send_iseq( || get_iseq_flags_has_post(iseq) || get_iseq_flags_has_kwrest(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4175,14 +4173,14 @@ fn gen_send_iseq( // positionals, then we need to allocate a hash. For now we're going to // call that too complex and bail. if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } // If we have a method accepting no kwargs (**nil), exit if we have passed // it any kwargs. if supplying_kws && unsafe { get_iseq_flags_has_accepts_no_kwarg(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4197,7 +4195,7 @@ fn gen_send_iseq( // In this case (param.flags.has_block && local_iseq != iseq), // the block argument is setup as a local variable and requires // materialization (allocation). Bail. - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } } @@ -4220,7 +4218,7 @@ fn gen_send_iseq( let opts_missing: i32 = opt_num - opts_filled; if opts_filled < 0 || opts_filled > opt_num { - gen_counter_incr!(cb, send_iseq_arity_error); + gen_counter_incr!(asm, send_iseq_arity_error); return CantCompile; } @@ -4228,7 +4226,7 @@ fn gen_send_iseq( // would need to move adjust the arguments location to account for that. // For now we aren't handling this case. 
if doing_kw_call && opts_missing > 0 { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4256,7 +4254,7 @@ fn gen_send_iseq( // We have so many keywords that (1 << num) encoded as a FIXNUM // (which shifts it left one more) no longer fits inside a 32-bit // immediate. - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4294,7 +4292,7 @@ fn gen_send_iseq( // If the keyword was never found, then we know we have a // mismatch in the names of the keyword arguments, so we need to // bail. - gen_counter_incr!(cb, send_iseq_kwargs_mismatch); + gen_counter_incr!(asm, send_iseq_kwargs_mismatch); return CantCompile; } Some((callee_idx, _)) if callee_idx < keyword_required_num => { @@ -4307,7 +4305,7 @@ fn gen_send_iseq( } assert!(required_kwargs_filled <= keyword_required_num); if required_kwargs_filled != keyword_required_num { - gen_counter_incr!(cb, send_iseq_kwargs_mismatch); + gen_counter_incr!(asm, send_iseq_kwargs_mismatch); return CantCompile; } } @@ -4319,7 +4317,7 @@ fn gen_send_iseq( let side_exit = get_side_exit(jit, ocb, ctx); // Check for interrupts - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) }; let leaf_builtin: Option<*const rb_builtin_function> = if leaf_builtin_raw.is_null() { @@ -4329,26 +4327,23 @@ fn gen_send_iseq( }; if let (None, Some(builtin_info)) = (block, leaf_builtin) { let builtin_argc = unsafe { (*builtin_info).argc }; - if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_REGS.len() as i32) { - add_comment(cb, "inlined leaf builtin"); + if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_OPNDS.len() as i32) { + asm.comment("inlined leaf builtin"); // Call the builtin func (ec, recv, arg1, arg2, ...) 
- mov(cb, C_ARG_REGS[0], REG_EC); + let mut args = vec![EC]; // Copy self and arguments for i in 0..=builtin_argc { let stack_opnd = ctx.stack_opnd(builtin_argc - i); - let idx: usize = (i + 1).try_into().unwrap(); - let c_arg_reg = C_ARG_REGS[idx]; - mov(cb, c_arg_reg, stack_opnd); + args.push(stack_opnd); } ctx.stack_pop((builtin_argc + 1).try_into().unwrap()); - let builtin_func_ptr = unsafe { (*builtin_info).func_ptr as *const u8 }; - call_ptr(cb, REG0, builtin_func_ptr); + let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args); // Push the return value let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); // Note: assuming that the leaf builtin doesn't change local variables here. // Seems like a safe assumption. @@ -4360,13 +4355,13 @@ fn gen_send_iseq( // Stack overflow check // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2. // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) - add_comment(cb, "stack overflow check"); + asm.comment("stack overflow check"); let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap(); let locals_offs = (SIZEOF_VALUE as i32) * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32); - lea(cb, REG0, ctx.sp_opnd(locals_offs as isize)); - cmp(cb, REG_CFP, REG0); - jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow)); + let stack_limit = asm.lea(ctx.sp_opnd(locals_offs as isize)); + asm.cmp(CFP, stack_limit); + asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into()); if doing_kw_call { // Here we're calling a method with keyword arguments and specifying @@ -4391,7 +4386,7 @@ fn gen_send_iseq( // keyword parameters. let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; - add_comment(cb, "keyword args"); + asm.comment("keyword args"); // This is the list of keyword arguments that the callee specified // in its initial declaration. 
@@ -4448,8 +4443,7 @@ fn gen_send_iseq( default_value = Qnil; } - jit_mov_gc_ptr(jit, cb, REG0, default_value); - mov(cb, default_arg, REG0); + asm.mov(default_arg, default_value.into()); caller_kwargs[kwarg_idx] = callee_kwarg; kwarg_idx += 1; @@ -4487,7 +4481,7 @@ fn gen_send_iseq( let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw) .try_into() .unwrap(); - stack_swap(jit, ctx, cb, offset0, offset1, REG1, REG0); + stack_swap(jit, ctx, asm, offset0, offset1); // Next we're going to do some bookkeeping on our end so // that we know the order that the arguments are @@ -4502,80 +4496,70 @@ fn gen_send_iseq( // Keyword arguments cause a special extra local variable to be // pushed onto the stack that represents the parameters that weren't // explicitly given a value and have a non-constant default. - let unspec_opnd = uimm_opnd(VALUE::fixnum_from_usize(unspecified_bits).as_u64()); - mov(cb, ctx.stack_opnd(-1), unspec_opnd); + let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64(); + asm.mov(ctx.stack_opnd(-1), unspec_opnd.into()); } // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); // Store the updated SP on the current frame (pop arguments and receiver) - add_comment(cb, "store caller sp"); - lea( - cb, - REG0, - ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1)), - ); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0); + asm.comment("store caller sp"); + let caller_sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1))); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); // Store the next PC in the current frame - jit_save_pc(jit, cb, REG0); + jit_save_pc(jit, asm); if let Some(block_val) = block { // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases // with cfp->block_code. 
- let gc_ptr = VALUE(block_val as usize); - jit_mov_gc_ptr(jit, cb, REG0, gc_ptr); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), REG0); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE(block_val as usize).into()); } // Adjust the callee's stack pointer let offs = (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 }); - lea(cb, REG0, ctx.sp_opnd(offs)); + let callee_sp = asm.lea(ctx.sp_opnd(offs)); // Initialize local variables to Qnil for i in 0..num_locals { let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3); - mov(cb, mem_opnd(64, REG0, offs), uimm_opnd(Qnil.into())); + asm.mov(Opnd::mem(64, callee_sp, offs), Qnil.into()); } - add_comment(cb, "push env"); + asm.comment("push env"); // Put compile time cme into REG1. It's assumed to be valid because we are notified when // any cme we depend on become outdated. See yjit_method_lookup_change(). - jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize)); // Write method entry at sp[-3] // sp[-3] = me; - mov(cb, mem_opnd(64, REG0, 8 * -3), REG1); + asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -3), VALUE(cme as usize).into()); // Write block handler at sp[-2] // sp[-2] = block_handler; match block { Some(_) => { // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); - lea(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF)); - or(cb, REG1, imm_opnd(1)); - mov(cb, mem_opnd(64, REG0, 8 * -2), REG1); + let block_handler = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + let block_handler = asm.or(block_handler, 1.into()); + asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), block_handler); } None => { - mov( - cb, - mem_opnd(64, REG0, 8 * -2), - uimm_opnd(VM_BLOCK_HANDLER_NONE.into()), - ); + asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), VM_BLOCK_HANDLER_NONE.into()); } } // Write env flags at sp[-1] // sp[-1] = frame_type; let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; - mov(cb, mem_opnd(64, REG0, 8 * 
-1), uimm_opnd(frame_type.into())); + asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -1), frame_type.into()); - add_comment(cb, "push callee CFP"); + asm.comment("push callee CFP"); // Allocate a new CFP (ec->cfp--) - sub(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP); + let new_cfp = asm.sub(CFP, (RUBY_SIZEOF_CONTROL_FRAME as u64).into()); + asm.mov(CFP, new_cfp); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Setup the new frame // *cfp = (const struct rb_control_frame_struct) { @@ -4587,20 +4571,14 @@ fn gen_send_iseq( // .block_code = 0, // .__bp__ = sp, // }; - mov(cb, REG1, recv); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), REG1); - mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BP), REG0); - sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP), REG0); - jit_mov_gc_ptr(jit, cb, REG0, VALUE(iseq as usize)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ), REG0); - mov( - cb, - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), - imm_opnd(0), - ); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), recv); + asm.mov(SP, callee_sp); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), callee_sp); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BP), callee_sp); + let callee_ep = asm.sub(callee_sp, (SIZEOF_VALUE as u64).into()); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP), callee_ep); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), VALUE(iseq as usize).into()); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), 0.into()); // No need to set cfp->pc since the callee sets it whenever calling into routines // that could look at it through jit_save_pc(). 
@@ -4662,12 +4640,13 @@ fn gen_send_iseq( iseq: iseq, idx: start_pc_offset, }, - cb, + asm, ); EndBlock } +/* fn gen_struct_aref( jit: &mut JITState, ctx: &mut Context, @@ -4876,7 +4855,7 @@ fn gen_send_general( let def_type = unsafe { get_cme_def_type(cme) }; match def_type { VM_METHOD_TYPE_ISEQ => { - return CantCompile; // return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc); + return gen_send_iseq(jit, ctx, asm, ocb, ci, cme, block, argc); } VM_METHOD_TYPE_CFUNC => { return gen_send_cfunc( From 49c9f893f863108f741b6b6535dc53126733ded0 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 9 Aug 2022 10:01:20 -0700 Subject: [PATCH 463/546] Port expandarray to the new backend IR (https://github.com/Shopify/ruby/pull/376) * Port expandarray to the new backend IR * More use of into() * Break out live ranges * Refactor the code further * Reuse registers more --- yjit/src/codegen.rs | 104 ++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 57 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b8f4b842855170..491eec0aeb81ae 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1285,50 +1285,43 @@ fn gen_newrange( KeepCompiling } -/* fn guard_object_is_heap( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - _ctx: &mut Context, + asm: &mut Assembler, + object_opnd: Opnd, side_exit: CodePtr, ) { - add_comment(cb, "guard object is heap"); + asm.comment("guard object is heap"); // Test that the object is not an immediate - test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); + asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(side_exit.into()); // Test that the object is not false or nil - cmp(cb, object_opnd, uimm_opnd(Qnil.into())); - jbe_ptr(cb, side_exit); + asm.cmp(object_opnd, Qnil.into()); + asm.jbe(side_exit.into()); } fn guard_object_is_array( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - flags_opnd: X86Opnd, - _ctx: &mut Context, + asm: 
&mut Assembler, + object_opnd: Opnd, side_exit: CodePtr, ) { - add_comment(cb, "guard object is array"); + asm.comment("guard object is array"); // Pull out the type mask - mov( - cb, - flags_opnd, - mem_opnd( - 8 * SIZEOF_VALUE as u8, - object_opnd, - RUBY_OFFSET_RBASIC_FLAGS, - ), + let flags_opnd = Opnd::mem( + 8 * SIZEOF_VALUE as u8, + object_opnd, + RUBY_OFFSET_RBASIC_FLAGS, ); - and(cb, flags_opnd, uimm_opnd(RUBY_T_MASK as u64)); + let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); // Compare the result with T_ARRAY - cmp(cb, flags_opnd, uimm_opnd(RUBY_T_ARRAY as u64)); - jne_ptr(cb, side_exit); + asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into()); + asm.jne(side_exit.into()); } +/* fn guard_object_is_string( cb: &mut CodeBlock, object_reg: X86Opnd, @@ -1353,12 +1346,13 @@ fn guard_object_is_string( cmp(cb, flags_reg, uimm_opnd(RUBY_T_STRING as u64)); jne_ptr(cb, side_exit); } +*/ // push enough nils onto the stack to fill out an array fn gen_expandarray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let flag = jit_get_arg(jit, 1); @@ -1366,13 +1360,13 @@ fn gen_expandarray( // If this instruction has the splat flag, then bail out. if flag_value & 0x01 != 0 { - incr_counter!(expandarray_splat); + gen_counter_incr!(asm, expandarray_splat); return CantCompile; } // If this instruction has the postarg flag, then bail out. if flag_value & 0x02 != 0 { - incr_counter!(expandarray_postarg); + gen_counter_incr!(asm, expandarray_postarg); return CantCompile; } @@ -1389,24 +1383,21 @@ fn gen_expandarray( // push N nils onto the stack for _i in 0..(num.into()) { let push_opnd = ctx.stack_push(Type::Nil); - mov(cb, push_opnd, uimm_opnd(Qnil.into())); + asm.mov(push_opnd, Qnil.into()); } return KeepCompiling; } // Move the array from the stack into REG0 and check that it's an array. 
- mov(cb, REG0, array_opnd); + let array_reg = asm.load(array_opnd); guard_object_is_heap( - cb, - REG0, - ctx, + asm, + array_reg, counted_exit!(ocb, side_exit, expandarray_not_array), ); guard_object_is_array( - cb, - REG0, - REG1, - ctx, + asm, + array_reg, counted_exit!(ocb, side_exit, expandarray_not_array), ); @@ -1416,52 +1407,51 @@ fn gen_expandarray( } // Pull out the embed flag to check if it's an embedded array. - let flags_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RBASIC_FLAGS); - mov(cb, REG1, flags_opnd); + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); // Move the length of the embedded array into REG1. - and(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_MASK as u64)); - shr(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_SHIFT as u64)); + let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into()); + let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into()); // Conditionally move the length of the heap array into REG1. - test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64)); - let array_len_opnd = mem_opnd( + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + let array_len_opnd = Opnd::mem( (8 * size_of::()) as u8, - REG0, + asm.load(array_opnd), RUBY_OFFSET_RARRAY_AS_HEAP_LEN, ); - cmovz(cb, REG1, array_len_opnd); + let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd); // Only handle the case where the number of values in the array is greater // than or equal to the number of values requested. - cmp(cb, REG1, uimm_opnd(num.into())); - jl_ptr(cb, counted_exit!(ocb, side_exit, expandarray_rhs_too_small)); + asm.cmp(array_len_opnd, num.into()); + asm.jo(counted_exit!(ocb, side_exit, expandarray_rhs_too_small).into()); // Load the address of the embedded array into REG1. 
// (struct RArray *)(obj)->as.ary - let ary_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RARRAY_AS_ARY); - lea(cb, REG1, ary_opnd); + let array_reg = asm.load(array_opnd); + let ary_opnd = asm.lea(Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); // Conditionally load the address of the heap array into REG1. // (struct RArray *)(obj)->as.heap.ptr - test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64)); - let heap_ptr_opnd = mem_opnd( + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64)); + let heap_ptr_opnd = Opnd::mem( (8 * size_of::()) as u8, - REG0, + asm.load(array_opnd), RUBY_OFFSET_RARRAY_AS_HEAP_PTR, ); - cmovz(cb, REG1, heap_ptr_opnd); + let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd); // Loop backward through the array and push each element onto the stack. for i in (0..(num.as_i32())).rev() { let top = ctx.stack_push(Type::Unknown); - mov(cb, REG0, mem_opnd(64, REG1, i * (SIZEOF_VALUE as i32))); - mov(cb, top, REG0); + asm.mov(top, Opnd::mem(64, ary_opnd, i * (SIZEOF_VALUE as i32))); } KeepCompiling } -*/ fn gen_getlocal_wc0( jit: &mut JITState, @@ -5927,7 +5917,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_splatarray => Some(gen_splatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), - //YARVINSN_expandarray => Some(gen_expandarray), + YARVINSN_expandarray => Some(gen_expandarray), YARVINSN_defined => Some(gen_defined), YARVINSN_checkkeyword => Some(gen_checkkeyword), YARVINSN_concatstrings => Some(gen_concatstrings), From 3f42028e3e7df7d476e71cc995608e26208e3ae0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 4 Aug 2022 15:29:31 -0400 Subject: [PATCH 464/546] Iterator (https://github.com/Shopify/ruby/pull/372) * Iterator * Use the new iterator for the X86 backend split * Use iterator for reg alloc, remove forward pass * Fix up iterator usage on AArch64 
* Update yjit/src/backend/ir.rs Co-authored-by: Maxime Chevalier-Boisvert * Various PR feedback for iterators for IR * Use a local mutable reference for a64_split * Move tests from ir.rs to tests.rs in backend * Fix x86 shift instructions live range calculation * Iterator * Use the new iterator for the X86 backend split * Fix up x86 iterator usage * Fix ARM iterator usage * Remove unintentionally duplicated tests --- yjit/src/backend/arm64/mod.rs | 34 +++-- yjit/src/backend/ir.rs | 238 +++++++++++++++++++++------------ yjit/src/backend/tests.rs | 38 ++++++ yjit/src/backend/x86_64/mod.rs | 114 ++++++++-------- 4 files changed, 274 insertions(+), 150 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 234339ca4e3b3a..fac77f972d2b37 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -182,12 +182,14 @@ impl Assembler } } - self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { - // Load all Value operands into registers that aren't already a part - // of Load instructions. - let opnds = match op { - Op::Load => opnds, - _ => opnds.into_iter().map(|opnd| { + let mut asm_local = Assembler::new_with_label_names(std::mem::take(&mut self.label_names)); + let asm = &mut asm_local; + let mut iterator = self.into_draining_iter(); + + while let Some((index, insn)) = iterator.next_mapped() { + let opnds = match insn.op { + Op::Load => insn.opnds, + _ => insn.opnds.into_iter().map(|opnd| { if let Opnd::Value(_) = opnd { asm.load(opnd) } else { @@ -196,7 +198,7 @@ impl Assembler }).collect() }; - match op { + match insn.op { Op::Add => { match (opnds[0], opnds[1]) { (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. 
}) => { @@ -217,17 +219,17 @@ impl Assembler Op::And | Op::Or => { match (opnds[0], opnds[1]) { (Opnd::Reg(_), Opnd::Reg(_)) => { - asm.push_insn(op, vec![opnds[0], opnds[1]], target, text, pos_marker); + asm.push_insn(insn.op, vec![opnds[0], opnds[1]], insn.target, insn.text, insn.pos_marker); }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { let opnd1 = split_bitmask_immediate(asm, other_opnd); - asm.push_insn(op, vec![reg_opnd, opnd1], target, text, pos_marker); + asm.push_insn(insn.op, vec![reg_opnd, opnd1], insn.target, insn.text, insn.pos_marker); }, _ => { let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.push_insn(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); } } }, @@ -246,7 +248,7 @@ impl Assembler // Now we push the CCall without any arguments so that it // just performs the call. - asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]); + asm.ccall(insn.target.unwrap().unwrap_fun_ptr(), vec![]); }, Op::Cmp => { let opnd0 = match opnds[0] { @@ -273,7 +275,7 @@ impl Assembler } }).collect(); - asm.push_insn(op, new_opnds, target, text, pos_marker); + asm.push_insn(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); }, Op::IncrCounter => { // We'll use LDADD later which only works with registers @@ -392,10 +394,14 @@ impl Assembler asm.test(opnd0, opnd1); }, _ => { - asm.push_insn(op, opnds, target, text, pos_marker); + asm.push_insn(insn.op, opnds, insn.target, insn.text, insn.pos_marker); } }; - }) + + iterator.map_insn_index(asm); + } + + asm_local } /// Emit platform-specific machine code diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 5eee61b2281e3b..2dfb859fe9d548 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -2,8 +2,10 @@ #![allow(unused_variables)] #![allow(unused_imports)] +use std::cell::Cell; use std::fmt; use 
std::convert::From; +use std::mem::take; use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; @@ -288,6 +290,20 @@ impl Opnd _ => unreachable!() } } + + /// Maps the indices from a previous list of instructions to a new list of + /// instructions. + pub fn map_index(self, indices: &Vec) -> Opnd { + match self { + Opnd::InsnOut { idx, num_bits } => { + Opnd::InsnOut { idx: indices[idx], num_bits } + } + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + Opnd::Mem(Mem { base: MemBase::InsnOut(indices[idx]), disp, num_bits }) + }, + _ => self + } + } } impl From for Opnd { @@ -433,11 +449,15 @@ pub struct Assembler impl Assembler { - pub fn new() -> Assembler { - Assembler { + pub fn new() -> Self { + Self::new_with_label_names(Vec::default()) + } + + pub fn new_with_label_names(label_names: Vec) -> Self { + Self { insns: Vec::default(), live_ranges: Vec::default(), - label_names: Vec::default(), + label_names } } @@ -573,58 +593,6 @@ impl Assembler self.live_ranges.push(self.insns.len()); } - /// Transform input instructions, consumes the input assembler - pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler - where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option, Option, Vec) - { - let mut asm = Assembler { - insns: Vec::default(), - live_ranges: Vec::default(), - label_names: self.label_names, - }; - - // Indices maps from the old instruction index to the new instruction - // index. - let mut indices: Vec = Vec::default(); - - // Map an operand to the next set of instructions by correcting previous - // InsnOut indices. 
- fn map_opnd(opnd: Opnd, indices: &mut Vec) -> Opnd { - match opnd { - Opnd::InsnOut{ idx, num_bits } => { - Opnd::InsnOut{ idx: indices[idx], num_bits } - } - Opnd::Mem(Mem{ base: MemBase::InsnOut(idx), disp, num_bits, }) => { - Opnd::Mem(Mem{ base:MemBase::InsnOut(indices[idx]), disp, num_bits }) - } - _ => opnd - } - } - - for (index, insn) in self.insns.drain(..).enumerate() { - let original_opnds = insn.opnds.clone(); - let opnds: Vec = insn.opnds.into_iter().map(|opnd| map_opnd(opnd, &mut indices)).collect(); - - // For each instruction, either handle it here or allow the map_insn - // callback to handle it. - match insn.op { - Op::Comment => { - asm.comment(insn.text.unwrap().as_str()); - }, - _ => { - map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text, insn.pos_marker, original_opnds); - } - }; - - // Here we're assuming that if we've pushed multiple instructions, - // the output that we're using is still the final instruction that - // was pushed. - indices.push(asm.insns.len() - 1); - } - - asm - } - /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. @@ -671,13 +639,15 @@ impl Assembler } } - let live_ranges: Vec = std::mem::take(&mut self.live_ranges); + let live_ranges: Vec = take(&mut self.live_ranges); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + let mut iterator = self.into_draining_iter(); - let asm = self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_insns| { + while let Some((index, insn)) = iterator.next_unmapped() { // Check if this is the last instruction that uses an operand that // spans more than one instruction. In that case, return the // allocated register to the pool. - for opnd in &opnds { + for opnd in &insn.opnds { match opnd { Opnd::InsnOut{idx, .. 
} | Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { @@ -693,7 +663,7 @@ impl Assembler if let Opnd::Reg(reg) = asm.insns[start_index].out { dealloc_reg(&mut pool, ®s, ®); } else { - unreachable!("no register allocated for insn {:?}", op); + unreachable!("no register allocated for insn {:?}", insn.op); } } } @@ -703,7 +673,7 @@ impl Assembler } // C return values need to be mapped to the C return register - if op == Op::CCall { + if insn.op == Op::CCall { assert_eq!(pool, 0, "register lives past C function call"); } @@ -713,7 +683,7 @@ impl Assembler if live_ranges[index] != index { // C return values need to be mapped to the C return register - if op == Op::CCall { + if insn.op == Op::CCall { out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &C_RET_REG)) } @@ -722,8 +692,8 @@ impl Assembler // We do this to improve register allocation on x86 // e.g. out = add(reg0, reg1) // reg0 = add(reg0, reg1) - else if opnds.len() > 0 { - if let Opnd::InsnOut{idx, ..} = opnds[0] { + else if insn.opnds.len() > 0 { + if let Opnd::InsnOut{idx, ..} = insn.opnds[0] { if live_ranges[idx] == index { if let Opnd::Reg(reg) = asm.insns[idx].out { out_reg = Opnd::Reg(take_reg(&mut pool, ®s, ®)) @@ -734,9 +704,9 @@ impl Assembler // Allocate a new register for this instruction if out_reg == Opnd::None { - out_reg = if op == Op::LiveReg { + out_reg = if insn.op == Op::LiveReg { // Allocate a specific register - let reg = opnds[0].unwrap_reg(); + let reg = insn.opnds[0].unwrap_reg(); Opnd::Reg(take_reg(&mut pool, ®s, ®)) } else { Opnd::Reg(alloc_reg(&mut pool, ®s)) @@ -745,7 +715,7 @@ impl Assembler } // Replace InsnOut operands by their corresponding register - let reg_opnds: Vec = opnds.into_iter().map(|opnd| + let reg_opnds: Vec = insn.opnds.into_iter().map(|opnd| match opnd { Opnd::InsnOut{idx, ..} => asm.insns[idx].out, Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { @@ -760,7 +730,7 @@ impl Assembler } ).collect(); - asm.push_insn(op, reg_opnds, target, text, 
pos_marker); + asm.push_insn(insn.op, reg_opnds, insn.target, insn.text, insn.pos_marker); // Set the output register for this instruction let num_insns = asm.insns.len(); @@ -770,7 +740,7 @@ impl Assembler out_reg = Opnd::Reg(reg.sub_reg(num_out_bits)) } new_insn.out = out_reg; - }); + } assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); asm @@ -792,6 +762,123 @@ impl Assembler let alloc_regs = alloc_regs.drain(0..num_regs).collect(); self.compile_with_regs(cb, alloc_regs) } + + /// Consume the assembler by creating a new draining iterator. + pub fn into_draining_iter(self) -> AssemblerDrainingIterator { + AssemblerDrainingIterator::new(self) + } + + /// Consume the assembler by creating a new lookback iterator. + pub fn into_lookback_iter(self) -> AssemblerLookbackIterator { + AssemblerLookbackIterator::new(self) + } + + pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { + let target = Target::FunPtr(fptr); + self.push_insn(Op::CCall, opnds, Some(target), None, None) + } + + // pub fn pos_marker(&mut self, marker_fn: F) + pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { + self.push_insn(Op::PosMarker, vec![], None, None, Some(Box::new(marker_fn))); + } +} + +/// A struct that allows iterating through an assembler's instructions and +/// consuming them as it iterates. +pub struct AssemblerDrainingIterator { + insns: std::vec::IntoIter, + index: usize, + indices: Vec +} + +impl AssemblerDrainingIterator { + fn new(asm: Assembler) -> Self { + Self { + insns: asm.insns.into_iter(), + index: 0, + indices: Vec::default() + } + } + + /// When you're working with two lists of instructions, you need to make + /// sure you do some bookkeeping to align the indices contained within the + /// operands of the two lists. + /// + /// This function accepts the assembler that is being built and tracks the + /// end of the current list of instructions in order to maintain that + /// alignment. 
+ pub fn map_insn_index(&mut self, asm: &mut Assembler) { + self.indices.push(asm.insns.len() - 1); + } + + /// Map an operand by using this iterator's list of mapped indices. + pub fn map_opnd(&self, opnd: Opnd) -> Opnd { + opnd.map_index(&self.indices) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the next list of instructions. + pub fn next_mapped(&mut self) -> Option<(usize, Insn)> { + self.next_unmapped().map(|(index, insn)| { + let opnds = insn.opnds.into_iter().map(|opnd| opnd.map_index(&self.indices)).collect(); + (index, Insn { opnds, ..insn }) + }) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the previous list of instructions. + pub fn next_unmapped(&mut self) -> Option<(usize, Insn)> { + let index = self.index; + self.index += 1; + self.insns.next().map(|insn| (index, insn)) + } +} + +/// A struct that allows iterating through references to an assembler's +/// instructions without consuming them. +pub struct AssemblerLookbackIterator { + asm: Assembler, + index: Cell +} + +impl AssemblerLookbackIterator { + fn new(asm: Assembler) -> Self { + Self { asm, index: Cell::new(0) } + } + + /// Fetches a reference to an instruction at a specific index. + pub fn get(&self, index: usize) -> Option<&Insn> { + self.asm.insns.get(index) + } + + /// Fetches a reference to an instruction in the list relative to the + /// current cursor location of this iterator. + pub fn get_relative(&self, difference: i32) -> Option<&Insn> { + let index: Result = self.index.get().try_into(); + let relative: Result = index.and_then(|value| (value + difference).try_into()); + relative.ok().and_then(|value| self.asm.insns.get(value)) + } + + /// Fetches the previous instruction relative to the current cursor location + /// of this iterator. 
+ pub fn get_previous(&self) -> Option<&Insn> { + self.get_relative(-1) + } + + /// Fetches the next instruction relative to the current cursor location of + /// this iterator. + pub fn get_next(&self) -> Option<&Insn> { + self.get_relative(1) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the previous list of instructions. + pub fn next_unmapped(&self) -> Option<(usize, &Insn)> { + let index = self.index.get(); + self.index.set(index + 1); + self.asm.insns.get(index).map(|insn| (index, insn)) + } } impl fmt::Debug for Assembler { @@ -806,21 +893,6 @@ impl fmt::Debug for Assembler { } } -impl Assembler -{ - pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd - { - let target = Target::FunPtr(fptr); - self.push_insn(Op::CCall, opnds, Some(target), None, None) - } - - //pub fn pos_marker(&mut self, marker_fn: F) - pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) - { - self.push_insn(Op::PosMarker, vec![], None, None, Some(Box::new(marker_fn))); - } -} - macro_rules! 
def_push_jcc { ($op_name:ident, $opcode:expr) => { impl Assembler diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index a31e16071bdf75..e4ab95d4ee721e 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -299,3 +299,41 @@ fn test_bake_string() { asm.bake_string("Hello, world!"); asm.compile_with_num_regs(&mut cb, 0); } + +#[test] +fn test_draining_iterator() { + let mut asm = Assembler::new(); + + asm.load(Opnd::None); + asm.store(Opnd::None, Opnd::None); + asm.add(Opnd::None, Opnd::None); + + let mut iter = asm.into_draining_iter(); + + while let Some((index, insn)) = iter.next_unmapped() { + match index { + 0 => assert_eq!(insn.op, Op::Load), + 1 => assert_eq!(insn.op, Op::Store), + 2 => assert_eq!(insn.op, Op::Add), + _ => panic!("Unexpected instruction index"), + }; + } +} + +#[test] +fn test_lookback_iterator() { + let mut asm = Assembler::new(); + + asm.load(Opnd::None); + asm.store(Opnd::None, Opnd::None); + asm.store(Opnd::None, Opnd::None); + + let mut iter = asm.into_lookback_iter(); + + while let Some((index, insn)) = iter.next_unmapped() { + if index > 0 { + assert_eq!(iter.get_previous().unwrap().opnds[0], Opnd::None); + assert_eq!(insn.op, Op::Store); + } + } +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index b0802b31870075..9fcbb69a688541 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -2,11 +2,13 @@ #![allow(unused_variables)] #![allow(unused_imports)] +use std::mem::take; + use crate::asm::*; use crate::asm::x86_64::*; use crate::codegen::{JITState}; use crate::cruby::*; -use crate::backend::ir::{Assembler, Opnd, Target, Op, MemBase, Mem}; +use crate::backend::ir::*; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -94,31 +96,51 @@ impl Assembler /// Split IR instructions for the x86 platform fn x86_split(mut self) -> Assembler { - let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - - 
self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { - // Load VALUEs into registers because - // - Most instructions can't be encoded with 64-bit immediates. - // - We look for Op::Load specifically when emiting to keep GC'ed - // VALUEs alive. This is a sort of canonicalization. - let opnds = match op { - Op::Load => opnds, - _ => opnds.into_iter().map(|opnd| { - if let Opnd::Value(value) = opnd { - // Since mov(mem64, imm32) sign extends, as_i64() makes sure we split - // when the extended value is different. - if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { - return asm.load(opnd); - } + let live_ranges: Vec = take(&mut self.live_ranges); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + let mut iterator = self.into_draining_iter(); + + while let Some((index, insn)) = iterator.next_unmapped() { + // When we're iterating through the instructions with x86_split, we + // need to know the previous live ranges in order to tell if a + // register lasts beyond the current instruction. So instead of + // using next_mapped, we call next_unmapped. When you're using the + // next_unmapped API, you need to make sure that you map each + // operand that could reference an old index, which means both + // Opnd::InsnOut operands and Opnd::Mem operands with a base of + // MemBase::InsnOut. + // + // You need to ensure that you only map it _once_, because otherwise + // you'll end up mapping an incorrect index which could end up being + // out of bounds of the old set of indices. + // + // We handle all of that mapping here to ensure that it's only + // mapped once. We also handle loading Opnd::Value operands into + // registers here so that all mapping happens in one place. We load + // Opnd::Value operands into registers here because: + // + // - Most instructions can't be encoded with 64-bit immediates. + // - We look for Op::Load specifically when emiting to keep GC'ed + // VALUEs alive. 
This is a sort of canonicalization. + let opnds: Vec = insn.opnds.iter().map(|opnd| { + if insn.op == Op::Load { + iterator.map_opnd(*opnd) + } else if let Opnd::Value(value) = opnd { + // Since mov(mem64, imm32) sign extends, as_i64() makes sure + // we split when the extended value is different. + if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { + asm.load(iterator.map_opnd(*opnd)) + } else { + iterator.map_opnd(*opnd) } + } else { + iterator.map_opnd(*opnd) + } + }).collect(); - opnd - }).collect() - }; - - match op { + match insn.op { Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test => { - let (opnd0, opnd1) = match (opnds[0], opnds[1]) { + let (opnd0, opnd1) = match (insn.opnds[0], insn.opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { (asm.load(opnds[0]), asm.load(opnds[1])) }, @@ -138,17 +160,7 @@ impl Assembler } }, // Instruction output whose live range spans beyond this instruction - (Opnd::InsnOut { .. }, _) => { - let idx = match original_opnds[0] { - Opnd::InsnOut { idx, .. } => { - idx - }, - _ => panic!("nooooo") - }; - - // Our input must be from a previous instruction! - assert!(idx < index); - + (Opnd::InsnOut { idx, .. }, _) => { if live_ranges[idx] > index { (asm.load(opnds[0]), opnds[1]) } else { @@ -162,24 +174,14 @@ impl Assembler _ => (opnds[0], opnds[1]) }; - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.push_insn(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); }, // These instructions modify their input operand in-place, so we // may need to load the input value to preserve it Op::LShift | Op::RShift | Op::URShift => { - let (opnd0, opnd1) = match (opnds[0], opnds[1]) { + let (opnd0, opnd1) = match (insn.opnds[0], insn.opnds[1]) { // Instruction output whose live range spans beyond this instruction - (Opnd::InsnOut { .. }, _) => { - let idx = match original_opnds[0] { - Opnd::InsnOut { idx, .. 
} => { - idx - }, - _ => unreachable!() - }; - - // Our input must be from a previous instruction! - assert!(idx < index); - + (Opnd::InsnOut { idx, .. }, _) => { if live_ranges[idx] > index { (asm.load(opnds[0]), opnds[1]) } else { @@ -193,7 +195,7 @@ impl Assembler _ => (opnds[0], opnds[1]) }; - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.push_insn(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); }, Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { @@ -204,7 +206,7 @@ impl Assembler } }).collect(); - asm.push_insn(op, new_opnds, target, text, pos_marker); + asm.push_insn(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); }, Op::Mov => { match (opnds[0], opnds[1]) { @@ -236,7 +238,7 @@ impl Assembler } }, Op::Not => { - let opnd0 = match opnds[0] { + let opnd0 = match insn.opnds[0] { // If we have an instruction output whose live range // spans beyond this instruction, we have to load it. Opnd::InsnOut { idx, .. } => { @@ -248,7 +250,9 @@ impl Assembler }, // We have to load memory and register operands to avoid // corrupting them. - Opnd::Mem(_) | Opnd::Reg(_) => asm.load(opnds[0]), + Opnd::Mem(_) | Opnd::Reg(_) => { + asm.load(opnds[0]) + }, // Otherwise we can just reuse the existing operand. 
_ => opnds[0] }; @@ -256,10 +260,14 @@ impl Assembler asm.not(opnd0); }, _ => { - asm.push_insn(op, opnds, target, text, pos_marker); + asm.push_insn(insn.op, opnds, insn.target, insn.text, insn.pos_marker); } }; - }) + + iterator.map_insn_index(&mut asm); + } + + asm } /// Emit platform-specific machine code From 1cafb1a7a67c776018656a5461965cf15d22b220 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 9 Aug 2022 11:48:23 -0700 Subject: [PATCH 465/546] Prefer asm.store over asm.mov (https://github.com/Shopify/ruby/pull/385) * Prefer asm.store over asm.mov * Reverse a couple of unsure changes * Revert changes that don't work --- yjit/src/codegen.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 491eec0aeb81ae..67cad1c141ba8e 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3979,10 +3979,10 @@ fn gen_send_cfunc( // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); let block_handler = asm.or(cfp_self, Opnd::Imm(1)); - asm.mov(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), block_handler); + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), block_handler); } else { let dst_opnd = Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2); - asm.mov(dst_opnd, Opnd::UImm(VM_BLOCK_HANDLER_NONE.into())); + asm.store(dst_opnd, Opnd::UImm(VM_BLOCK_HANDLER_NONE.into())); } // Write env flags at sp[-1] @@ -3991,12 +3991,12 @@ fn gen_send_cfunc( if !kw_arg.is_null() { frame_type |= VM_FRAME_FLAG_CFRAME_KW } - asm.mov(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), Opnd::UImm(frame_type.into())); + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), Opnd::UImm(frame_type.into())); // Allocate a new CFP (ec->cfp--) let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); let new_cfp = asm.sub(ec_cfp_opnd, Opnd::UImm(RUBY_SIZEOF_CONTROL_FRAME as u64)); - asm.store(ec_cfp_opnd, new_cfp); + asm.mov(ec_cfp_opnd, 
new_cfp); // Setup the new frame // *cfp = (const struct rb_control_frame_struct) { @@ -4496,7 +4496,7 @@ fn gen_send_iseq( // Store the updated SP on the current frame (pop arguments and receiver) asm.comment("store caller sp"); let caller_sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1))); - asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); // Store the next PC in the current frame jit_save_pc(jit, asm); @@ -4505,7 +4505,7 @@ fn gen_send_iseq( // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases // with cfp->block_code. - asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE(block_val as usize).into()); + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE(block_val as usize).into()); } // Adjust the callee's stack pointer @@ -4516,7 +4516,7 @@ fn gen_send_iseq( // Initialize local variables to Qnil for i in 0..num_locals { let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3); - asm.mov(Opnd::mem(64, callee_sp, offs), Qnil.into()); + asm.store(Opnd::mem(64, callee_sp, offs), Qnil.into()); } asm.comment("push env"); @@ -4524,7 +4524,7 @@ fn gen_send_iseq( // any cme we depend on become outdated. See yjit_method_lookup_change(). 
// Write method entry at sp[-3] // sp[-3] = me; - asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -3), VALUE(cme as usize).into()); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -3), VALUE(cme as usize).into()); // Write block handler at sp[-2] // sp[-2] = block_handler; @@ -4533,23 +4533,23 @@ fn gen_send_iseq( // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); let block_handler = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); let block_handler = asm.or(block_handler, 1.into()); - asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), block_handler); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), block_handler); } None => { - asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), VM_BLOCK_HANDLER_NONE.into()); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), VM_BLOCK_HANDLER_NONE.into()); } } // Write env flags at sp[-1] // sp[-1] = frame_type; let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; - asm.mov(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -1), frame_type.into()); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -1), frame_type.into()); asm.comment("push callee CFP"); // Allocate a new CFP (ec->cfp--) let new_cfp = asm.sub(CFP, (RUBY_SIZEOF_CONTROL_FRAME as u64).into()); asm.mov(CFP, new_cfp); - asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Setup the new frame // *cfp = (const struct rb_control_frame_struct) { From cad35fb25c4300a53ad33d833d4595c2bdd30e00 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 9 Aug 2022 16:03:39 -0400 Subject: [PATCH 466/546] Try to run make -j in CI backend tests (https://github.com/Shopify/ruby/pull/386) --- .cirrus.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index f7432526557d41..51aadfc6be8294 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -165,4 +165,6 @@ yjit_task: # These are the btests we can't run yet 
on arm: #bootstraptest/test_yjit.rb (out of executable memory not handled) - # full_build_script: make -j + # FIXME: not currently working on CI, missing cargo + # Check that we can do a full ruby build + #full_build_script: make -j From 6b9cec78a18ae1788c8c939c705b85bd0a6efc3a Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Wed, 10 Aug 2022 16:13:22 +0100 Subject: [PATCH 467/546] Port cfunc lookup, plus simpler cfunc generators. (https://github.com/Shopify/ruby/pull/388) This port does *not* create invalidation regions to ensure minimum invalidatable block sizes, and so it does not port the to_s generator. --- yjit/src/codegen.rs | 161 +++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 67cad1c141ba8e..83d09362d1708c 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3531,6 +3531,7 @@ fn jit_protected_callee_ancestry_guard( counted_exit!(ocb, side_exit, send_se_protected_check_failed), ); } +*/ // Codegen for rb_obj_not(). // Note, caller is responsible for generating all the right guards, including @@ -3538,7 +3539,7 @@ fn jit_protected_callee_ancestry_guard( fn jit_rb_obj_not( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3550,17 +3551,17 @@ fn jit_rb_obj_not( match recv_opnd.known_truthy() { Some(false) => { - add_comment(cb, "rb_obj_not(nil_or_false)"); + asm.comment("rb_obj_not(nil_or_false)"); ctx.stack_pop(1); let out_opnd = ctx.stack_push(Type::True); - mov(cb, out_opnd, uimm_opnd(Qtrue.into())); + asm.mov(out_opnd, Qtrue.into()); }, Some(true) => { // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. 
- add_comment(cb, "rb_obj_not(truthy)"); + asm.comment("rb_obj_not(truthy)"); ctx.stack_pop(1); let out_opnd = ctx.stack_push(Type::False); - mov(cb, out_opnd, uimm_opnd(Qfalse.into())); + asm.mov(out_opnd, Qfalse.into()); }, _ => { return false; @@ -3574,7 +3575,7 @@ fn jit_rb_obj_not( fn jit_rb_true( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3582,10 +3583,10 @@ fn jit_rb_true( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "nil? == true"); + asm.comment("nil? == true"); ctx.stack_pop(1); let stack_ret = ctx.stack_push(Type::True); - mov(cb, stack_ret, uimm_opnd(Qtrue.into())); + asm.mov(stack_ret, Qtrue.into()); true } @@ -3593,7 +3594,7 @@ fn jit_rb_true( fn jit_rb_false( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3601,10 +3602,10 @@ fn jit_rb_false( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "nil? == false"); + asm.comment("nil? 
== false"); ctx.stack_pop(1); let stack_ret = ctx.stack_push(Type::False); - mov(cb, stack_ret, uimm_opnd(Qfalse.into())); + asm.mov(stack_ret, Qfalse.into()); true } @@ -3613,7 +3614,7 @@ fn jit_rb_false( fn jit_rb_obj_equal( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3621,18 +3622,15 @@ fn jit_rb_obj_equal( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "equal?"); + asm.comment("equal?"); let obj1 = ctx.stack_pop(1); let obj2 = ctx.stack_pop(1); - mov(cb, REG0, obj1); - cmp(cb, REG0, obj2); - mov(cb, REG0, uimm_opnd(Qtrue.into())); - mov(cb, REG1, uimm_opnd(Qfalse.into())); - cmovne(cb, REG0, REG1); + asm.cmp(obj1, obj2); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, ret_opnd); true } @@ -3640,7 +3638,7 @@ fn jit_rb_obj_equal( fn jit_rb_str_uplus( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3649,35 +3647,30 @@ fn jit_rb_str_uplus( _known_recv_class: *const VALUE, ) -> bool { - let recv = ctx.stack_pop(1); - - add_comment(cb, "Unary plus on string"); - mov(cb, REG0, recv); - mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - test(cb, REG1, imm_opnd(RUBY_FL_FREEZE as i64)); + asm.comment("Unary plus on string"); + let recv_opnd = asm.load(ctx.stack_pop(1)); + let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64)); - let ret_label = cb.new_label("stack_ret".to_string()); + let ret_label = asm.new_label("stack_ret"); // If the string isn't frozen, we just return it. It's already in REG0. 
- jz_label(cb, ret_label); + asm.jz(ret_label); // Str is frozen - duplicate - mov(cb, C_ARG_REGS[0], REG0); - call_ptr(cb, REG0, rb_str_dup as *const u8); - // Return value is in REG0, drop through and return it. + let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); - cb.write_label(ret_label); + asm.write_label(ret_label); // We guard for an exact-class match on the receiver of rb_cString let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, ret_opnd); - cb.link_labels(); true } fn jit_rb_str_bytesize( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3685,18 +3678,18 @@ fn jit_rb_str_bytesize( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "String#bytesize"); + asm.comment("String#bytesize"); let recv = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], recv); - call_ptr(cb, REG0, rb_str_bytesize as *const u8); + let ret_opnd = asm.ccall(rb_str_bytesize as *const u8, vec![recv]); let out_opnd = ctx.stack_push(Type::Fixnum); - mov(cb, out_opnd, RAX); + asm.mov(out_opnd, ret_opnd); true } +/* // Codegen for rb_str_to_s() // When String#to_s is called on a String instance, the method returns self and // most of the overhead comes from setting up the method call. We observed that @@ -3727,7 +3720,7 @@ fn jit_rb_str_to_s( fn jit_rb_str_concat( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3748,64 +3741,69 @@ fn jit_rb_str_concat( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the argument is of class String at runtime. 
- jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - unsafe { rb_cString }, - ctx.stack_opnd(0), - StackOpnd(0), - comptime_arg, - SEND_MAX_DEPTH, - side_exit, - ); + let insn_opnd = StackOpnd(0); + let arg_opnd = asm.load(ctx.stack_opnd(0)); + let arg_type = ctx.get_opnd_type(insn_opnd); + + if arg_type != Type::CString && arg_type != Type::TString { + if !arg_type.is_heap() { + asm.comment("guard arg not immediate"); + asm.test(REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); + asm.jnz(Target::CodePtr(side_exit)); + asm.cmp(arg_opnd, Qnil.into()); + asm.jbe(Target::CodePtr(side_exit)); + + ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); + } + guard_object_is_string(cb, REG0, REG1, side_exit); + // We know this has type T_STRING, but not necessarily that it's a ::String + ctx.upgrade_opnd_type(insn_opnd, Type::TString); + } let concat_arg = ctx.stack_pop(1); let recv = ctx.stack_pop(1); // Test if string encodings differ. If different, use rb_str_append. If the same, // use rb_yjit_str_simple_append, which calls rb_str_cat. - add_comment(cb, "<< on strings"); + asm.comment("<< on strings"); // Both rb_str_append and rb_yjit_str_simple_append take identical args - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], concat_arg); + let ccall_args = vec![recv, concat_arg]; // Take receiver's object flags XOR arg's flags. If any // string-encoding flags are different between the two, // the encodings don't match. 
- mov(cb, REG0, recv); - mov(cb, REG1, concat_arg); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - xor(cb, REG0, mem_opnd(64, REG1, RUBY_OFFSET_RBASIC_FLAGS)); - test(cb, REG0, uimm_opnd(RUBY_ENCODING_MASK as u64)); + let flags_xor = asm.xor( + Opnd::mem(64, asm.load(recv), RUBY_OFFSET_RBASIC_FLAGS), + Opnd::mem(64, asm.load(concat_arg), RUBY_OFFSET_RBASIC_FLAGS) + ); + asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64)); - let enc_mismatch = cb.new_label("enc_mismatch".to_string()); - jnz_label(cb, enc_mismatch); + let enc_mismatch = asm.new_label("enc_mismatch"); + asm.jnz(enc_mismatch); // If encodings match, call the simple append function and jump to return - call_ptr(cb, REG0, rb_yjit_str_simple_append as *const u8); - let ret_label: usize = cb.new_label("stack_return".to_string()); - jmp_label(cb, ret_label); + let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, ccall_args); + let ret_label = asm.new_label("stack_return"); + asm.jmp(ret_label); // If encodings are different, use a slower encoding-aware concatenate - cb.write_label(enc_mismatch); - call_ptr(cb, REG0, rb_str_buf_append as *const u8); + asm.write_label(enc_mismatch); + asm.ccall(rb_str_buf_append as *const u8, ccall_args); // Drop through to return - cb.write_label(ret_label); + asm.write_label(ret_label); let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret_opnd); - cb.link_labels(); true } +*/ fn jit_thread_s_current( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3813,22 +3811,19 @@ fn jit_thread_s_current( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "Thread.current"); + asm.comment("Thread.current"); ctx.stack_pop(1); // ec->thread_ptr - let ec_thread_ptr = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_THREAD_PTR); - mov(cb, REG0, ec_thread_ptr); + let 
ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); // thread->self - let thread_self = mem_opnd(64, REG0, RUBY_OFFSET_THREAD_SELF); - mov(cb, REG0, thread_self); + let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF); let stack_ret = ctx.stack_push(Type::UnknownHeap); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, thread_self); true } -*/ // Check if we know how to codegen for a particular cfunc method fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option { @@ -5976,7 +5971,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { type MethodGenFn = fn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -6151,7 +6146,6 @@ impl CodegenGlobals { /// Register codegen functions for some Ruby core methods fn reg_method_codegen_fns(&mut self) { - /* unsafe { // Specialization for C methods. See yjit_reg_method() for details. self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not); @@ -6167,10 +6161,10 @@ impl CodegenGlobals { self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal); // rb_str_to_s() methods in string.c - self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); - self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); + //self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); + //self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); - self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); + //self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); // Thread.current @@ -6180,7 +6174,6 @@ impl CodegenGlobals { jit_thread_s_current, ); } - */ } /// Get a mutable reference to the codegen globals instance From 8c45b8a989fea4c74d68c464a55aa54e9144994a Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 10 Aug 2022 13:37:49 
-0400 Subject: [PATCH 468/546] Update asm comments for gen_send_iseq --- yjit/src/codegen.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 83d09362d1708c..d8998b9a98729c 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -4514,11 +4514,11 @@ fn gen_send_iseq( asm.store(Opnd::mem(64, callee_sp, offs), Qnil.into()); } - asm.comment("push env"); - // Put compile time cme into REG1. It's assumed to be valid because we are notified when + // Write the callee CME on the stack. It's assumed to be valid because we are notified when // any cme we depend on become outdated. See yjit_method_lookup_change(). // Write method entry at sp[-3] // sp[-3] = me; + asm.comment("push cme, block handler, frame type"); asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -3), VALUE(cme as usize).into()); // Write block handler at sp[-2] @@ -4540,7 +4540,7 @@ fn gen_send_iseq( let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -1), frame_type.into()); - asm.comment("push callee CFP"); + asm.comment("push callee control frame"); // Allocate a new CFP (ec->cfp--) let new_cfp = asm.sub(CFP, (RUBY_SIZEOF_CONTROL_FRAME as u64).into()); asm.mov(CFP, new_cfp); From ca2afba4a7162ca9bbaa12891da5d55f81752d23 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 10 Aug 2022 13:05:59 -0700 Subject: [PATCH 469/546] Port the remaining method types in opt_send_without_block (https://github.com/Shopify/ruby/pull/390) --- bootstraptest/test_yjit.rb | 22 ++++++++++ yjit/src/codegen.rs | 82 +++++++++++++++----------------------- 2 files changed, 54 insertions(+), 50 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 833d1393f0de79..7aed5ac43cc1d6 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3198,3 +3198,25 @@ def foo(a, &block) = [a, block.call] def bar = foo(1) { 2 } bar } + +# 
opt_send_without_block (VM_METHOD_TYPE_IVAR) +assert_equal 'foo', %q{ + class Foo + attr_reader :foo + + def initialize + @foo = "foo" + end + end + Foo.new.foo +} + +# opt_send_without_block (VM_METHOD_TYPE_OPTIMIZED) +assert_equal 'foo', %q{ + Foo = Struct.new(:bar) + Foo.new("bar").bar = "foo" +} +assert_equal 'foo', %q{ + Foo = Struct.new(:bar) + Foo.new("foo").bar +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index d8998b9a98729c..4e79eb5c00c853 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3503,35 +3503,30 @@ fn jit_guard_known_klass( } } -/* // Generate ancestry guard for protected callee. // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee). fn jit_protected_callee_ancestry_guard( jit: &mut JITState, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cme: *const rb_callable_method_entry_t, side_exit: CodePtr, ) { // See vm_call_method(). - mov( - cb, - C_ARG_REGS[0], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); let def_class = unsafe { (*cme).defined_class }; - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], def_class); // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise. // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); - call_ptr(cb, REG0, rb_obj_is_kind_of as *mut u8); - test(cb, RAX, RAX); - jz_ptr( - cb, - counted_exit!(ocb, side_exit, send_se_protected_check_failed), + let val = asm.ccall( + rb_obj_is_kind_of as *mut u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + def_class.into(), + ], ); + asm.test(val, val); + asm.jz(counted_exit!(ocb, side_exit, send_se_protected_check_failed).into()) } -*/ // Codegen for rb_obj_not(). 
// Note, caller is responsible for generating all the right guards, including @@ -4631,11 +4626,10 @@ fn gen_send_iseq( EndBlock } -/* fn gen_struct_aref( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -4669,32 +4663,28 @@ fn gen_struct_aref( // true of the converse. let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) }; - add_comment(cb, "struct aref"); - - let recv = ctx.stack_pop(1); + asm.comment("struct aref"); - mov(cb, REG0, recv); + let recv = asm.load(ctx.stack_pop(1)); - if embedded != VALUE(0) { - let ary_elt = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_ARY + (8 * off)); - mov(cb, REG0, ary_elt); + let val = if embedded != VALUE(0) { + Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_ARY + ((SIZEOF_VALUE as i32) * off)) } else { - let rstruct_ptr = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR); - mov(cb, REG0, rstruct_ptr); - mov(cb, REG0, mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * off)); - } + let rstruct_ptr = asm.load(Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR)); + Opnd::mem(64, rstruct_ptr, (SIZEOF_VALUE as i32) * off) + }; let ret = ctx.stack_push(Type::Unknown); - mov(cb, ret, REG0); + asm.mov(ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } fn gen_struct_aset( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -4713,23 +4703,19 @@ fn gen_struct_aset( assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); - add_comment(cb, "struct aset"); + asm.comment("struct aset"); let val = ctx.stack_pop(1); let recv = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], imm_opnd(off as i64)); - mov(cb, C_ARG_REGS[2], val); - call_ptr(cb, 
REG0, RSTRUCT_SET as *const u8); + let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]); let ret = ctx.stack_push(Type::Unknown); - mov(cb, ret, RAX); + asm.mov(ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } -*/ fn gen_send_general( jit: &mut JITState, @@ -4823,7 +4809,7 @@ fn gen_send_general( if flags & VM_CALL_FCALL == 0 { // otherwise we need an ancestry check to ensure the receiver is vaild to be called // as protected - return CantCompile; // jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit); + jit_protected_callee_ancestry_guard(jit, asm, ocb, cme, side_exit); } } _ => { @@ -4876,22 +4862,20 @@ fn gen_send_general( return CantCompile; } - return CantCompile; /* - mov(cb, REG0, recv); let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; return gen_get_ivar( jit, ctx, - cb, + asm, ocb, SEND_MAX_DEPTH, comptime_recv, ivar_name, + recv, recv_opnd, side_exit, ); - */ } VM_METHOD_TYPE_ATTRSET => { if flags & VM_CALL_KWARG != 0 { @@ -4934,26 +4918,25 @@ fn gen_send_general( } // Send family of methods, e.g. 
call/apply VM_METHOD_TYPE_OPTIMIZED => { - return CantCompile; /* let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; match opt_type { OPTIMIZED_METHOD_TYPE_SEND => { - gen_counter_incr!(cb, send_optimized_method_send); + gen_counter_incr!(asm, send_optimized_method_send); return CantCompile; } OPTIMIZED_METHOD_TYPE_CALL => { - gen_counter_incr!(cb, send_optimized_method_call); + gen_counter_incr!(asm, send_optimized_method_call); return CantCompile; } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { - gen_counter_incr!(cb, send_optimized_method_block_call); + gen_counter_incr!(asm, send_optimized_method_block_call); return CantCompile; } OPTIMIZED_METHOD_TYPE_STRUCT_AREF => { return gen_struct_aref( jit, ctx, - cb, + asm, ocb, ci, cme, @@ -4965,7 +4948,7 @@ fn gen_send_general( return gen_struct_aset( jit, ctx, - cb, + asm, ocb, ci, cme, @@ -4977,7 +4960,6 @@ fn gen_send_general( panic!("unknown optimized method type!") } } - */ } VM_METHOD_TYPE_MISSING => { gen_counter_incr!(asm, send_missing_method); From e5969f8587dd80367347356e11ddcf3976439d1c Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 10 Aug 2022 13:09:23 -0700 Subject: [PATCH 470/546] Port invokesuper to the new backend IR (https://github.com/Shopify/ruby/pull/391) --- yjit/src/codegen.rs | 48 ++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 4e79eb5c00c853..1ac4f54952ec74 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -4998,11 +4998,10 @@ fn gen_send( return gen_send_general(jit, ctx, asm, ocb, cd, block); } -/* fn gen_invokesuper( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); @@ -5010,7 +5009,7 @@ fn gen_invokesuper( // Defer compilation so we can specialize on class of receiver if !jit_at_current_insn(jit) { - 
defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -5048,19 +5047,19 @@ fn gen_invokesuper( // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. if ci_flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(cb, send_args_splat); + gen_counter_incr!(asm, send_args_splat); return CantCompile; } if ci_flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(cb, send_keywords); + gen_counter_incr!(asm, send_keywords); return CantCompile; } if ci_flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(cb, send_kw_splat); + gen_counter_incr!(asm, send_kw_splat); return CantCompile; } if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(cb, send_block_arg); + gen_counter_incr!(asm, send_block_arg); return CantCompile; } @@ -5100,16 +5099,15 @@ fn gen_invokesuper( return CantCompile; } - add_comment(cb, "guard known me"); - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - let ep_me_opnd = mem_opnd( + asm.comment("guard known me"); + let ep_opnd = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + let ep_me_opnd = Opnd::mem( 64, - REG0, + ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_ME_CREF as i32), ); - jit_mov_gc_ptr(jit, cb, REG1, me_as_value); - cmp(cb, ep_me_opnd, REG1); - jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_me_changed)); + asm.cmp(ep_me_opnd, me_as_value.into()); + asm.jne(counted_exit!(ocb, side_exit, invokesuper_me_changed).into()); if block.is_none() { // Guard no block passed @@ -5118,21 +5116,18 @@ fn gen_invokesuper( // // TODO: this could properly forward the current block handler, but // would require changes to gen_send_* - add_comment(cb, "guard no block given"); + asm.comment("guard no block given"); // EP is in REG0 from above - let ep_specval_opnd = mem_opnd( + let ep_opnd = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + let ep_specval_opnd = Opnd::mem( 64, - REG0, + ep_opnd, (SIZEOF_VALUE as i32) * 
(VM_ENV_DATA_INDEX_SPECVAL as i32), ); - cmp(cb, ep_specval_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into())); - jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_block)); + asm.cmp(ep_specval_opnd, VM_BLOCK_HANDLER_NONE.into()); + asm.jne(counted_exit!(ocb, side_exit, invokesuper_block).into()); } - // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); - mov(cb, REG0, recv); - // We need to assume that both our current method entry and the super // method entry we invoke remain stable assume_method_lookup_stable(jit, ocb, current_defined_class, me); @@ -5142,14 +5137,13 @@ fn gen_invokesuper( ctx.clear_local_types(); match cme_def_type { - VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc), + VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, asm, ocb, ci, cme, block, argc), VM_METHOD_TYPE_CFUNC => { - gen_send_cfunc(jit, ctx, cb, ocb, ci, cme, block, argc, ptr::null()) + gen_send_cfunc(jit, ctx, asm, ocb, ci, cme, block, argc, ptr::null()) } _ => unreachable!(), } } -*/ fn gen_leave( jit: &mut JITState, @@ -5929,7 +5923,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { //YARVINSN_getblockparam => Some(gen_getblockparam), YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), YARVINSN_send => Some(gen_send), - //YARVINSN_invokesuper => Some(gen_invokesuper), + YARVINSN_invokesuper => Some(gen_invokesuper), YARVINSN_leave => Some(gen_leave), YARVINSN_getglobal => Some(gen_getglobal), From df84832c758e3dcb360f18335c2c93dcc90344aa Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 10 Aug 2022 13:29:01 -0700 Subject: [PATCH 471/546] Port getblockparamproxy and getblockparam (https://github.com/Shopify/ruby/pull/394) --- bootstraptest/test_yjit.rb | 16 +++++ yjit/src/codegen.rs | 132 +++++++++++++++---------------------- 2 files changed, 70 insertions(+), 78 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 7aed5ac43cc1d6..d44fe258005ee0 100644 --- 
a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3220,3 +3220,19 @@ def initialize Foo = Struct.new(:bar) Foo.new("foo").bar } + +# getblockparamproxy +assert_equal 'foo', %q{ + def foo(&block) + block.call + end + foo { "foo" } +} + +# getblockparam +assert_equal 'foo', %q{ + def foo(&block) + block + end + foo { "foo" }.call +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1ac4f54952ec74..b5b145790c7de4 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5556,6 +5556,7 @@ fn gen_opt_getinlinecache( ); EndBlock } +*/ // Push the explicit block parameter onto the temporary stack. Part of the // interpreter's scheme for avoiding Proc allocations when delegating @@ -5563,11 +5564,11 @@ fn gen_opt_getinlinecache( fn gen_getblockparamproxy( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -5590,79 +5591,64 @@ fn gen_getblockparamproxy( } // Load environment pointer EP from CFP - gen_get_ep(cb, REG0, level); + let ep_opnd = gen_get_ep(asm, level); // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero - let flag_check = mem_opnd( + let flag_check = Opnd::mem( 64, - REG0, + ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32), ); - test( - cb, - flag_check, - uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), - ); - jnz_ptr(cb, counted_exit!(ocb, side_exit, gbpp_block_param_modified)); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.jnz(counted_exit!(ocb, side_exit, gbpp_block_param_modified).into()); // Load the block handler for the current frame // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) - mov( - cb, - REG0, - mem_opnd( - 64, - REG0, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ), + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, 
(SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32)) ); // Specialize compilation for the case where no block handler is present if comptime_handler.as_u64() == 0 { // Bail if there is a block handler - cmp(cb, REG0, uimm_opnd(0)); + asm.cmp(block_handler, Opnd::UImm(0)); jit_chain_guard( JCC_JNZ, jit, &starting_context, - cb, + asm, ocb, SEND_MAX_DEPTH, side_exit, ); - jit_putobject(jit, ctx, cb, Qnil); + jit_putobject(jit, ctx, asm, Qnil); } else { // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P(). - and(cb, REG0_8, imm_opnd(0x3)); + let block_handler = asm.and(block_handler, 0x3.into()); // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null. - cmp(cb, REG0_8, imm_opnd(0x1)); + asm.cmp(block_handler, 0x1.into()); jit_chain_guard( JCC_JNZ, jit, &starting_context, - cb, + asm, ocb, SEND_MAX_DEPTH, side_exit, ); // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr. - mov( - cb, - REG0, - const_ptr_opnd(unsafe { rb_block_param_proxy }.as_ptr()), - ); assert!(!unsafe { rb_block_param_proxy }.special_const_p()); let top = ctx.stack_push(Type::Unknown); - mov(cb, top, REG0); + asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr())); } - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } @@ -5670,95 +5656,85 @@ fn gen_getblockparamproxy( fn gen_getblockparam( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // EP level let level = jit_get_arg(jit, 1).as_u32(); // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // A mirror of the interpreter code. Checking for the case // where it's pushing rb_block_param_proxy. 
let side_exit = get_side_exit(jit, ocb, ctx); // Load environment pointer EP from CFP - gen_get_ep(cb, REG1, level); + let ep_opnd = gen_get_ep(asm, level); // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero - let flag_check = mem_opnd( - 64, - REG1, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32), - ); + let flag_check = Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32)); // FIXME: This is testing bits in the same place that the WB check is testing. // We should combine these at some point - test( - cb, - flag_check, - uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), - ); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); // If the frame flag has been modified, then the actual proc value is // already in the EP and we should just use the value. - let frame_flag_modified = cb.new_label("frame_flag_modified".to_string()); - jnz_label(cb, frame_flag_modified); + let frame_flag_modified = asm.new_label("frame_flag_modified"); + asm.jnz(frame_flag_modified); // This instruction writes the block handler to the EP. If we need to // fire a write barrier for the write, then exit (we'll let the // interpreter handle it so it can fire the write barrier). 
// flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG1, + ep_opnd, SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED.into())); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); - - // Load the block handler for the current frame - // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) - mov( - cb, - C_ARG_REGS[1], - mem_opnd( - 64, - REG1, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ), - ); + asm.jnz(side_exit.into()); // Convert the block handler in to a proc // call rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler) - mov(cb, C_ARG_REGS[0], REG_EC); - call_ptr(cb, REG0, rb_vm_bh_to_procval as *const u8); + let proc = asm.ccall( + rb_vm_bh_to_procval as *const u8, + vec![ + EC, + // The block handler for the current frame + // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) + Opnd::mem( + 64, + ep_opnd, + (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), + ), + ] + ); // Load environment pointer EP from CFP (again) - gen_get_ep(cb, REG1, level); - - // Set the frame modified flag - or(cb, flag_check, uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into())); + let ep_opnd = gen_get_ep(asm, level); // Write the value at the environment pointer let idx = jit_get_arg(jit, 0).as_i32(); let offs = -(SIZEOF_VALUE as i32 * idx); - mov(cb, mem_opnd(64, REG1, offs), RAX); + asm.mov(Opnd::mem(64, ep_opnd, offs), proc); + + // Set the frame modified flag + let flag_check = Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32)); + let modified_flag = asm.or(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.store(flag_check, modified_flag); - cb.write_label(frame_flag_modified); + asm.write_label(frame_flag_modified); // Push the proc on the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, RAX, mem_opnd(64, REG1, 
offs)); - mov(cb, stack_ret, RAX); - - cb.link_labels(); + let ep_opnd = gen_get_ep(asm, level); + asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs)); KeepCompiling } -*/ fn gen_invokebuiltin( jit: &mut JITState, @@ -5919,8 +5895,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_branchnil => Some(gen_branchnil), YARVINSN_jump => Some(gen_jump), - //YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), - //YARVINSN_getblockparam => Some(gen_getblockparam), + YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), + YARVINSN_getblockparam => Some(gen_getblockparam), YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), YARVINSN_send => Some(gen_send), YARVINSN_invokesuper => Some(gen_invokesuper), From b54643d13ac4194f05151a366e9980350efec829 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 10 Aug 2022 16:38:42 -0400 Subject: [PATCH 472/546] Handle out of memory tests (https://github.com/Shopify/ruby/pull/393) --- .cirrus.yml | 6 ++---- yjit/src/backend/arm64/mod.rs | 5 ++++- yjit/src/backend/x86_64/mod.rs | 4 +++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 51aadfc6be8294..3425ebd175e6ef 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -160,10 +160,8 @@ yjit_task: bootstraptest/test_yjit_30k_ifelse.rb \ bootstraptest/test_yjit_30k_methods.rb \ bootstraptest/test_yjit_new_backend.rb \ - bootstraptest/test_yjit_rust_port.rb - - # These are the btests we can't run yet on arm: - #bootstraptest/test_yjit.rb (out of executable memory not handled) + bootstraptest/test_yjit_rust_port.rb \ + bootstraptest/test_yjit.rb # FIXME: not currently working on CI, missing cargo # Check that we can do a full ruby build diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index fac77f972d2b37..440c4368c50d79 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -866,7 +866,10 @@ impl Assembler } let gc_offsets = asm.arm64_emit(cb); - 
cb.link_labels(); + + if !cb.has_dropped_bytes() { + cb.link_labels(); + } gc_offsets } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 9fcbb69a688541..417474ee683838 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -561,7 +561,9 @@ impl Assembler let gc_offsets = asm.x86_emit(cb); - cb.link_labels(); + if !cb.has_dropped_bytes() { + cb.link_labels(); + } gc_offsets } From ee1697ee0727c29fc61c88ccb6036aa763d2d2b6 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 10 Aug 2022 14:01:21 -0700 Subject: [PATCH 473/546] Port opt_aref and opt_aset to the new backend IR (https://github.com/Shopify/ruby/pull/387) * Port opt_aref and opt_aset to the new backend IR * Recompute memory operands --- yjit/src/codegen.rs | 120 ++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 66 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b5b145790c7de4..047fb89cfbda42 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1869,10 +1869,8 @@ fn jit_chain_guard( // up to 5 different classes, and embedded or not for each pub const GET_IVAR_MAX_DEPTH: i32 = 10; -/* // hashes and arrays pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2; -*/ // up to 5 different classes pub const SEND_MAX_DEPTH: i32 = 5; @@ -2548,11 +2546,10 @@ fn gen_opt_neq( return gen_send_general(jit, ctx, asm, ocb, cd, None); } -/* fn gen_opt_aref( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); @@ -2560,13 +2557,13 @@ fn gen_opt_aref( // Only JIT one arg calls like `ary[6]` if argc != 1 { - gen_counter_incr!(cb, oaref_argc_not_one); + gen_counter_incr!(asm, oaref_argc_not_one); return CantCompile; } // Defer compilation so we can specialize base on a runtime receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + 
defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2588,66 +2585,61 @@ fn gen_opt_aref( // Pop the stack operands let idx_opnd = ctx.stack_pop(1); let recv_opnd = ctx.stack_pop(1); - mov(cb, REG0, recv_opnd); + let recv_reg = asm.load(recv_opnd); // if (SPECIAL_CONST_P(recv)) { // Bail if receiver is not a heap object - test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); - cmp(cb, REG0, uimm_opnd(Qfalse.into())); - je_ptr(cb, side_exit); - cmp(cb, REG0, uimm_opnd(Qnil.into())); - je_ptr(cb, side_exit); + asm.test(recv_reg, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(side_exit.into()); + asm.cmp(recv_reg, Qfalse.into()); + asm.je(side_exit.into()); + asm.cmp(recv_reg, Qnil.into()); + asm.je(side_exit.into()); // Bail if recv has a class other than ::Array. // BOP_AREF check above is only good for ::Array. - mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS)); - mov(cb, REG0, uimm_opnd(unsafe { rb_cArray }.into())); - cmp(cb, REG0, REG1); + asm.cmp(unsafe { rb_cArray }.into(), Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_KLASS)); jit_chain_guard( JCC_JNE, jit, &starting_context, - cb, + asm, ocb, OPT_AREF_MAX_CHAIN_DEPTH, side_exit, ); // Bail if idx is not a FIXNUM - mov(cb, REG1, idx_opnd); - test(cb, REG1, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, counted_exit!(ocb, side_exit, oaref_arg_not_fixnum)); + let idx_reg = asm.load(idx_opnd); + asm.test(idx_reg, (RUBY_FIXNUM_FLAG as u64).into()); + asm.jz(counted_exit!(ocb, side_exit, oaref_arg_not_fixnum).into()); // Call VALUE rb_ary_entry_internal(VALUE ary, long offset). // It never raises or allocates, so we don't need to write to cfp->pc. 
{ - mov(cb, RDI, recv_opnd); - sar(cb, REG1, uimm_opnd(1)); // Convert fixnum to int - mov(cb, RSI, REG1); - call_ptr(cb, REG0, rb_ary_entry_internal as *const u8); + let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int + let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } else if comptime_recv.class_of() == unsafe { rb_cHash } { if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) { return CantCompile; } - let key_opnd = ctx.stack_opnd(0); let recv_opnd = ctx.stack_opnd(1); // Guard that the receiver is a hash jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cHash }, recv_opnd, @@ -2657,40 +2649,39 @@ fn gen_opt_aref( side_exit, ); - // Setup arguments for rb_hash_aref(). - mov(cb, C_ARG_REGS[0], REG0); - mov(cb, C_ARG_REGS[1], key_opnd); - // Prepare to call rb_hash_aref(). It might call #hash on the key. - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_hash_aref as *const u8); + // Call rb_hash_aref + let key_opnd = ctx.stack_opnd(0); + let recv_opnd = ctx.stack_opnd(1); + let val = asm.ccall(rb_hash_aref as *const u8, vec![recv_opnd, key_opnd]); // Pop the key and the receiver ctx.stack_pop(2); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } else { // General case. Call the [] method. 
- gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_aset( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2709,7 +2700,7 @@ fn gen_opt_aset( jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cArray }, recv, @@ -2723,7 +2714,7 @@ fn gen_opt_aset( jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cInteger }, key, @@ -2733,27 +2724,26 @@ fn gen_opt_aset( side_exit, ); - // Call rb_ary_store - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], key); - sar(cb, C_ARG_REGS[1], uimm_opnd(1)); // FIX2LONG(key) - mov(cb, C_ARG_REGS[2], val); - // We might allocate or raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_ary_store as *const u8); + // Call rb_ary_store + let recv = ctx.stack_opnd(2); + let key = asm.load(ctx.stack_opnd(1)); + let key = asm.rshift(key, Opnd::UImm(1)); // FIX2LONG(key) + let val = ctx.stack_opnd(0); + asm.ccall(rb_ary_store as *const u8, vec![recv, key, val]); // rb_ary_store returns void // stored value should still be on stack - mov(cb, REG0, ctx.stack_opnd(0)); + let val = asm.load(ctx.stack_opnd(0)); // Push the return value onto the stack ctx.stack_pop(3); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } else if comptime_recv.class_of() == unsafe { rb_cHash } { let side_exit = get_side_exit(jit, ocb, ctx); @@ -2762,7 +2752,7 @@ fn gen_opt_aset( jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cHash }, recv, @@ -2772,28 +2762,26 @@ fn gen_opt_aset( 
side_exit, ); - // Call rb_hash_aset - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], key); - mov(cb, C_ARG_REGS[2], val); - // We might allocate or raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_hash_aset as *const u8); + // Call rb_hash_aset + let recv = ctx.stack_opnd(2); + let key = ctx.stack_opnd(1); + let val = ctx.stack_opnd(0); + let ret = asm.ccall(rb_hash_aset as *const u8, vec![recv, key, val]); // Push the return value onto the stack ctx.stack_pop(3); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } -*/ fn gen_opt_and( jit: &mut JITState, @@ -5873,8 +5861,8 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_eq => Some(gen_opt_eq), YARVINSN_opt_neq => Some(gen_opt_neq), - //YARVINSN_opt_aref => Some(gen_opt_aref), - //YARVINSN_opt_aset => Some(gen_opt_aset), + YARVINSN_opt_aref => Some(gen_opt_aref), + YARVINSN_opt_aset => Some(gen_opt_aset), YARVINSN_opt_mult => Some(gen_opt_mult), YARVINSN_opt_div => Some(gen_opt_div), YARVINSN_opt_ltlt => Some(gen_opt_ltlt), From 4d811d7a2b92d110e3e70cb77e5f499acfa7112a Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 10 Aug 2022 17:22:55 -0400 Subject: [PATCH 474/546] Fix code invalidation while OOM and OOM simulation (https://github.com/Shopify/ruby/pull/395) `YJIT.simulate_oom!` used to leave one byte of space in the code block, so our test didn't expose a problem with asserting that the write position is in bounds in `CodeBlock::set_pos`. We do the following when patching code: 1. save current write position 2. seek to middle of the code block and patch 3. restore old write position The bounds check fails on (3) when the code block is already filled up. 
Leaving one byte of space also meant that when we write that byte, we need to fill the entire code region with trapping instructions in `VirtualMem`, which made the OOM tests unnecessarily slow. Remove the incorrect bounds check and stop leaving space in the code block when simulating OOM. --- bootstraptest/test_yjit.rb | 9 +++++++++ yjit/src/asm/mod.rs | 12 ++++++------ yjit/src/yjit.rs | 4 ++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index d44fe258005ee0..89d7c9a038051d 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -2901,11 +2901,20 @@ def test foo end + def bar + :bar + end + + test test RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT) + # Old simulate_oom! leaves one byte of space and this fills it up + bar + bar + def foo :new end diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 0e05eb57831751..fef4518816f177 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -121,10 +121,10 @@ impl CodeBlock { // Set the current write position pub fn set_pos(&mut self, pos: usize) { - // Assert here since while CodeBlock functions do bounds checking, there is - // nothing stopping users from taking out an out-of-bounds pointer and - // doing bad accesses with it. - assert!(pos < self.mem_size); + // No bounds check here since we can be out of bounds + // when the code block fills up. We want to be able to + // restore to the filled up state after patching something + // in the middle.
self.write_pos = pos; } @@ -152,12 +152,12 @@ impl CodeBlock { self.set_pos(pos); } - // Get a direct pointer into the executable memory block + /// Get a (possibly dangling) direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { self.mem_block.start_ptr().add_bytes(offset) } - // Get a direct pointer to the current write position + /// Get a (possibly dangling) direct pointer to the current write position pub fn get_write_ptr(&mut self) -> CodePtr { self.get_ptr(self.write_pos) } diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index bfa9188d3e9bb2..5cd23f066f52d8 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -91,8 +91,8 @@ pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VA if cfg!(debug_assertions) { let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb().unwrap(); - cb.set_pos(cb.get_mem_size() - 1); - ocb.set_pos(ocb.get_mem_size() - 1); + cb.set_pos(cb.get_mem_size()); + ocb.set_pos(ocb.get_mem_size()); } return Qnil; From 7f4ab24f2b99c87874a5540a55026ea5a3d43d3e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 11 Aug 2022 08:32:06 -0400 Subject: [PATCH 475/546] Op::Xor for backend IR (https://github.com/Shopify/ruby/pull/397) --- yjit/src/asm/arm64/inst/logical_imm.rs | 16 +++++++++++++ yjit/src/asm/arm64/inst/logical_reg.rs | 16 +++++++++++++ yjit/src/asm/arm64/mod.rs | 32 ++++++++++++++++++++++++++ yjit/src/backend/arm64/mod.rs | 15 +++++++++++- yjit/src/backend/ir.rs | 5 ++++ yjit/src/backend/x86_64/mod.rs | 6 ++++- 6 files changed, 88 insertions(+), 2 deletions(-) diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs index 13865697f6cd42..73eec8b37c305f 100644 --- a/yjit/src/asm/arm64/inst/logical_imm.rs +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -8,6 +8,9 @@ enum Opc { /// The ORR operation. Orr = 0b01, + /// The EOR operation. + Eor = 0b10, + /// The ANDS operation. 
Ands = 0b11 } @@ -52,6 +55,12 @@ impl LogicalImm { Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } } + /// EOR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate-- + pub fn eor(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Eor, sf: num_bits.into() } + } + /// MOV (bitmask immediate) /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { @@ -115,6 +124,13 @@ mod tests { assert_eq!(0xf2400820, result); } + #[test] + fn test_eor() { + let inst = LogicalImm::eor(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xd2400820, result); + } + #[test] fn test_mov() { let inst = LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64); diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index 5d7954c587a4c0..83230ac5b2ba62 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -25,6 +25,9 @@ enum Opc { /// The ORR operation. Orr = 0b01, + /// The EOR operation. + Eor = 0b10, + /// The ANDS operation. 
Ands = 0b11 } @@ -78,6 +81,12 @@ impl LogicalReg { Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } } + /// EOR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register-- + pub fn eor(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Eor, sf: num_bits.into() } + } + /// MOV (register) /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { @@ -156,6 +165,13 @@ mod tests { assert_eq!(0xea020020, result); } + #[test] + fn test_eor() { + let inst = LogicalReg::eor(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xca020020, result); + } + #[test] fn test_mov() { let inst = LogicalReg::mov(0, 1, 64); diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 93b44dba4b5bc4..e5ba2f81ead4ab 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -309,6 +309,28 @@ pub fn csel(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd, cond: u8) cb.write_bytes(&bytes); } +/// EOR - perform a bitwise XOR of rn and rm, put the result in rd, don't update flags +pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::eor(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::eor(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to eor instruction."), + }; + + cb.write_bytes(&bytes); +} + /// LDADDAL - atomic add with acquire and release semantics pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rs, rt, rn) { @@ -1023,6 +1045,16 @@ mod tests { check_bytes("6a018c9a", |cb| csel(cb, X10, X11, X12, Condition::EQ)); } + #[test] + fn test_eor_register() { + check_bytes("6a010cca", |cb| eor(cb, X10, X11, X12)); + } + + #[test] + fn test_eor_immediate() { + check_bytes("6a0940d2", |cb| eor(cb, X10, X11, A64Opnd::new_uimm(7))); + } + #[test] fn test_ldaddal() { check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 440c4368c50d79..2cd893923ac8df 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -216,7 +216,7 @@ impl Assembler } } }, - Op::And | Op::Or => { + Op::And | Op::Or | Op::Xor => { match (opnds[0], opnds[1]) { (Opnd::Reg(_), Opnd::Reg(_)) => { asm.push_insn(insn.op, vec![opnds[0], opnds[1]], insn.target, insn.text, insn.pos_marker); @@ -615,6 +615,9 @@ impl Assembler Op::Or => { orr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); }, + Op::Xor => { + eor(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + }, Op::Not => { mvn(cb, insn.out.into(), insn.opnds[0].into()); }, @@ -1084,6 +1087,16 @@ mod tests { assert_eq!(8, cb.get_write_pos()); } + #[test] + fn test_emit_xor() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + + 
asm.compile_with_num_regs(&mut cb, 1); + } + #[test] #[cfg(feature = "disasm")] fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 2dfb859fe9d548..ef8cd5e8728d4c 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -63,6 +63,10 @@ pub enum Op // binary OR operation. Or, + // This is the same as the OP_ADD instruction, except that it performs the + // binary XOR operation. + Xor, + // Perform the NOT operation on an individual operand, and return the result // as a new operand. This operand can then be used as the operand on another // instruction. @@ -992,6 +996,7 @@ def_push_2_opnd!(add, Op::Add); def_push_2_opnd!(sub, Op::Sub); def_push_2_opnd!(and, Op::And); def_push_2_opnd!(or, Op::Or); +def_push_2_opnd!(xor, Op::Xor); def_push_1_opnd!(not, Op::Not); def_push_2_opnd!(lshift, Op::LShift); def_push_2_opnd!(rshift, Op::RShift); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 417474ee683838..d474c9fe59a71b 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -139,7 +139,7 @@ impl Assembler }).collect(); match insn.op { - Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test => { + Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test | Op::Xor => { let (opnd0, opnd1) = match (insn.opnds[0], insn.opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { (asm.load(opnds[0]), asm.load(opnds[1])) @@ -328,6 +328,10 @@ impl Assembler or(cb, insn.opnds[0].into(), insn.opnds[1].into()); }, + Op::Xor => { + xor(cb, insn.opnds[0].into(), insn.opnds[1].into()); + }, + Op::Not => { not(cb, insn.opnds[0].into()) }, From 471de2ab78ef75c7f22f80c77460e16e2356e190 Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Thu, 11 Aug 2022 17:11:23 +0100 Subject: [PATCH 476/546] Enable skipdata on Capstone to allow embedded data without early stop to disasm (https://github.com/Shopify/ruby/pull/398) --- yjit/src/disasm.rs | 6 ++++-- 
1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 62b270bce9cc94..015c0c25ef5d3c 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -71,7 +71,7 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St use capstone::prelude::*; #[cfg(target_arch = "x86_64")] - let cs = Capstone::new() + let mut cs = Capstone::new() .x86() .mode(arch::x86::ArchMode::Mode64) .syntax(arch::x86::ArchSyntax::Intel) @@ -79,11 +79,13 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St .unwrap(); #[cfg(target_arch = "aarch64")] - let cs = Capstone::new() + let mut cs = Capstone::new() .arm64() .mode(arch::arm64::ArchMode::Arm) + .detail(true) .build() .unwrap(); + cs.set_skipdata(true); out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len())); out.push_str(&format!( From dca5e74aa17bd315ff83cffbdbba3f7c641174ac Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 11 Aug 2022 09:12:44 -0700 Subject: [PATCH 477/546] Fix test_rubyoptions.rb for arm64 (https://github.com/Shopify/ruby/pull/396) --- test/lib/jit_support.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lib/jit_support.rb b/test/lib/jit_support.rb index e607df4cabbf9d..ccd3b83be76b5f 100644 --- a/test/lib/jit_support.rb +++ b/test/lib/jit_support.rb @@ -64,7 +64,7 @@ def supported? def yjit_supported? # e.g. 
x86_64-linux, x64-mswin64_140, x64-mingw32, x64-mingw-ucrt - RUBY_PLATFORM.match?(/^(x86_64|x64)-/) + RUBY_PLATFORM.match?(/^(x86_64|x64|arm64)-/) end # AppVeyor's Visual Studio 2013 / 2015 are known to spuriously generate broken pch / pdb, like: From cb15886e612e17685e606f8b4f04301026f18b46 Mon Sep 17 00:00:00 2001 From: Zack Deveau Date: Thu, 11 Aug 2022 12:49:35 -0400 Subject: [PATCH 478/546] Port opt_getinlinecache to the new backend (https://github.com/Shopify/ruby/pull/399) --- yjit/src/codegen.rs | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 047fb89cfbda42..def3d38cbf0e53 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5475,11 +5475,10 @@ fn gen_setclassvariable( KeepCompiling } -/* fn gen_opt_getinlinecache( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0); @@ -5498,25 +5497,35 @@ fn gen_opt_getinlinecache( // to invalidate this block from yjit_constant_ic_update(). jit_ensure_block_entry_exit(jit, ocb); + let inline_cache = Opnd::const_ptr(ic as *const u8); if !unsafe { (*ice).ic_cref }.is_null() { // Cache is keyed on a certain lexical scope. Use the interpreter's cache. let side_exit = get_side_exit(jit, ocb, ctx); // Call function to verify the cache. It doesn't allocate or call methods. - mov(cb, C_ARG_REGS[0], const_ptr_opnd(ic as *const u8)); - mov(cb, C_ARG_REGS[1], mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - call_ptr(cb, REG0, rb_vm_ic_hit_p as *const u8); + let ret_val = asm.ccall( + rb_vm_ic_hit_p as *const u8, + vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)] + ); // Check the result. _Bool is one byte in SysV. 
- test(cb, AL, AL); - jz_ptr(cb, counted_exit!(ocb, side_exit, opt_getinlinecache_miss)); + asm.test(ret_val, ret_val); + asm.jz(counted_exit!(ocb, side_exit, opt_getinlinecache_miss).into()); + + let inline_cache_entry = Opnd::mem( + 64, + inline_cache, + RUBY_OFFSET_IC_ENTRY + ); + let inline_cache_entry_val = Opnd::mem( + 64, + inline_cache_entry, + RUBY_OFFSET_ICE_VALUE + ); // Push ic->entry->value - mov(cb, REG0, const_ptr_opnd(ic as *mut u8)); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_IC_ENTRY)); let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_ICE_VALUE)); - mov(cb, stack_top, REG0); + asm.mov(stack_top, inline_cache_entry_val); } else { // Optimize for single ractor mode. // FIXME: This leaks when st_insert raises NoMemoryError @@ -5528,7 +5537,7 @@ fn gen_opt_getinlinecache( // constants referenced within the current block. assume_stable_constant_names(jit, ocb); - jit_putobject(jit, ctx, cb, unsafe { (*ice).value }); + jit_putobject(jit, ctx, asm, unsafe { (*ice).value }); } // Jump over the code for filling the cache @@ -5540,11 +5549,11 @@ fn gen_opt_getinlinecache( iseq: jit.iseq, idx: jump_idx, }, - cb, + asm, ); EndBlock } -*/ + // Push the explicit block parameter onto the temporary stack. 
Part of the // interpreter's scheme for avoiding Proc allocations when delegating @@ -5873,7 +5882,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_size => Some(gen_opt_size), YARVINSN_opt_length => Some(gen_opt_length), YARVINSN_opt_regexpmatch2 => Some(gen_opt_regexpmatch2), - //YARVINSN_opt_getinlinecache => Some(gen_opt_getinlinecache), + YARVINSN_opt_getinlinecache => Some(gen_opt_getinlinecache), YARVINSN_invokebuiltin => Some(gen_invokebuiltin), YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate), YARVINSN_opt_invokebuiltin_delegate_leave => Some(gen_opt_invokebuiltin_delegate), From c022a605401ccbc591640720a28c616cbaa19931 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 11 Aug 2022 13:26:30 -0400 Subject: [PATCH 479/546] Fix bugs in gen_opt_getinlinecache --- yjit/src/codegen.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index def3d38cbf0e53..08806f84bfd76a 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5497,11 +5497,12 @@ fn gen_opt_getinlinecache( // to invalidate this block from yjit_constant_ic_update(). jit_ensure_block_entry_exit(jit, ocb); - let inline_cache = Opnd::const_ptr(ic as *const u8); if !unsafe { (*ice).ic_cref }.is_null() { // Cache is keyed on a certain lexical scope. Use the interpreter's cache. let side_exit = get_side_exit(jit, ocb, ctx); + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + // Call function to verify the cache. It doesn't allocate or call methods. 
let ret_val = asm.ccall( rb_vm_ic_hit_p as *const u8, @@ -5512,20 +5513,23 @@ fn gen_opt_getinlinecache( asm.test(ret_val, ret_val); asm.jz(counted_exit!(ocb, side_exit, opt_getinlinecache_miss).into()); - let inline_cache_entry = Opnd::mem( + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + + let ic_entry = asm.load(Opnd::mem( 64, inline_cache, RUBY_OFFSET_IC_ENTRY - ); - let inline_cache_entry_val = Opnd::mem( + )); + + let ic_entry_val = asm.load(Opnd::mem( 64, - inline_cache_entry, + ic_entry, RUBY_OFFSET_ICE_VALUE - ); + )); // Push ic->entry->value let stack_top = ctx.stack_push(Type::Unknown); - asm.mov(stack_top, inline_cache_entry_val); + asm.store(stack_top, ic_entry_val); } else { // Optimize for single ractor mode. // FIXME: This leaks when st_insert raises NoMemoryError @@ -5554,7 +5558,6 @@ fn gen_opt_getinlinecache( EndBlock } - // Push the explicit block parameter onto the temporary stack. Part of the // interpreter's scheme for avoiding Proc allocations when delegating // explicit block parameters. 
From be730cdae5ac54a5ffd167983c3dffe50a055901 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 11 Aug 2022 15:46:13 -0400 Subject: [PATCH 480/546] AArch64 Ruby immediates (https://github.com/Shopify/ruby/pull/400) --- yjit/src/backend/arm64/mod.rs | 66 +++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 2cd893923ac8df..64136bfdd221da 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -187,16 +187,25 @@ impl Assembler let mut iterator = self.into_draining_iter(); while let Some((index, insn)) = iterator.next_mapped() { - let opnds = match insn.op { - Op::Load => insn.opnds, - _ => insn.opnds.into_iter().map(|opnd| { - if let Opnd::Value(_) = opnd { - asm.load(opnd) - } else { - opnd - } - }).collect() - }; + // Here we're going to map the operands of the instruction to load + // any Opnd::Value operands into registers if they are heap objects + // such that only the Op::Load instruction needs to handle that + // case. If the values aren't heap objects then we'll treat them as + // if they were just unsigned integer. + let opnds: Vec = insn.opnds.into_iter().map(|opnd| { + match opnd { + Opnd::Value(value) => { + if value.special_const_p() { + Opnd::UImm(value.as_u64()) + } else if insn.op == Op::Load { + opnd + } else { + asm.load(opnd) + } + }, + _ => opnd + } + }).collect(); match insn.op { Op::Add => { @@ -652,6 +661,11 @@ impl Assembler ldur(cb, insn.out.into(), insn.opnds[0].into()); }, Opnd::Value(value) => { + // We dont need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + // This assumes only load instructions can contain // references to GC'd Value operands. 
If the value // being loaded is a heap object, we'll report that @@ -660,10 +674,8 @@ impl Assembler b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); - if !value.special_const_p() { - let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); - gc_offsets.push(ptr_offset); - } + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); }, Opnd::None => { unreachable!("Attempted to load from None operand"); @@ -985,6 +997,32 @@ mod tests { assert_eq!(16, cb.get_write_pos()); } + #[test] + fn test_emit_load_value_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(Qnil)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that only two instructions were written since the value is an + // immediate. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_non_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000))); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that five instructions were written since the value is not an + // immediate and needs to be loaded into a register. 
+ assert_eq!(20, cb.get_write_pos()); + } + #[test] fn test_emit_or() { let (mut asm, mut cb) = setup_asm(); From ff3f1d15d2244dcafe5d7a748922e7c8b6b0f3bd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 12 Aug 2022 11:59:53 -0400 Subject: [PATCH 481/546] Optimize bitmask immediates (https://github.com/Shopify/ruby/pull/403) --- yjit/src/asm/arm64/arg/bitmask_imm.rs | 95 +++++---------------------- 1 file changed, 18 insertions(+), 77 deletions(-) diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs index b3a821fe94a2e7..54a6e6c34416c3 100644 --- a/yjit/src/asm/arm64/arg/bitmask_imm.rs +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -39,94 +39,35 @@ pub struct BitmaskImmediate { impl TryFrom for BitmaskImmediate { type Error = (); - /// Attempt to convert a u64 into a BitmaskImm. + /// Attempt to convert a u64 into a BitmaskImmediate. + /// + /// The implementation here is largely based on this blog post: + /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ fn try_from(value: u64) -> Result { - // 0 is not encodable as a bitmask immediate. Immediately return here so - // that we don't have any issues with underflow. if value == 0 || value == u64::MAX { return Err(()); } - /// Is this number's binary representation all 1s? - fn is_mask(imm: u64) -> bool { - if imm == u64::MAX { true } else { ((imm + 1) & imm) == 0 } + fn rotate_right(value: u64, rotations: u32) -> u64 { + (value >> (rotations & 0x3F)) | + (value << (rotations.wrapping_neg() & 0x3F)) } - /// Is this number's binary representation one or more 1s followed by - /// one or more 0s? - fn is_shifted_mask(imm: u64) -> bool { - is_mask((imm - 1) | imm) - } - - let mut imm = value; - let mut size = 64; - - // First, determine the element size. 
- loop { - size >>= 1; - let mask = (1 << size) - 1; - - if (imm & mask) != ((imm >> size) & mask) { - size <<= 1; - break; - } - - if size <= 2 { - break; - } - } + let rotations = (value & (value + 1)).trailing_zeros(); + let normalized = rotate_right(value, rotations & 0x3F); - // Second, determine the rotation to make the pattern be aligned such - // that all of the least significant bits are 1. - let trailing_ones: u32; - let left_rotations: u32; + let zeroes = normalized.leading_zeros(); + let ones = (!normalized).trailing_zeros(); + let size = zeroes + ones; - let mask = u64::MAX >> (64 - size); - imm &= mask; - - if is_shifted_mask(imm) { - left_rotations = imm.trailing_zeros(); - assert!(left_rotations < 64); - trailing_ones = (imm >> left_rotations).trailing_ones(); - } else { - imm |= !mask; - if !is_shifted_mask(!imm) { - return Err(()); - } - - let leading_ones = imm.leading_ones(); - left_rotations = 64 - leading_ones; - trailing_ones = leading_ones + imm.trailing_ones() - (64 - size); + if rotate_right(value, size & 0x3F) != value { + return Err(()); } - // immr is the number of right rotations it takes to get from the - // matching unrotated pattern to the target value. - let immr = (size - left_rotations) & (size - 1); - assert!(size > left_rotations); - - // imms is encoded as the size of the pattern, a 0, and then one less - // than the number of sequential 1s. The table below shows how this is - // encoded. (Note that the way it works out, it's impossible for every x - // in a row to be 1 at the same time). 
- // +-------------+--------------+--------------+ - // | imms | element size | number of 1s | - // +-------------+--------------+--------------+ - // | 1 1 1 1 0 x | 2 bits | 1 | - // | 1 1 1 0 x x | 4 bits | 1-3 | - // | 1 1 0 x x x | 8 bits | 1-7 | - // | 1 0 x x x x | 16 bits | 1-15 | - // | 0 x x x x x | 32 bits | 1-31 | - // | x x x x x x | 64 bits | 1-63 | - // +-------------+--------------+--------------+ - let imms = (!(size - 1) << 1) | (trailing_ones - 1); - - // n is 1 if the element size is 64-bits, and 0 otherwise. - let n = ((imms >> 6) & 1) ^ 1; - Ok(BitmaskImmediate { - n: n as u8, - imms: (imms & 0x3f) as u8, - immr: (immr & 0x3f) as u8 + n: ((size >> 6) & 1) as u8, + imms: (((size << 1).wrapping_neg() | (ones - 1)) & 0x3F) as u8, + immr: ((rotations.wrapping_neg() & (size - 1)) & 0x3F) as u8 }) } } @@ -135,7 +76,7 @@ impl From for u32 { /// Encode a bitmask immediate into a 32-bit value. fn from(bitmask: BitmaskImmediate) -> Self { 0 - | (((bitmask.n as u32) & 1) << 12) + | ((bitmask.n as u32) << 12) | ((bitmask.immr as u32) << 6) | (bitmask.imms as u32) } From 2f9df466546263028ece7757cb6f813800d2d6b5 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Mon, 15 Aug 2022 11:25:49 -0400 Subject: [PATCH 482/546] Use bindgen for old manual extern declarations (https://github.com/Shopify/ruby/pull/404) We have a large extern block in cruby.rs leftover from the port. We can use bindgen for it now and reserve the manual declaration for just a handful of vm_insnhelper.c functions. Fixup a few minor discrepencies bindgen found between the C declaration and the manual declaration. Mostly missing `const` on the C side. 
--- yjit.c | 60 ++++----- yjit/bindgen/src/main.rs | 56 ++++++++ yjit/src/codegen.rs | 10 +- yjit/src/cruby.rs | 231 +++++++++------------------------ yjit/src/cruby_bindings.inc.rs | 187 ++++++++++++++++++++++++++ 5 files changed, 337 insertions(+), 207 deletions(-) diff --git a/yjit.c b/yjit.c index 584f909473f126..70b98d4844a42d 100644 --- a/yjit.c +++ b/yjit.c @@ -472,61 +472,61 @@ rb_get_cikw_keywords_idx(const struct rb_callinfo_kwarg *cikw, int idx) } rb_method_visibility_t -rb_METHOD_ENTRY_VISI(rb_callable_method_entry_t *me) +rb_METHOD_ENTRY_VISI(const rb_callable_method_entry_t *me) { return METHOD_ENTRY_VISI(me); } rb_method_type_t -rb_get_cme_def_type(rb_callable_method_entry_t *cme) +rb_get_cme_def_type(const rb_callable_method_entry_t *cme) { return cme->def->type; } ID -rb_get_cme_def_body_attr_id(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_attr_id(const rb_callable_method_entry_t *cme) { return cme->def->body.attr.id; } enum method_optimized_type -rb_get_cme_def_body_optimized_type(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_optimized_type(const rb_callable_method_entry_t *cme) { return cme->def->body.optimized.type; } unsigned int -rb_get_cme_def_body_optimized_index(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_optimized_index(const rb_callable_method_entry_t *cme) { return cme->def->body.optimized.index; } rb_method_cfunc_t * -rb_get_cme_def_body_cfunc(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_cfunc(const rb_callable_method_entry_t *cme) { return UNALIGNED_MEMBER_PTR(cme->def, body.cfunc); } uintptr_t -rb_get_def_method_serial(rb_method_definition_t *def) +rb_get_def_method_serial(const rb_method_definition_t *def) { return def->method_serial; } ID -rb_get_def_original_id(rb_method_definition_t *def) +rb_get_def_original_id(const rb_method_definition_t *def) { return def->original_id; } int -rb_get_mct_argc(rb_method_cfunc_t *mct) +rb_get_mct_argc(const rb_method_cfunc_t *mct) { return mct->argc; } void * 
-rb_get_mct_func(rb_method_cfunc_t *mct) +rb_get_mct_func(const rb_method_cfunc_t *mct) { return (void*)mct->func; // this field is defined as type VALUE (*func)(ANYARGS) } @@ -537,104 +537,104 @@ rb_get_def_iseq_ptr(rb_method_definition_t *def) return def_iseq_ptr(def); } -rb_iseq_t * -rb_get_iseq_body_local_iseq(rb_iseq_t *iseq) +const rb_iseq_t * +rb_get_iseq_body_local_iseq(const rb_iseq_t *iseq) { return iseq->body->local_iseq; } unsigned int -rb_get_iseq_body_local_table_size(rb_iseq_t *iseq) +rb_get_iseq_body_local_table_size(const rb_iseq_t *iseq) { return iseq->body->local_table_size; } VALUE * -rb_get_iseq_body_iseq_encoded(rb_iseq_t *iseq) +rb_get_iseq_body_iseq_encoded(const rb_iseq_t *iseq) { return iseq->body->iseq_encoded; } bool -rb_get_iseq_body_builtin_inline_p(rb_iseq_t *iseq) +rb_get_iseq_body_builtin_inline_p(const rb_iseq_t *iseq) { return iseq->body->builtin_inline_p; } unsigned -rb_get_iseq_body_stack_max(rb_iseq_t *iseq) +rb_get_iseq_body_stack_max(const rb_iseq_t *iseq) { return iseq->body->stack_max; } bool -rb_get_iseq_flags_has_opt(rb_iseq_t *iseq) +rb_get_iseq_flags_has_opt(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_opt; } bool -rb_get_iseq_flags_has_kw(rb_iseq_t *iseq) +rb_get_iseq_flags_has_kw(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_kw; } bool -rb_get_iseq_flags_has_post(rb_iseq_t *iseq) +rb_get_iseq_flags_has_post(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_post; } bool -rb_get_iseq_flags_has_kwrest(rb_iseq_t *iseq) +rb_get_iseq_flags_has_kwrest(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_kwrest; } bool -rb_get_iseq_flags_has_rest(rb_iseq_t *iseq) +rb_get_iseq_flags_has_rest(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_rest; } bool -rb_get_iseq_flags_has_block(rb_iseq_t *iseq) +rb_get_iseq_flags_has_block(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_block; } bool -rb_get_iseq_flags_has_accepts_no_kwarg(rb_iseq_t *iseq) 
+rb_get_iseq_flags_has_accepts_no_kwarg(const rb_iseq_t *iseq) { return iseq->body->param.flags.accepts_no_kwarg; } const rb_seq_param_keyword_struct * -rb_get_iseq_body_param_keyword(rb_iseq_t *iseq) +rb_get_iseq_body_param_keyword(const rb_iseq_t *iseq) { return iseq->body->param.keyword; } unsigned -rb_get_iseq_body_param_size(rb_iseq_t *iseq) +rb_get_iseq_body_param_size(const rb_iseq_t *iseq) { return iseq->body->param.size; } int -rb_get_iseq_body_param_lead_num(rb_iseq_t *iseq) +rb_get_iseq_body_param_lead_num(const rb_iseq_t *iseq) { return iseq->body->param.lead_num; } int -rb_get_iseq_body_param_opt_num(rb_iseq_t *iseq) +rb_get_iseq_body_param_opt_num(const rb_iseq_t *iseq) { return iseq->body->param.opt_num; } const VALUE * -rb_get_iseq_body_param_opt_table(rb_iseq_t *iseq) +rb_get_iseq_body_param_opt_table(const rb_iseq_t *iseq) { return iseq->body->param.opt_table; } @@ -669,7 +669,7 @@ rb_yjit_str_simple_append(VALUE str1, VALUE str2) } struct rb_control_frame_struct * -rb_get_ec_cfp(rb_execution_context_t *ec) +rb_get_ec_cfp(const rb_execution_context_t *ec) { return ec->cfp; } @@ -803,7 +803,7 @@ rb_RSTRUCT_SET(VALUE st, int k, VALUE v) } const struct rb_callinfo * -rb_get_call_data_ci(struct rb_call_data *cd) +rb_get_call_data_ci(const struct rb_call_data *cd) { return cd->ci; } diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index f8d87aeec8c48c..a4c0b9850444b9 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -311,6 +311,62 @@ fn main() { // From include/ruby/debug.h .allowlist_function("rb_profile_frames") + // Functions used for code generation + .allowlist_function("rb_insn_name") + .allowlist_function("rb_insn_len") + .allowlist_function("rb_yarv_class_of") + .allowlist_function("rb_get_ec_cfp") + .allowlist_function("rb_get_cfp_pc") + .allowlist_function("rb_get_cfp_sp") + .allowlist_function("rb_get_cfp_self") + .allowlist_function("rb_get_cfp_ep") + .allowlist_function("rb_get_cfp_ep_level") + 
.allowlist_function("rb_get_cme_def_type") + .allowlist_function("rb_get_cme_def_body_attr_id") + .allowlist_function("rb_get_cme_def_body_optimized_type") + .allowlist_function("rb_get_cme_def_body_optimized_index") + .allowlist_function("rb_get_cme_def_body_cfunc") + .allowlist_function("rb_get_def_method_serial") + .allowlist_function("rb_get_def_original_id") + .allowlist_function("rb_get_mct_argc") + .allowlist_function("rb_get_mct_func") + .allowlist_function("rb_get_def_iseq_ptr") + .allowlist_function("rb_iseq_encoded_size") + .allowlist_function("rb_get_iseq_body_local_iseq") + .allowlist_function("rb_get_iseq_body_iseq_encoded") + .allowlist_function("rb_get_iseq_body_stack_max") + .allowlist_function("rb_get_iseq_flags_has_opt") + .allowlist_function("rb_get_iseq_flags_has_kw") + .allowlist_function("rb_get_iseq_flags_has_rest") + .allowlist_function("rb_get_iseq_flags_has_post") + .allowlist_function("rb_get_iseq_flags_has_kwrest") + .allowlist_function("rb_get_iseq_flags_has_block") + .allowlist_function("rb_get_iseq_flags_has_accepts_no_kwarg") + .allowlist_function("rb_get_iseq_body_local_table_size") + .allowlist_function("rb_get_iseq_body_param_keyword") + .allowlist_function("rb_get_iseq_body_param_size") + .allowlist_function("rb_get_iseq_body_param_lead_num") + .allowlist_function("rb_get_iseq_body_param_opt_num") + .allowlist_function("rb_get_iseq_body_param_opt_table") + .allowlist_function("rb_get_cikw_keyword_len") + .allowlist_function("rb_get_cikw_keywords_idx") + .allowlist_function("rb_get_call_data_ci") + .allowlist_function("rb_yarv_str_eql_internal") + .allowlist_function("rb_yarv_ary_entry_internal") + .allowlist_function("rb_yarv_fix_mod_fix") + .allowlist_function("rb_FL_TEST") + .allowlist_function("rb_FL_TEST_RAW") + .allowlist_function("rb_RB_TYPE_P") + .allowlist_function("rb_BASIC_OP_UNREDEFINED_P") + .allowlist_function("rb_RSTRUCT_LEN") + .allowlist_function("rb_RSTRUCT_SET") + .allowlist_function("rb_vm_ci_argc") + 
.allowlist_function("rb_vm_ci_mid") + .allowlist_function("rb_vm_ci_flag") + .allowlist_function("rb_vm_ci_kwarg") + .allowlist_function("rb_METHOD_ENTRY_VISI") + .allowlist_function("rb_RCLASS_ORIGIN") + // We define VALUE manually, don't import it .blocklist_type("VALUE") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 08806f84bfd76a..3428466297c736 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -4058,7 +4058,7 @@ fn gen_send_cfunc( // cfunc comes from compile-time cme->def, which we assume to be stable. // Invalidation logic is in yjit_method_lookup_change() asm.comment("call C function"); - let ret = asm.ccall(unsafe { get_mct_func(cfunc) }, args); + let ret = asm.ccall(unsafe { get_mct_func(cfunc) }.cast(), args); // Record code position for TracePoint patching. See full_cfunc_return(). record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos()); @@ -4724,7 +4724,7 @@ fn gen_send_general( // see vm_call_method(). let ci = unsafe { get_call_data_ci(cd) }; // info about the call site - let argc = unsafe { vm_ci_argc(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let mid = unsafe { vm_ci_mid(ci) }; let flags = unsafe { vm_ci_flag(ci) }; @@ -5028,7 +5028,7 @@ fn gen_invokesuper( unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; let ci = unsafe { get_call_data_ci(cd) }; - let argc = unsafe { vm_ci_argc(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let ci_flags = unsafe { vm_ci_flag(ci) }; @@ -5960,7 +5960,7 @@ pub struct CodegenGlobals { inline_frozen_bytes: usize, // Methods for generating code for hardcoded (usually C) methods - method_codegen_table: HashMap, + method_codegen_table: HashMap, } /// For implementing global code invalidation. 
A position in the inline @@ -6179,7 +6179,7 @@ impl CodegenGlobals { CodegenGlobals::get_instance().outline_full_cfunc_return_pos } - pub fn look_up_codegen_method(method_serial: u64) -> Option { + pub fn look_up_codegen_method(method_serial: usize) -> Option { let table = &CodegenGlobals::get_instance().method_codegen_table; let option_ref = table.get(&method_serial); diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 1c31b8c1494c69..2cf5134e8199c9 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -106,161 +106,11 @@ pub use autogened::*; // TODO: For #defines that affect memory layout, we need to check for them // on build and fail if they're wrong. e.g. USE_FLONUM *must* be true. -// TODO: -// Temporary, these external bindings will likely be auto-generated -// and textually included in this file +// These are functions we expose from vm_insnhelper.c, not in any header. +// Parsing it would result in a lot of duplicate definitions. +// Use bindgen for functions that are defined in headers or in yjit.c. 
#[cfg_attr(test, allow(unused))] // We don't link against C code when testing extern "C" { - #[link_name = "rb_insn_name"] - pub fn raw_insn_name(insn: VALUE) -> *const c_char; - - #[link_name = "rb_insn_len"] - pub fn raw_insn_len(v: VALUE) -> c_int; - - #[link_name = "rb_yarv_class_of"] - pub fn CLASS_OF(v: VALUE) -> VALUE; - - #[link_name = "rb_get_ec_cfp"] - pub fn get_ec_cfp(ec: EcPtr) -> CfpPtr; - - #[link_name = "rb_get_cfp_pc"] - pub fn get_cfp_pc(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_sp"] - pub fn get_cfp_sp(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_self"] - pub fn get_cfp_self(cfp: CfpPtr) -> VALUE; - - #[link_name = "rb_get_cfp_ep"] - pub fn get_cfp_ep(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_ep_level"] - pub fn get_cfp_ep_level(cfp: CfpPtr, lv: u32) -> *const VALUE; - - #[link_name = "rb_get_cme_def_type"] - pub fn get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; - - #[link_name = "rb_get_cme_def_body_attr_id"] - pub fn get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; - - #[link_name = "rb_get_cme_def_body_optimized_type"] - pub fn get_cme_def_body_optimized_type( - cme: *const rb_callable_method_entry_t, - ) -> method_optimized_type; - - #[link_name = "rb_get_cme_def_body_optimized_index"] - pub fn get_cme_def_body_optimized_index(cme: *const rb_callable_method_entry_t) -> c_uint; - - #[link_name = "rb_get_cme_def_body_cfunc"] - pub fn get_cme_def_body_cfunc(cme: *const rb_callable_method_entry_t) - -> *mut rb_method_cfunc_t; - - #[link_name = "rb_get_def_method_serial"] - /// While this returns a uintptr_t in C, we always use it as a Rust u64 - pub fn get_def_method_serial(def: *const rb_method_definition_t) -> u64; - - #[link_name = "rb_get_def_original_id"] - pub fn get_def_original_id(def: *const rb_method_definition_t) -> ID; - - #[link_name = "rb_get_mct_argc"] - pub fn get_mct_argc(mct: *const rb_method_cfunc_t) -> c_int; - - #[link_name = 
"rb_get_mct_func"] - pub fn get_mct_func(mct: *const rb_method_cfunc_t) -> *const u8; - - #[link_name = "rb_get_def_iseq_ptr"] - pub fn get_def_iseq_ptr(def: *const rb_method_definition_t) -> IseqPtr; - - #[link_name = "rb_iseq_encoded_size"] - pub fn get_iseq_encoded_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_local_iseq"] - pub fn get_iseq_body_local_iseq(iseq: IseqPtr) -> IseqPtr; - - #[link_name = "rb_get_iseq_body_iseq_encoded"] - pub fn get_iseq_body_iseq_encoded(iseq: IseqPtr) -> *mut VALUE; - - #[link_name = "rb_get_iseq_body_stack_max"] - pub fn get_iseq_body_stack_max(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_flags_has_opt"] - pub fn get_iseq_flags_has_opt(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_kw"] - pub fn get_iseq_flags_has_kw(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_rest"] - pub fn get_iseq_flags_has_rest(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_post"] - pub fn get_iseq_flags_has_post(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_kwrest"] - pub fn get_iseq_flags_has_kwrest(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_block"] - pub fn get_iseq_flags_has_block(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_accepts_no_kwarg"] - pub fn get_iseq_flags_has_accepts_no_kwarg(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_body_local_table_size"] - pub fn get_iseq_body_local_table_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_param_keyword"] - pub fn get_iseq_body_param_keyword(iseq: IseqPtr) -> *const rb_seq_param_keyword_struct; - - #[link_name = "rb_get_iseq_body_param_size"] - pub fn get_iseq_body_param_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_param_lead_num"] - pub fn get_iseq_body_param_lead_num(iseq: IseqPtr) -> c_int; - - #[link_name = "rb_get_iseq_body_param_opt_num"] - pub fn get_iseq_body_param_opt_num(iseq: IseqPtr) -> c_int; - - 
#[link_name = "rb_get_iseq_body_param_opt_table"] - pub fn get_iseq_body_param_opt_table(iseq: IseqPtr) -> *const VALUE; - - #[link_name = "rb_get_cikw_keyword_len"] - pub fn get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> c_int; - - #[link_name = "rb_get_cikw_keywords_idx"] - pub fn get_cikw_keywords_idx(cikw: *const rb_callinfo_kwarg, idx: c_int) -> VALUE; - - #[link_name = "rb_get_call_data_ci"] - pub fn get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; - - #[link_name = "rb_yarv_str_eql_internal"] - pub fn rb_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; - - #[link_name = "rb_yarv_ary_entry_internal"] - pub fn rb_ary_entry_internal(ary: VALUE, offset: c_long) -> VALUE; - - #[link_name = "rb_yarv_fix_mod_fix"] - pub fn rb_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; - - #[link_name = "rb_FL_TEST"] - pub fn FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; - - #[link_name = "rb_FL_TEST_RAW"] - pub fn FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; - - #[link_name = "rb_RB_TYPE_P"] - pub fn RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; - - #[link_name = "rb_BASIC_OP_UNREDEFINED_P"] - pub fn BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: RedefinitionFlag) -> bool; - - #[link_name = "rb_RSTRUCT_LEN"] - pub fn RSTRUCT_LEN(st: VALUE) -> c_long; - - #[link_name = "rb_RSTRUCT_SET"] - pub fn RSTRUCT_SET(st: VALUE, k: c_int, v: VALUE); - - // Ruby only defines these in vm_insnhelper.c, not in any header. - // Parsing it would result in a lot of duplicate definitions. 
pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_defined( ec: EcPtr, @@ -283,28 +133,65 @@ extern "C" { ic: ICVARC, ) -> VALUE; pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; - - #[link_name = "rb_vm_ci_argc"] - pub fn vm_ci_argc(ci: *const rb_callinfo) -> c_int; - - #[link_name = "rb_vm_ci_mid"] - pub fn vm_ci_mid(ci: *const rb_callinfo) -> ID; - - #[link_name = "rb_vm_ci_flag"] - pub fn vm_ci_flag(ci: *const rb_callinfo) -> c_uint; - - #[link_name = "rb_vm_ci_kwarg"] - pub fn vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg; - - #[link_name = "rb_METHOD_ENTRY_VISI"] - pub fn METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; - pub fn rb_str_bytesize(str: VALUE) -> VALUE; - - #[link_name = "rb_RCLASS_ORIGIN"] - pub fn RCLASS_ORIGIN(v: VALUE) -> VALUE; } +// Renames +pub use rb_insn_name as raw_insn_name; +pub use rb_insn_len as raw_insn_len; +pub use rb_yarv_class_of as CLASS_OF; +pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_pc as get_cfp_pc; +pub use rb_get_cfp_sp as get_cfp_sp; +pub use rb_get_cfp_self as get_cfp_self; +pub use rb_get_cfp_ep as get_cfp_ep; +pub use rb_get_cfp_ep_level as get_cfp_ep_level; +pub use rb_get_cme_def_type as get_cme_def_type; +pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id; +pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type; +pub use rb_get_cme_def_body_optimized_index as get_cme_def_body_optimized_index; +pub use rb_get_cme_def_body_cfunc as get_cme_def_body_cfunc; +pub use rb_get_def_method_serial as get_def_method_serial; +pub use rb_get_def_original_id as get_def_original_id; +pub use rb_get_mct_argc as get_mct_argc; +pub use rb_get_mct_func as get_mct_func; +pub use rb_get_def_iseq_ptr as get_def_iseq_ptr; +pub use rb_iseq_encoded_size as get_iseq_encoded_size; +pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq; +pub use rb_get_iseq_body_iseq_encoded as 
get_iseq_body_iseq_encoded; +pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; +pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; +pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; +pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; +pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; +pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; +pub use rb_get_iseq_flags_has_accepts_no_kwarg as get_iseq_flags_has_accepts_no_kwarg; +pub use rb_get_iseq_body_local_table_size as get_iseq_body_local_table_size; +pub use rb_get_iseq_body_param_keyword as get_iseq_body_param_keyword; +pub use rb_get_iseq_body_param_size as get_iseq_body_param_size; +pub use rb_get_iseq_body_param_lead_num as get_iseq_body_param_lead_num; +pub use rb_get_iseq_body_param_opt_num as get_iseq_body_param_opt_num; +pub use rb_get_iseq_body_param_opt_table as get_iseq_body_param_opt_table; +pub use rb_get_cikw_keyword_len as get_cikw_keyword_len; +pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; +pub use rb_get_call_data_ci as get_call_data_ci; +pub use rb_yarv_str_eql_internal as rb_str_eql_internal; +pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; +pub use rb_yarv_fix_mod_fix as rb_fix_mod_fix; +pub use rb_FL_TEST as FL_TEST; +pub use rb_FL_TEST_RAW as FL_TEST_RAW; +pub use rb_RB_TYPE_P as RB_TYPE_P; +pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P; +pub use rb_RSTRUCT_LEN as RSTRUCT_LEN; +pub use rb_RSTRUCT_SET as RSTRUCT_SET; +pub use rb_vm_ci_argc as vm_ci_argc; +pub use rb_vm_ci_mid as vm_ci_mid; +pub use rb_vm_ci_flag as vm_ci_flag; +pub use rb_vm_ci_kwarg as vm_ci_kwarg; +pub use rb_METHOD_ENTRY_VISI as METHOD_ENTRY_VISI; +pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; + /// Helper so we can get a Rust string for insn_name() pub fn insn_name(opcode: usize) -> String { use std::ffi::CStr; diff --git 
a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index a329dadc9b3721..83e9f580bfa9ad 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -548,6 +548,20 @@ pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9; pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10; pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11; pub type rb_method_type_t = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_method_cfunc_struct { + pub func: ::std::option::Option VALUE>, + pub invoker: ::std::option::Option< + unsafe extern "C" fn( + recv: VALUE, + argc: ::std::os::raw::c_int, + argv: *const VALUE, + func: ::std::option::Option VALUE>, + ) -> VALUE, + >, + pub argc: ::std::os::raw::c_int, +} pub const OPTIMIZED_METHOD_TYPE_SEND: method_optimized_type = 0; pub const OPTIMIZED_METHOD_TYPE_CALL: method_optimized_type = 1; pub const OPTIMIZED_METHOD_TYPE_BLOCK_CALL: method_optimized_type = 2; @@ -1025,6 +1039,9 @@ extern "C" { extern "C" { pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE); } +extern "C" { + pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} extern "C" { pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; } @@ -1047,6 +1064,122 @@ extern "C" { pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; } pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; +extern "C" { + pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID; +} +extern "C" { + pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_vm_ci_kwarg(ci: *const rb_callinfo) -> *const 
rb_callinfo_kwarg; +} +extern "C" { + pub fn rb_get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_cikw_keywords_idx( + cikw: *const rb_callinfo_kwarg, + idx: ::std::os::raw::c_int, + ) -> VALUE; +} +extern "C" { + pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; +} +extern "C" { + pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; +} +extern "C" { + pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; +} +extern "C" { + pub fn rb_get_cme_def_body_optimized_type( + cme: *const rb_callable_method_entry_t, + ) -> method_optimized_type; +} +extern "C" { + pub fn rb_get_cme_def_body_optimized_index( + cme: *const rb_callable_method_entry_t, + ) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_cme_def_body_cfunc( + cme: *const rb_callable_method_entry_t, + ) -> *mut rb_method_cfunc_t; +} +extern "C" { + pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize; +} +extern "C" { + pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; +} +extern "C" { + pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; +} +extern "C" { + pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; +} +extern "C" { + pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE; +} +extern "C" { + pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn 
rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_post(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_kwrest(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_accepts_no_kwarg(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_body_param_keyword( + iseq: *const rb_iseq_t, + ) -> *const rb_seq_param_keyword_struct; +} +extern "C" { + pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE; +} extern "C" { pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; } @@ -1056,6 +1189,15 @@ extern "C" { extern "C" { pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; } +extern "C" { + pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct; +} +extern "C" { + pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE; +} +extern "C" { + pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE; +} extern "C" { pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); } @@ -1065,9 +1207,54 @@ extern "C" { extern "C" { pub fn rb_cfp_get_iseq(cfp: *mut rb_control_frame_struct) -> *mut rb_iseq_t; } +extern "C" { + pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE; +} +extern "C" { + pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE; +} +extern "C" { + pub fn rb_get_cfp_ep_level(cfp: *mut 
rb_control_frame_struct, lv: u32) -> *const VALUE; +} +extern "C" { + pub fn rb_yarv_class_of(obj: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; +} +extern "C" { + pub fn rb_yarv_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; +} extern "C" { pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); } +extern "C" { + pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; +} +extern "C" { + pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; +} +extern "C" { + pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); +} +extern "C" { + pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; +} +extern "C" { + pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; +} +extern "C" { + pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE; +} extern "C" { pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; } From 5a76a15a0f93100c7ff6361a34b06af936cc36c6 Mon Sep 17 00:00:00 2001 From: Maple Ong Date: Mon, 15 Aug 2022 12:54:26 -0400 Subject: [PATCH 483/546] YJIT: Implement concatarray in yjit (https://github.com/Shopify/ruby/pull/405) * Create code generation func * Make rb_vm_concat_array available to use in Rust * Map opcode to code gen func * Implement code gen for concatarray * Add test for concatarray * Use new asm backend * Add comment to C func wrapper --- bootstraptest/test_yjit.rb | 10 ++++++++++ vm_insnhelper.c | 10 ++++++++++ yjit/src/codegen.rs | 25 +++++++++++++++++++++++++ yjit/src/cruby.rs | 1 + 4 files changed, 46 insertions(+) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 89d7c9a038051d..364ed7094b03cf 100644 --- 
a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -3245,3 +3245,13 @@ def foo(&block) end foo { "foo" }.call } + +assert_equal '[1, 2]', %q{ + def foo + x = [2] + [1, *x] + end + + foo + foo +} diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 3c41adcdc9f6ea..ab1394c7ca8c3f 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -4380,6 +4380,14 @@ vm_concat_array(VALUE ary1, VALUE ary2st) return rb_ary_concat(tmp1, tmp2); } +// YJIT implementation is using the C function +// and needs to call a non-static function +VALUE +rb_vm_concat_array(VALUE ary1, VALUE ary2st) +{ + return vm_concat_array(ary1, ary2st); +} + static VALUE vm_splat_array(VALUE flag, VALUE ary) { @@ -4395,6 +4403,8 @@ vm_splat_array(VALUE flag, VALUE ary) } } +// YJIT implementation is using the C function +// and needs to call a non-static function VALUE rb_vm_splat_array(VALUE flag, VALUE ary) { diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 3428466297c736..a6473842f882a0 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1256,6 +1256,30 @@ fn gen_splatarray( KeepCompiling } +// concat two arrays +fn gen_concatarray( + jit: &mut JITState, + ctx: &mut Context, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> CodegenStatus { + // Save the PC and SP because the callee may allocate + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_routine_call(jit, ctx, asm); + + // Get the operands from the stack + let ary2st_opnd = ctx.stack_pop(1); + let ary1_opnd = ctx.stack_pop(1); + + // Call rb_vm_concat_array(ary1, ary2st) + let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, ary2st_opnd]); + + let stack_ret = ctx.stack_push(Type::Array); + asm.mov(stack_ret, ary); + + KeepCompiling +} + // new range initialized from top 2 values fn gen_newrange( jit: &mut JITState, @@ -5862,6 +5886,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => 
Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), + YARVINSN_concatarray => Some(gen_concatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), YARVINSN_expandarray => Some(gen_expandarray), diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 2cf5134e8199c9..2f823e1b615354 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -112,6 +112,7 @@ pub use autogened::*; #[cfg_attr(test, allow(unused))] // We don't link against C code when testing extern "C" { pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; + pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_defined( ec: EcPtr, reg_cfp: CfpPtr, From 09c12111d42573a19e7077bd8fa7e7cb709179de Mon Sep 17 00:00:00 2001 From: "Noah Gibbs (and/or Benchmark CI)" Date: Fri, 12 Aug 2022 10:35:52 +0000 Subject: [PATCH 484/546] Port jit_rb_str_concat to new backend, re-enable cfunc lookup (https://github.com/Shopify/ruby/pull/402) --- yjit/src/codegen.rs | 95 +++++++++++++++++++++------------------------ yjit/src/core.rs | 2 +- 2 files changed, 45 insertions(+), 52 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a6473842f882a0..fd4a3e6b50e4a5 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1345,32 +1345,27 @@ fn guard_object_is_array( asm.jne(side_exit.into()); } -/* fn guard_object_is_string( - cb: &mut CodeBlock, - object_reg: X86Opnd, - flags_reg: X86Opnd, + asm: &mut Assembler, + object_reg: Opnd, side_exit: CodePtr, ) { - add_comment(cb, "guard object is string"); + asm.comment("guard object is string"); // Pull out the type mask - mov( - cb, - flags_reg, - mem_opnd( + let flags_reg = asm.load( + Opnd::mem( 8 * SIZEOF_VALUE as u8, object_reg, RUBY_OFFSET_RBASIC_FLAGS, ), ); - and(cb, flags_reg, uimm_opnd(RUBY_T_MASK as u64)); + let flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64)); // Compare the result with T_STRING - cmp(cb, flags_reg, uimm_opnd(RUBY_T_STRING as 
u64)); - jne_ptr(cb, side_exit); + asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64)); + asm.jne(side_exit.into()); } -*/ // push enough nils onto the stack to fill out an array fn gen_expandarray( @@ -2079,7 +2074,7 @@ fn gen_get_ivar( // Check that the slot is inside the extended table (num_slots > index) let num_slots = Opnd::mem(32, recv, ROBJECT_OFFSET_NUMIV); asm.cmp(num_slots, Opnd::UImm(ivar_index as u64)); - asm.jbe(Target::CodePtr(counted_exit!(ocb, side_exit, getivar_idx_out_of_range))); + asm.jbe(counted_exit!(ocb, side_exit, getivar_idx_out_of_range).into()); } // Get a pointer to the extended table @@ -3660,16 +3655,19 @@ fn jit_rb_str_uplus( asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64)); let ret_label = asm.new_label("stack_ret"); - // If the string isn't frozen, we just return it. It's already in REG0. + + // We guard for the receiver being a ::String, so the return value is too + let stack_ret = ctx.stack_push(Type::CString); + + // If the string isn't frozen, we just return it. + asm.mov(stack_ret, recv_opnd); asm.jz(ret_label); - // Str is frozen - duplicate + // Str is frozen - duplicate it let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + asm.mov(stack_ret, ret_opnd); asm.write_label(ret_label); - // We guard for an exact-class match on the receiver of rb_cString - let stack_ret = ctx.stack_push(Type::CString); - asm.mov(stack_ret, ret_opnd); true } @@ -3720,6 +3718,7 @@ fn jit_rb_str_to_s( } false } +*/ // Codegen for rb_str_concat() -- *not* String#concat // Frequently strings are concatenated using "out_str << next_str". @@ -3749,63 +3748,65 @@ fn jit_rb_str_concat( // Guard that the argument is of class String at runtime. 
let insn_opnd = StackOpnd(0); - let arg_opnd = asm.load(ctx.stack_opnd(0)); let arg_type = ctx.get_opnd_type(insn_opnd); + let concat_arg = ctx.stack_pop(1); + let recv = ctx.stack_pop(1); + + // If we're not compile-time certain that this will always be a string, guard at runtime if arg_type != Type::CString && arg_type != Type::TString { + let arg_opnd = asm.load(concat_arg); if !arg_type.is_heap() { asm.comment("guard arg not immediate"); - asm.test(REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); - asm.jnz(Target::CodePtr(side_exit)); + asm.test(arg_opnd, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(side_exit.into()); asm.cmp(arg_opnd, Qnil.into()); - asm.jbe(Target::CodePtr(side_exit)); + asm.jbe(side_exit.into()); ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); } - guard_object_is_string(cb, REG0, REG1, side_exit); - // We know this has type T_STRING, but not necessarily that it's a ::String + guard_object_is_string(asm, arg_opnd, side_exit); + // We know this is a string-or-subclass, but not necessarily that it's a ::String ctx.upgrade_opnd_type(insn_opnd, Type::TString); } - let concat_arg = ctx.stack_pop(1); - let recv = ctx.stack_pop(1); - // Test if string encodings differ. If different, use rb_str_append. If the same, // use rb_yjit_str_simple_append, which calls rb_str_cat. asm.comment("<< on strings"); - // Both rb_str_append and rb_yjit_str_simple_append take identical args - let ccall_args = vec![recv, concat_arg]; - // Take receiver's object flags XOR arg's flags. If any // string-encoding flags are different between the two, // the encodings don't match. 
+ let recv_reg = asm.load(recv); + let concat_arg_reg = asm.load(concat_arg); let flags_xor = asm.xor( - Opnd::mem(64, asm.load(recv), RUBY_OFFSET_RBASIC_FLAGS), - Opnd::mem(64, asm.load(concat_arg), RUBY_OFFSET_RBASIC_FLAGS) + Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::mem(64, concat_arg_reg, RUBY_OFFSET_RBASIC_FLAGS) ); asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64)); + // Push once, use the resulting operand in both branches below. + let stack_ret = ctx.stack_push(Type::CString); + let enc_mismatch = asm.new_label("enc_mismatch"); asm.jnz(enc_mismatch); // If encodings match, call the simple append function and jump to return - let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, ccall_args); - let ret_label = asm.new_label("stack_return"); + let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]); + let ret_label = asm.new_label("func_return"); + asm.mov(stack_ret, ret_opnd); asm.jmp(ret_label); // If encodings are different, use a slower encoding-aware concatenate asm.write_label(enc_mismatch); - asm.ccall(rb_str_buf_append as *const u8, ccall_args); + let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]); + asm.mov(stack_ret, ret_opnd); // Drop through to return asm.write_label(ret_label); - let stack_ret = ctx.stack_push(Type::CString); - asm.mov(stack_ret, ret_opnd); true } -*/ fn jit_thread_s_current( _jit: &mut JITState, @@ -3921,20 +3922,12 @@ fn gen_send_cfunc( if kw_arg.is_null() { let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def }); if let Some(known_cfunc_codegen) = codegen_p { - return CantCompile; /* - let start_pos = cb.get_write_ptr().raw_ptr() as usize; - if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) { - let written_bytes = cb.get_write_ptr().raw_ptr() as usize - start_pos; - if written_bytes < JUMP_SIZE_IN_BYTES { - add_comment(cb, "Writing NOPs to leave room for later invalidation code"); - nop(cb, 
(JUMP_SIZE_IN_BYTES - written_bytes) as u32); - } + if known_cfunc_codegen(jit, ctx, asm, ocb, ci, cme, block, argc, recv_known_klass) { // cfunc codegen generated code. Terminate the block so // there isn't multiple calls in the same block. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } - */ } } @@ -6141,8 +6134,8 @@ impl CodegenGlobals { //self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); //self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); - //self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); - self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); + self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); + //self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); // Thread.current self.yjit_reg_method( diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 1bc3d738ef4edf..354615db253472 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -69,7 +69,7 @@ impl Type { } else if val.flonum_p() { Type::Flonum } else { - unreachable!() + unreachable!("Illegal value: {:?}", val) } } else { // Core.rs can't reference rb_cString because it's linked by Rust-only tests. 
From 95dce1ccacb5e893bbd2bfb100c0778c5be83d47 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 16 Aug 2022 12:00:35 -0400 Subject: [PATCH 485/546] Temporarily disable rb_str_concat, add CI tests (https://github.com/Shopify/ruby/pull/407) Make sure we can load the test-all runner and run test_yjit.rb --- .cirrus.yml | 11 +++++++++-- yjit/src/codegen.rs | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 3425ebd175e6ef..35f908df048f75 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -163,6 +163,13 @@ yjit_task: bootstraptest/test_yjit_rust_port.rb \ bootstraptest/test_yjit.rb - # FIXME: not currently working on CI, missing cargo # Check that we can do a full ruby build - #full_build_script: make -j + full_build_script: source $HOME/.cargo/env && make -j + + # Check that we can build rdoc successfully + make_rdoc_script: source $HOME/.cargo/env && make -j rdoc + + # Run John's YJIT instruction tests, and make sure we can load the test-all runner + test_yjit_script: source $HOME/.cargo/env && make test-all TESTS='test/ruby/test_yjit.rb' RUN_OPTS="--yjit-call-threshold=1" + + # TODO: check that we can we run all of test-all successfully diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index fd4a3e6b50e4a5..ed11f7cf0fe942 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -6134,7 +6134,7 @@ impl CodegenGlobals { //self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); //self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); - self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); + //self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); //self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); // Thread.current From c38e9111478773a20656bfe5329cdab78dfb075b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 16 Aug 2022 12:51:16 -0700 Subject: [PATCH 486/546] Allow aarch64 to build YJIT --- yjit.h 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit.h b/yjit.h index 1f507f1b519fae..c9dc52c9b670e3 100644 --- a/yjit.h +++ b/yjit.h @@ -17,8 +17,8 @@ #if USE_YJIT -// We generate x86 assembly -#if defined(_WIN32) ? defined(_M_AMD64) : defined(__x86_64__) +// We generate x86 or arm64 assembly +#if defined(_WIN32) ? defined(_M_AMD64) : (defined(__x86_64__) || defined(__aarch64__)) // x86_64 platforms without mingw/msys or x64-mswin #else # error YJIT unsupported platform From 1cf9f56c55b1a771217678843c9546e368db0af3 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 17 Aug 2022 12:23:59 -0400 Subject: [PATCH 487/546] Fix issue with expandarray, add missing jl, enable tests (https://github.com/Shopify/ruby/pull/409) --- yjit/src/backend/arm64/mod.rs | 3 +++ yjit/src/backend/ir.rs | 2 ++ yjit/src/backend/x86_64/mod.rs | 8 ++++++++ yjit/src/codegen.rs | 2 +- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 64136bfdd221da..d92b4778e342f3 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -826,6 +826,9 @@ impl Assembler Op::Jne => { emit_conditional_jump::<{Condition::NE}>(cb, insn.target.unwrap()); }, + Op::Jl => { + emit_conditional_jump::<{Condition::LT}>(cb, insn.target.unwrap()); + }, Op::Jbe => { emit_conditional_jump::<{Condition::LS}>(cb, insn.target.unwrap()); }, diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index ef8cd5e8728d4c..952c8f7f10aa5d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -118,6 +118,7 @@ pub enum Op JmpOpnd, // Low-level conditional jump instructions + Jl, Jbe, Je, Jne, @@ -988,6 +989,7 @@ def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); def_push_jcc!(jmp, Op::Jmp); def_push_jcc!(je, Op::Je); def_push_jcc!(jne, Op::Jne); +def_push_jcc!(jl, Op::Jl); def_push_jcc!(jbe, Op::Jbe); def_push_jcc!(jz, Op::Jz); def_push_jcc!(jnz, Op::Jnz); diff --git 
a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index d474c9fe59a71b..1ec08dd78746ca 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -465,6 +465,14 @@ impl Assembler } } + Op::Jl => { + match insn.target.unwrap() { + Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr), + Target::Label(label_idx) => jl_label(cb, label_idx), + _ => unreachable!() + } + }, + Op::Jbe => { match insn.target.unwrap() { Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ed11f7cf0fe942..2e202ce2d038ac 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1445,7 +1445,7 @@ fn gen_expandarray( // Only handle the case where the number of values in the array is greater // than or equal to the number of values requested. asm.cmp(array_len_opnd, num.into()); - asm.jo(counted_exit!(ocb, side_exit, expandarray_rhs_too_small).into()); + asm.jl(counted_exit!(ocb, side_exit, expandarray_rhs_too_small).into()); // Load the address of the embedded array into REG1. // (struct RArray *)(obj)->as.ary From b735eb5ef39e73e2a0ea6bbdb6ff7ce41a998d63 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 17 Aug 2022 12:56:23 -0400 Subject: [PATCH 488/546] Instruction builders for backend IR (https://github.com/Shopify/ruby/pull/410) Currently we use macros to define the shape of each of the instruction building methods. This works while all of the instructions share the same fields, but is really hard to get working when they're an enum with different shapes. This is an incremental step toward a bigger refactor of changing the Insn from a struct to an enum. 
--- yjit/src/backend/arm64/mod.rs | 10 +- yjit/src/backend/ir.rs | 425 ++++++++++++++++++--------------- yjit/src/backend/x86_64/mod.rs | 8 +- 3 files changed, 246 insertions(+), 197 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index d92b4778e342f3..d2693fee32b3c9 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -228,17 +228,17 @@ impl Assembler Op::And | Op::Or | Op::Xor => { match (opnds[0], opnds[1]) { (Opnd::Reg(_), Opnd::Reg(_)) => { - asm.push_insn(insn.op, vec![opnds[0], opnds[1]], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnds[0], opnds[1]], insn.target, insn.text, insn.pos_marker); }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { let opnd1 = split_bitmask_immediate(asm, other_opnd); - asm.push_insn(insn.op, vec![reg_opnd, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![reg_opnd, opnd1], insn.target, insn.text, insn.pos_marker); }, _ => { let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.push_insn(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); } } }, @@ -284,7 +284,7 @@ impl Assembler } }).collect(); - asm.push_insn(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); }, Op::IncrCounter => { // We'll use LDADD later which only works with registers @@ -403,7 +403,7 @@ impl Assembler asm.test(opnd0, opnd1); }, _ => { - asm.push_insn(insn.op, opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, opnds, insn.target, insn.text, insn.pos_marker); } }; diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 952c8f7f10aa5d..481d447c5c8480 100644 --- a/yjit/src/backend/ir.rs +++ 
b/yjit/src/backend/ir.rs @@ -466,23 +466,15 @@ impl Assembler } } - /// Append an instruction to the list - pub(super) fn push_insn( - &mut self, - op: Op, - opnds: Vec, - target: Option, - text: Option, - pos_marker: Option - ) -> Opnd - { + /// Append an instruction onto the current list of instructions. + pub(super) fn push_insn(&mut self, mut insn: Insn) -> Opnd { // Index of this instruction let insn_idx = self.insns.len(); // If we find any InsnOut from previous instructions, we're going to // update the live range of the previous instruction to point to this // one. - for opnd in &opnds { + for opnd in &insn.opnds { match opnd { Opnd::InsnOut{ idx, .. } => { self.live_ranges[*idx] = insn_idx; @@ -496,7 +488,7 @@ impl Assembler let mut out_num_bits: u8 = 0; - for opnd in &opnds { + for opnd in &insn.opnds { match *opnd { Opnd::InsnOut{ num_bits, .. } | Opnd::Mem(Mem { num_bits, .. }) | @@ -518,15 +510,7 @@ impl Assembler // Operand for the output of this instruction let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; - - let insn = Insn { - op, - text, - opnds, - out: out_opnd, - target, - pos_marker, - }; + insn.out = out_opnd; self.insns.push(insn); self.live_ranges.push(insn_idx); @@ -535,40 +519,25 @@ impl Assembler out_opnd } - /// Add a comment at the current position - pub fn comment(&mut self, text: &str) - { - let insn = Insn { - op: Op::Comment, - text: Some(text.to_owned()), - opnds: vec![], - out: Opnd::None, - target: None, - pos_marker: None, - }; - self.insns.push(insn); - self.live_ranges.push(self.insns.len()); - } - - /// Bake a string at the current position - pub fn bake_string(&mut self, text: &str) + /// Append an instruction to the list by creating a new instruction from the + /// component parts given to this function. 
+ pub(super) fn push_insn_parts( + &mut self, + op: Op, + opnds: Vec, + target: Option, + text: Option, + pos_marker: Option + ) -> Opnd { - let insn = Insn { - op: Op::BakeString, - text: Some(text.to_owned()), - opnds: vec![], + self.push_insn(Insn { + op, + text, + opnds, out: Opnd::None, - target: None, - pos_marker: None, - }; - self.insns.push(insn); - self.live_ranges.push(self.insns.len()); - } - - /// Load an address relative to the given label. - #[must_use] - pub fn lea_label(&mut self, target: Target) -> Opnd { - self.push_insn(Op::LeaLabel, vec![], Some(target), None, None) + target, + pos_marker, + }) } /// Create a new label instance that we can jump to @@ -735,7 +704,7 @@ impl Assembler } ).collect(); - asm.push_insn(insn.op, reg_opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, reg_opnds, insn.target, insn.text, insn.pos_marker); // Set the output register for this instruction let num_insns = asm.insns.len(); @@ -777,16 +746,6 @@ impl Assembler pub fn into_lookback_iter(self) -> AssemblerLookbackIterator { AssemblerLookbackIterator::new(self) } - - pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { - let target = Target::FunPtr(fptr); - self.push_insn(Op::CCall, opnds, Some(target), None, None) - } - - // pub fn pos_marker(&mut self, marker_fn: F) - pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { - self.push_insn(Op::PosMarker, vec![], None, None, Some(Box::new(marker_fn))); - } } /// A struct that allows iterating through an assembler's instructions and @@ -898,134 +857,224 @@ impl fmt::Debug for Assembler { } } -macro_rules! 
def_push_jcc { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self, target: Target) - { - self.push_insn($opcode, vec![], Some(target), None, None); - } - } - }; -} +impl Assembler { + #[must_use] + pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Add, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + } -macro_rules! def_push_0_opnd { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - #[must_use] - pub fn $op_name(&mut self) -> Opnd - { - self.push_insn($opcode, vec![], None, None, None) - } - } - }; -} + #[must_use] + pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::And, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + } -macro_rules! def_push_0_opnd_no_out { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self) - { - self.push_insn($opcode, vec![], None, None, None); - } - } - }; -} + pub fn bake_string(&mut self, text: &str) { + self.push_insn(Insn { op: Op::BakeString, opnds: vec![], out: Opnd::None, text: Some(text.to_string()), target: None, pos_marker: None }); + } -macro_rules! def_push_1_opnd { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - #[must_use] - pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd - { - self.push_insn($opcode, vec![opnd0], None, None, None) - } - } - }; -} + pub fn breakpoint(&mut self) { + self.push_insn(Insn { op: Op::Breakpoint, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + } -macro_rules! 
def_push_1_opnd_no_out { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self, opnd0: Opnd) - { - self.push_insn($opcode, vec![opnd0], None, None, None); - } - } - }; -} + #[must_use] + pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { + self.push_insn(Insn { op: Op::CCall, opnds, out: Opnd::None, text: None, target: Some(Target::FunPtr(fptr)), pos_marker: None }) + } -macro_rules! def_push_2_opnd { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - #[must_use] - pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd - { - self.push_insn($opcode, vec![opnd0, opnd1], None, None, None) - } - } - }; -} + pub fn cmp(&mut self, left: Opnd, right: Opnd) { + self.push_insn(Insn { op: Op::Cmp, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }); + } -macro_rules! def_push_2_opnd_no_out { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) - { - self.push_insn($opcode, vec![opnd0, opnd1], None, None, None); - } - } - }; -} + pub fn comment(&mut self, text: &str) { + self.push_insn(Insn { op: Op::Comment, opnds: vec![], out: Opnd::None, text: Some(text.to_string()), target: None, pos_marker: None }); + } + + pub fn cpop(&mut self) -> Opnd { + self.push_insn(Insn { op: Op::CPop, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + pub fn cpop_all(&mut self) { + self.push_insn(Insn { op: Op::CPopAll, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + pub fn cpop_into(&mut self, opnd: Opnd) { + self.push_insn(Insn { op: Op::CPopInto, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + pub fn cpush(&mut self, opnd: Opnd) { + self.push_insn(Insn { op: Op::CPush, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + pub fn cpush_all(&mut self) { + self.push_insn(Insn { op: 
Op::CPushAll, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + } -def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); -def_push_jcc!(jmp, Op::Jmp); -def_push_jcc!(je, Op::Je); -def_push_jcc!(jne, Op::Jne); -def_push_jcc!(jl, Op::Jl); -def_push_jcc!(jbe, Op::Jbe); -def_push_jcc!(jz, Op::Jz); -def_push_jcc!(jnz, Op::Jnz); -def_push_jcc!(jo, Op::Jo); -def_push_2_opnd!(add, Op::Add); -def_push_2_opnd!(sub, Op::Sub); -def_push_2_opnd!(and, Op::And); -def_push_2_opnd!(or, Op::Or); -def_push_2_opnd!(xor, Op::Xor); -def_push_1_opnd!(not, Op::Not); -def_push_2_opnd!(lshift, Op::LShift); -def_push_2_opnd!(rshift, Op::RShift); -def_push_2_opnd!(urshift, Op::URShift); -def_push_1_opnd_no_out!(cpush, Op::CPush); -def_push_0_opnd!(cpop, Op::CPop); -def_push_1_opnd_no_out!(cpop_into, Op::CPopInto); -def_push_0_opnd_no_out!(cpush_all, Op::CPushAll); -def_push_0_opnd_no_out!(cpop_all, Op::CPopAll); -def_push_1_opnd_no_out!(cret, Op::CRet); -def_push_1_opnd!(load, Op::Load); -def_push_1_opnd!(load_sext, Op::LoadSExt); -def_push_1_opnd!(lea, Op::Lea); -def_push_1_opnd!(live_reg_opnd, Op::LiveReg); -def_push_2_opnd_no_out!(store, Op::Store); -def_push_2_opnd_no_out!(mov, Op::Mov); -def_push_2_opnd_no_out!(cmp, Op::Cmp); -def_push_2_opnd_no_out!(test, Op::Test); -def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); -def_push_2_opnd_no_out!(incr_counter, Op::IncrCounter); -def_push_2_opnd!(csel_z, Op::CSelZ); -def_push_2_opnd!(csel_nz, Op::CSelNZ); -def_push_2_opnd!(csel_e, Op::CSelE); -def_push_2_opnd!(csel_ne, Op::CSelNE); -def_push_2_opnd!(csel_l, Op::CSelL); -def_push_2_opnd!(csel_le, Op::CSelLE); -def_push_2_opnd!(csel_g, Op::CSelG); -def_push_2_opnd!(csel_ge, Op::CSelGE); -def_push_0_opnd_no_out!(frame_setup, Op::FrameSetup); -def_push_0_opnd_no_out!(frame_teardown, Op::FrameTeardown); + pub fn cret(&mut self, opnd: Opnd) { + self.push_insn(Insn { op: Op::CRet, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + 
#[must_use] + pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelG, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelGE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelL, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelLE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelNE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelNZ, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::CSelZ, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + pub fn frame_setup(&mut self) { + self.push_insn(Insn { op: Op::FrameSetup, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + pub fn frame_teardown(&mut self) { + self.push_insn(Insn { op: Op::FrameTeardown, opnds: vec![], out: Opnd::None, 
text: None, target: None, pos_marker: None }); + } + + pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) { + self.push_insn(Insn { op: Op::IncrCounter, opnds: vec![mem, value], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + pub fn jbe(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jbe, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn je(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Je, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn jl(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jl, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn jmp(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jmp, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn jmp_opnd(&mut self, opnd: Opnd) { + self.push_insn(Insn { op: Op::JmpOpnd, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + pub fn jne(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jne, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn jnz(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jnz, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn jo(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jo, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + pub fn jz(&mut self, target: Target) { + self.push_insn(Insn { op: Op::Jz, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + } + + #[must_use] + pub fn lea(&mut self, opnd: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Lea, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub 
fn lea_label(&mut self, target: Target) -> Opnd { + self.push_insn(Insn { op: Op::LeaLabel, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }) + } + + #[must_use] + pub fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::LiveReg, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn load(&mut self, opnd: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Load, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::LoadSExt, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::LShift, opnds: vec![opnd, shift], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + pub fn mov(&mut self, dest: Opnd, src: Opnd) { + self.push_insn(Insn { op: Op::Mov, opnds: vec![dest, src], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + #[must_use] + pub fn not(&mut self, opnd: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Not, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Or, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + //pub fn pos_marker(&mut self, marker_fn: F) + pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { + self.push_insn(Insn { op: Op::PosMarker, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: Some(Box::new(marker_fn)) }); + } + + #[must_use] + pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::RShift, opnds: vec![opnd, shift], out: Opnd::None, text: None, target: None, 
pos_marker: None }) + } + + pub fn store(&mut self, dest: Opnd, src: Opnd) { + self.push_insn(Insn { op: Op::Store, opnds: vec![dest, src], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + #[must_use] + pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Sub, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + pub fn test(&mut self, left: Opnd, right: Opnd) { + self.push_insn(Insn { op: Op::Test, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + + #[must_use] + pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::URShift, opnds: vec![opnd, shift], out: Opnd::None, text: None, target: None, pos_marker: None }) + } + + #[must_use] + pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd { + self.push_insn(Insn { op: Op::Xor, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + } +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 1ec08dd78746ca..65e2206c490175 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -174,7 +174,7 @@ impl Assembler _ => (opnds[0], opnds[1]) }; - asm.push_insn(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); }, // These instructions modify their input operand in-place, so we // may need to load the input value to preserve it @@ -195,7 +195,7 @@ impl Assembler _ => (opnds[0], opnds[1]) }; - asm.push_insn(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); }, Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { @@ -206,7 +206,7 @@ impl Assembler } }).collect(); - 
asm.push_insn(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); }, Op::Mov => { match (opnds[0], opnds[1]) { @@ -260,7 +260,7 @@ impl Assembler asm.not(opnd0); }, _ => { - asm.push_insn(insn.op, opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, opnds, insn.target, insn.text, insn.pos_marker); } }; From d57a9f61a065418ef99fcbbb65eca4f34f33f1c8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 17 Aug 2022 12:59:08 -0400 Subject: [PATCH 489/546] Build output operands explicitly (https://github.com/Shopify/ruby/pull/411) When we're pushing instructions onto the assembler, we previously would iterate through the instruction's operands and then assign the output operand to it through the push_insn function. This is easy when all instructions have a vector of operands, but is much more difficult when the shape differs in an enum. This commit changes it so that we explicitly define the output operand for each instruction before it gets pushed onto the assembler. This has the added benefit of changing the definition of push_insn to no longer require a mutable instruction. This paves the way to make the out field on the instructions an Option instead which is going to more accurately reflect the behavior we're going to have once we switch the instructions over to an enum instead of a struct. --- yjit/src/backend/ir.rs | 181 ++++++++++++++++++++++++++--------------- 1 file changed, 116 insertions(+), 65 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 481d447c5c8480..d2b90c337370a1 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -466,8 +466,41 @@ impl Assembler } } - /// Append an instruction onto the current list of instructions. 
- pub(super) fn push_insn(&mut self, mut insn: Insn) -> Opnd { + /// Build an Opnd::InsnOut from the current index of the assembler and the + /// given slice of operands. The operands are given to determine the number + /// of bits necessary for the output operand. They should all be the same + /// size. + fn next_opnd_out(&self, opnds: &[Opnd]) -> Opnd { + let mut out_num_bits: Option = None; + + for opnd in opnds { + match opnd { + Opnd::InsnOut { num_bits, .. } | + Opnd::Mem(Mem { num_bits, .. }) | + Opnd::Reg(Reg { num_bits, .. }) => { + match out_num_bits { + None => { + out_num_bits = Some(*num_bits); + }, + Some(out_num_bits) => { + assert_eq!(out_num_bits, *num_bits, "operands of incompatible sizes"); + } + }; + } + _ => {} + } + } + + Opnd::InsnOut { + idx: self.insns.len(), + num_bits: out_num_bits.unwrap_or(64) + } + } + + /// Append an instruction onto the current list of instructions and update + /// the live ranges of any instructions whose outputs are being used as + /// operands to this instruction. + pub(super) fn push_insn(&mut self, insn: Insn) { // Index of this instruction let insn_idx = self.insns.len(); @@ -476,51 +509,25 @@ impl Assembler // one. for opnd in &insn.opnds { match opnd { - Opnd::InsnOut{ idx, .. } => { + Opnd::InsnOut { idx, .. } => { + assert!(*idx < self.insns.len()); self.live_ranges[*idx] = insn_idx; } Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { + assert!(*idx < self.insns.len()); self.live_ranges[*idx] = insn_idx; } _ => {} } } - let mut out_num_bits: u8 = 0; - - for opnd in &insn.opnds { - match *opnd { - Opnd::InsnOut{ num_bits, .. } | - Opnd::Mem(Mem { num_bits, .. }) | - Opnd::Reg(Reg { num_bits, .. 
}) => { - if out_num_bits == 0 { - out_num_bits = num_bits - } - else if out_num_bits != num_bits { - panic!("operands of incompatible sizes"); - } - } - _ => {} - } - } - - if out_num_bits == 0 { - out_num_bits = 64; - } - - // Operand for the output of this instruction - let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; - insn.out = out_opnd; - self.insns.push(insn); self.live_ranges.push(insn_idx); - - // Return an operand for the output of this instruction - out_opnd } /// Append an instruction to the list by creating a new instruction from the - /// component parts given to this function. + /// component parts given to this function. This will also create a new + /// output operand from the given operands for the new instruction. pub(super) fn push_insn_parts( &mut self, op: Op, @@ -530,14 +537,9 @@ impl Assembler pos_marker: Option ) -> Opnd { - self.push_insn(Insn { - op, - text, - opnds, - out: Opnd::None, - target, - pos_marker, - }) + let out = self.next_opnd_out(&opnds); + self.push_insn(Insn { op, text, opnds, out, target, pos_marker }); + out } /// Create a new label instance that we can jump to @@ -860,12 +862,16 @@ impl fmt::Debug for Assembler { impl Assembler { #[must_use] pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Add, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[left, right]); + self.push_insn(Insn { op: Op::Add, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::And, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[left, right]); + self.push_insn(Insn { op: Op::And, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + out } pub fn bake_string(&mut self, text: &str) { @@ 
-878,7 +884,9 @@ impl Assembler { #[must_use] pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { - self.push_insn(Insn { op: Op::CCall, opnds, out: Opnd::None, text: None, target: Some(Target::FunPtr(fptr)), pos_marker: None }) + let out = self.next_opnd_out(&opnds); + self.push_insn(Insn { op: Op::CCall, opnds, out, text: None, target: Some(Target::FunPtr(fptr)), pos_marker: None }); + out } pub fn cmp(&mut self, left: Opnd, right: Opnd) { @@ -889,8 +897,11 @@ impl Assembler { self.push_insn(Insn { op: Op::Comment, opnds: vec![], out: Opnd::None, text: Some(text.to_string()), target: None, pos_marker: None }); } + #[must_use] pub fn cpop(&mut self) -> Opnd { - self.push_insn(Insn { op: Op::CPop, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[]); + self.push_insn(Insn { op: Op::CPop, opnds: vec![], out, text: None, target: None, pos_marker: None }); + out } pub fn cpop_all(&mut self) { @@ -915,42 +926,58 @@ impl Assembler { #[must_use] pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelG, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelG, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelGE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = 
self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelGE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelL, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelL, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelLE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelLE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelNE, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelNE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelNZ, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelNZ, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::CSelZ, opnds: vec![truthy, falsy], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[truthy, falsy]); + self.push_insn(Insn { op: Op::CSelZ, opnds: 
vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + out } pub fn frame_setup(&mut self) { @@ -1003,32 +1030,44 @@ impl Assembler { #[must_use] pub fn lea(&mut self, opnd: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Lea, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd]); + self.push_insn(Insn { op: Op::Lea, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn lea_label(&mut self, target: Target) -> Opnd { - self.push_insn(Insn { op: Op::LeaLabel, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }) + let out = self.next_opnd_out(&[]); + self.push_insn(Insn { op: Op::LeaLabel, opnds: vec![], out, text: None, target: Some(target), pos_marker: None }); + out } #[must_use] pub fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::LiveReg, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd]); + self.push_insn(Insn { op: Op::LiveReg, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn load(&mut self, opnd: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Load, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd]); + self.push_insn(Insn { op: Op::Load, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::LoadSExt, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd]); + self.push_insn(Insn { op: Op::LoadSExt, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::LShift, opnds: 
vec![opnd, shift], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd, shift]); + self.push_insn(Insn { op: Op::LShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); + out } pub fn mov(&mut self, dest: Opnd, src: Opnd) { @@ -1037,12 +1076,16 @@ impl Assembler { #[must_use] pub fn not(&mut self, opnd: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Not, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd]); + self.push_insn(Insn { op: Op::Not, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Or, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[left, right]); + self.push_insn(Insn { op: Op::Or, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + out } //pub fn pos_marker(&mut self, marker_fn: F) @@ -1052,7 +1095,9 @@ impl Assembler { #[must_use] pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::RShift, opnds: vec![opnd, shift], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd, shift]); + self.push_insn(Insn { op: Op::RShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); + out } pub fn store(&mut self, dest: Opnd, src: Opnd) { @@ -1061,7 +1106,9 @@ impl Assembler { #[must_use] pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Sub, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[left, right]); + self.push_insn(Insn { op: Op::Sub, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + out } pub fn test(&mut self, left: Opnd, right: Opnd) 
{ @@ -1070,11 +1117,15 @@ impl Assembler { #[must_use] pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::URShift, opnds: vec![opnd, shift], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[opnd, shift]); + self.push_insn(Insn { op: Op::URShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); + out } #[must_use] pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd { - self.push_insn(Insn { op: Op::Xor, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }) + let out = self.next_opnd_out(&[left, right]); + self.push_insn(Insn { op: Op::Xor, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + out } } From c70d1471c1723f26ca54699f056887fe200c973e Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 17 Aug 2022 15:42:39 -0400 Subject: [PATCH 490/546] Only check lowest bit for _Bool type (https://github.com/Shopify/ruby/pull/412) * Only check lowest bit for _Bool type The `test AL, AL` got lost during porting and we were generating `test RAX, RAX` instead. The upper bits of a `_Bool` return type are unspecified and we were failing `TestClass#test_singleton_class_should_has_own_namespace` due to interpreting the return value incorrectly. * Enable test_class for test-all on x86_64 --- yjit/src/codegen.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 2e202ce2d038ac..1399a92f142eeb 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5526,8 +5526,9 @@ fn gen_opt_getinlinecache( vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)] ); - // Check the result. _Bool is one byte in SysV. - asm.test(ret_val, ret_val); + // Check the result. SysV only specifies one byte for _Bool return values, + // so it's important we only check one bit to ignore the higher bits in the register.
+ asm.test(ret_val, 1.into()); asm.jz(counted_exit!(ocb, side_exit, opt_getinlinecache_miss).into()); let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); From b00606eb644e4ffb42b9267f7d81b352845a29ae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 17 Aug 2022 16:08:41 -0400 Subject: [PATCH 491/546] Even more prep for instruction enum (https://github.com/Shopify/ruby/pull/413) * Mutate in place for register allocation Currently we allocate a new instruction every time when we're doing register allocation by first splitting up the instruction into its component parts, mapping the operands and the output, and then pushing all of its parts onto the new assembler. Since we don't need the old instruction, we can mutate the existing one in place. While it's not that big of a win in and of itself, it matches much more closely to what we're going to have to do when we switch the instruction from being a struct to being an enum, because it's much easier for the instruction to modify itself since it knows its own shape than it is to push a new instruction that very closely matches. * Mutate in place for arm64 split When we're splitting instructions for the arm64 backend, we map all of the operands for a given instruction when it has an Opnd::Value. We can do this in place with the existing operand instead of allocating a new vector each time. This enables us to pattern match against the entire instruction instead of just the opcode, which is much closer to matching against an enum. * Match against entire instruction in arm64_emit Instead of matching against the opcode and then accessing all of the various fields on the instruction when emitting bytecode for arm64, we should instead match against the entire instruction. This makes it much closer to what's going to happen when we switch it over to being an enum. 
* Match against entire instruction in x86_64 backend When we're splitting or emitting code for x86_64, we should match against the entire instruction instead of matching against just the opcode. This gets us closer to matching against an enum instead of a struct. * Reuse instructions for arm64_split When we're splitting, the default behavior was previously to split up the instruction into its component parts and then reassemble them in a new instruction. Instead, we can reuse the existing instruction. --- yjit/src/backend/arm64/mod.rs | 264 ++++++++++++++-------------- yjit/src/backend/ir.rs | 132 +++++++------- yjit/src/backend/x86_64/mod.rs | 311 +++++++++++++++++---------------- 3 files changed, 367 insertions(+), 340 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index d2693fee32b3c9..501e0a6138302a 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -186,29 +186,27 @@ impl Assembler let asm = &mut asm_local; let mut iterator = self.into_draining_iter(); - while let Some((index, insn)) = iterator.next_mapped() { + while let Some((index, mut insn)) = iterator.next_mapped() { // Here we're going to map the operands of the instruction to load // any Opnd::Value operands into registers if they are heap objects // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integer. - let opnds: Vec = insn.opnds.into_iter().map(|opnd| { + for opnd in &mut insn.opnds { match opnd { Opnd::Value(value) => { if value.special_const_p() { - Opnd::UImm(value.as_u64()) - } else if insn.op == Op::Load { - opnd - } else { - asm.load(opnd) + *opnd = Opnd::UImm(value.as_u64()); + } else if insn.op != Op::Load { + *opnd = asm.load(*opnd); } }, - _ => opnd - } - }).collect(); + _ => {} + }; + } - match insn.op { - Op::Add => { + match insn { + Insn { op: Op::Add, opnds, .. 
} => { match (opnds[0], opnds[1]) { (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { asm.add(opnds[0], opnds[1]); @@ -225,24 +223,24 @@ impl Assembler } } }, - Op::And | Op::Or | Op::Xor => { + Insn { op: Op::And | Op::Or | Op::Xor, opnds, target, text, pos_marker, .. } => { match (opnds[0], opnds[1]) { (Opnd::Reg(_), Opnd::Reg(_)) => { - asm.push_insn_parts(insn.op, vec![opnds[0], opnds[1]], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnds[0], opnds[1]], target, text, pos_marker); }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { let opnd1 = split_bitmask_immediate(asm, other_opnd); - asm.push_insn_parts(insn.op, vec![reg_opnd, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![reg_opnd, opnd1], target, text, pos_marker); }, _ => { let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.push_insn_parts(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnd0, opnd1], target, text, pos_marker); } } }, - Op::CCall => { + Insn { op: Op::CCall, opnds, target, .. } => { assert!(opnds.len() <= C_ARG_OPNDS.len()); // For each of the operands we're going to first load them @@ -257,9 +255,9 @@ impl Assembler // Now we push the CCall without any arguments so that it // just performs the call. - asm.ccall(insn.target.unwrap().unwrap_fun_ptr(), vec![]); + asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]); }, - Op::Cmp => { + Insn { op: Op::Cmp, opnds, .. } => { let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], _ => split_load_operand(asm, opnds[0]) @@ -268,15 +266,14 @@ impl Assembler let opnd1 = split_shifted_immediate(asm, opnds[1]); asm.cmp(opnd0, opnd1); }, - Op::CRet => { + Insn { op: Op::CRet, opnds, .. 
} => { if opnds[0] != Opnd::Reg(C_RET_REG) { let value = split_load_operand(asm, opnds[0]); asm.mov(C_RET_OPND, value); } asm.cret(C_RET_OPND); }, - Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | - Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { + Insn { op: Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE, opnds, target, text, pos_marker, .. } => { let new_opnds = opnds.into_iter().map(|opnd| { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, @@ -284,9 +281,9 @@ impl Assembler } }).collect(); - asm.push_insn_parts(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, new_opnds, target, text, pos_marker); }, - Op::IncrCounter => { + Insn { op: Op::IncrCounter, opnds, .. } => { // We'll use LDADD later which only works with registers // ... Load pointer into register let counter_addr = split_lea_operand(asm, opnds[0]); @@ -299,7 +296,7 @@ impl Assembler asm.incr_counter(counter_addr, addend); }, - Op::JmpOpnd => { + Insn { op: Op::JmpOpnd, opnds, .. } => { if let Opnd::Mem(_) = opnds[0] { let opnd0 = split_load_operand(asm, opnds[0]); asm.jmp_opnd(opnd0); @@ -307,10 +304,10 @@ impl Assembler asm.jmp_opnd(opnds[0]); } }, - Op::Load => { + Insn { op: Op::Load, opnds, .. } => { split_load_operand(asm, opnds[0]); }, - Op::LoadSExt => { + Insn { op: Op::LoadSExt, opnds, .. } => { match opnds[0] { // We only want to sign extend if the operand is a // register, instruction output, or memory address that @@ -326,7 +323,7 @@ impl Assembler } }; }, - Op::Mov => { + Insn { op: Op::Mov, opnds, .. } => { let value = match (opnds[0], opnds[1]) { // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll @@ -353,7 +350,7 @@ impl Assembler _ => unreachable!() }; }, - Op::Not => { + Insn { op: Op::Not, opnds, .. 
} => { // The value that is being negated must be in a register, so // if we get anything else we need to load it first. let opnd0 = match opnds[0] { @@ -363,7 +360,7 @@ impl Assembler asm.not(opnd0); }, - Op::Store => { + Insn { op: Op::Store, opnds, .. } => { // The displacement for the STUR instruction can't be more // than 9 bits long. If it's longer, we need to load the // memory address into a register first. @@ -378,7 +375,7 @@ impl Assembler asm.store(opnd0, opnd1); }, - Op::Sub => { + Insn { op: Op::Sub, opnds, .. } => { let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], _ => split_load_operand(asm, opnds[0]) @@ -387,7 +384,7 @@ impl Assembler let opnd1 = split_shifted_immediate(asm, opnds[1]); asm.sub(opnd0, opnd1); }, - Op::Test => { + Insn { op: Op::Test, opnds, .. } => { // The value being tested must be in a register, so if it's // not already one we'll load it first. let opnd0 = match opnds[0] { @@ -403,7 +400,10 @@ impl Assembler asm.test(opnd0, opnd1); }, _ => { - asm.push_insn_parts(insn.op, opnds, insn.target, insn.text, insn.pos_marker); + if insn.out.is_some() { + insn.out = asm.next_opnd_out(&insn.opnds); + } + asm.push_insn(insn); } }; @@ -569,23 +569,23 @@ impl Assembler // For each instruction for insn in &self.insns { - match insn.op { - Op::Comment => { + match insn { + Insn { op: Op::Comment, text, .. } => { if cfg!(feature = "asm_comments") { - cb.add_comment(&insn.text.as_ref().unwrap()); + cb.add_comment(text.as_ref().unwrap()); } }, - Op::Label => { - cb.write_label(insn.target.unwrap().unwrap_label_idx()); + Insn { op: Op::Label, target, .. } => { + cb.write_label(target.unwrap().unwrap_label_idx()); }, // Report back the current position in the generated code - Op::PosMarker => { + Insn { op: Op::PosMarker, pos_marker, .. 
} => { let pos = cb.get_write_ptr(); - let pos_marker_fn = insn.pos_marker.as_ref().unwrap(); + let pos_marker_fn = pos_marker.as_ref().unwrap(); pos_marker_fn(pos); } - Op::BakeString => { - let str = insn.text.as_ref().unwrap(); + Insn { op: Op::BakeString, text, .. } => { + let str = text.as_ref().unwrap(); for byte in str.as_bytes() { cb.write_byte(*byte); } @@ -600,65 +600,65 @@ impl Assembler cb.write_byte(0); } }, - Op::Add => { - adds(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Add, opnds, out, .. } => { + adds(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::FrameSetup => { + Insn { op: Op::FrameSetup, .. } => { stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); // X29 (frame_pointer) = SP mov(cb, X29, C_SP_REG); }, - Op::FrameTeardown => { + Insn { op: Op::FrameTeardown, .. } => { // SP = X29 (frame pointer) mov(cb, C_SP_REG, X29); ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, - Op::Sub => { - subs(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Sub, opnds, out, .. } => { + subs(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::And => { - and(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::And, opnds, out, .. } => { + and(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::Or => { - orr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Or, opnds, out, .. } => { + orr(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::Xor => { - eor(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Xor, opnds, out, .. } => { + eor(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::Not => { - mvn(cb, insn.out.into(), insn.opnds[0].into()); + Insn { op: Op::Not, opnds, out, .. 
} => { + mvn(cb, (*out).into(), opnds[0].into()); }, - Op::RShift => { - asr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::RShift, opnds, out, .. } => { + asr(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::URShift => { - lsr(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::URShift, opnds, out, .. } => { + lsr(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::LShift => { - lsl(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::LShift, opnds, out, .. } => { + lsl(cb, (*out).into(), opnds[0].into(), opnds[1].into()); }, - Op::Store => { + Insn { op: Op::Store, opnds, .. } => { // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. However in // our IR we have the address first and the register second. - stur(cb, insn.opnds[1].into(), insn.opnds[0].into()); + stur(cb, opnds[1].into(), opnds[0].into()); }, - Op::Load => { - match insn.opnds[0] { + Insn { op: Op::Load, opnds, out, .. } => { + match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => { - mov(cb, insn.out.into(), insn.opnds[0].into()); + mov(cb, (*out).into(), opnds[0].into()); }, Opnd::UImm(uimm) => { - emit_load_value(cb, insn.out.into(), uimm); + emit_load_value(cb, (*out).into(), uimm); }, Opnd::Imm(imm) => { - emit_load_value(cb, insn.out.into(), imm as u64); + emit_load_value(cb, (*out).into(), imm as u64); }, Opnd::Mem(_) => { - ldur(cb, insn.out.into(), insn.opnds[0].into()); + ldur(cb, (*out).into(), opnds[0].into()); }, Opnd::Value(value) => { // We dont need to check if it's a special const @@ -670,7 +670,7 @@ impl Assembler // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. 
- ldr_literal(cb, insn.out.into(), 2); + ldr_literal(cb, (*out).into(), 2); b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); @@ -682,29 +682,29 @@ impl Assembler } }; }, - Op::LoadSExt => { - match insn.opnds[0] { + Insn { op: Op::LoadSExt, opnds, out, .. } => { + match opnds[0] { Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::InsnOut { num_bits: 32, .. } => { - sxtw(cb, insn.out.into(), insn.opnds[0].into()); + sxtw(cb, (*out).into(), opnds[0].into()); }, Opnd::Mem(Mem { num_bits: 32, .. }) => { - ldursw(cb, insn.out.into(), insn.opnds[0].into()); + ldursw(cb, (*out).into(), opnds[0].into()); }, _ => unreachable!() }; }, - Op::Mov => { - mov(cb, insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Mov, opnds, .. } => { + mov(cb, opnds[0].into(), opnds[1].into()); }, - Op::Lea => { - let opnd: A64Opnd = insn.opnds[0].into(); + Insn { op: Op::Lea, opnds, out, .. } => { + let opnd: A64Opnd = opnds[0].into(); match opnd { A64Opnd::Mem(mem) => { add( cb, - insn.out.into(), + (*out).into(), A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), A64Opnd::new_imm(mem.disp.into()) ); @@ -714,25 +714,25 @@ impl Assembler } }; }, - Op::LeaLabel => { - let label_idx = insn.target.unwrap().unwrap_label_idx(); + Insn { op: Op::LeaLabel, out, target, .. } => { + let label_idx = target.unwrap().unwrap_label_idx(); cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| { adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); - mov(cb, insn.out.into(), Self::SCRATCH0); + mov(cb, (*out).into(), Self::SCRATCH0); }, - Op::CPush => { - emit_push(cb, insn.opnds[0].into()); + Insn { op: Op::CPush, opnds, .. } => { + emit_push(cb, opnds[0].into()); }, - Op::CPop => { - emit_pop(cb, insn.out.into()); + Insn { op: Op::CPop, out, .. } => { + emit_pop(cb, (*out).into()); }, - Op::CPopInto => { - emit_pop(cb, insn.opnds[0].into()); + Insn { op: Op::CPopInto, opnds, .. 
} => { + emit_pop(cb, opnds[0].into()); }, - Op::CPushAll => { + Insn { op: Op::CPushAll, .. } => { let regs = Assembler::get_caller_save_regs(); for reg in regs { @@ -743,7 +743,7 @@ impl Assembler mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); emit_push(cb, Self::SCRATCH0); }, - Op::CPopAll => { + Insn { op: Op::CPopAll, .. } => { let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register @@ -754,10 +754,10 @@ impl Assembler emit_pop(cb, A64Opnd::Reg(reg)); } }, - Op::CCall => { + Insn { op: Op::CCall, target, .. } => { // The offset to the call target in bytes let src_addr = cb.get_write_ptr().into_i64(); - let dst_addr = insn.target.unwrap().unwrap_fun_ptr() as i64; + let dst_addr = target.unwrap().unwrap_fun_ptr() as i64; let offset = dst_addr - src_addr; // The offset in instruction count for BL's immediate let offset = offset / 4; @@ -771,20 +771,20 @@ impl Assembler blr(cb, Self::SCRATCH0); } }, - Op::CRet => { + Insn { op: Op::CRet, .. } => { ret(cb, A64Opnd::None); }, - Op::Cmp => { - cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Cmp, opnds, .. } => { + cmp(cb, opnds[0].into(), opnds[1].into()); }, - Op::Test => { - tst(cb, insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Test, opnds, .. } => { + tst(cb, opnds[0].into(), opnds[1].into()); }, - Op::JmpOpnd => { - br(cb, insn.opnds[0].into()); + Insn { op: Op::JmpOpnd, opnds, .. } => { + br(cb, opnds[0].into()); }, - Op::Jmp => { - match insn.target.unwrap() { + Insn { op: Op::Jmp, target, .. } => { + match target.unwrap() { Target::CodePtr(dst_ptr) => { let src_addr = cb.get_write_ptr().into_i64(); let dst_addr = dst_ptr.into_i64(); @@ -820,52 +820,52 @@ impl Assembler _ => unreachable!() }; }, - Op::Je => { - emit_conditional_jump::<{Condition::EQ}>(cb, insn.target.unwrap()); + Insn { op: Op::Je, target, .. 
} => { + emit_conditional_jump::<{Condition::EQ}>(cb, target.unwrap()); }, - Op::Jne => { - emit_conditional_jump::<{Condition::NE}>(cb, insn.target.unwrap()); + Insn { op: Op::Jne, target, .. } => { + emit_conditional_jump::<{Condition::NE}>(cb, target.unwrap()); }, - Op::Jl => { - emit_conditional_jump::<{Condition::LT}>(cb, insn.target.unwrap()); + Insn { op: Op::Jl, target, .. } => { + emit_conditional_jump::<{Condition::LT}>(cb, target.unwrap()); }, - Op::Jbe => { - emit_conditional_jump::<{Condition::LS}>(cb, insn.target.unwrap()); + Insn { op: Op::Jbe, target, .. } => { + emit_conditional_jump::<{Condition::LS}>(cb, target.unwrap()); }, - Op::Jz => { - emit_conditional_jump::<{Condition::EQ}>(cb, insn.target.unwrap()); + Insn { op: Op::Jz, target, .. } => { + emit_conditional_jump::<{Condition::EQ}>(cb, target.unwrap()); }, - Op::Jnz => { - emit_conditional_jump::<{Condition::NE}>(cb, insn.target.unwrap()); + Insn { op: Op::Jnz, target, .. } => { + emit_conditional_jump::<{Condition::NE}>(cb, target.unwrap()); }, - Op::Jo => { - emit_conditional_jump::<{Condition::VS}>(cb, insn.target.unwrap()); + Insn { op: Op::Jo, target, .. } => { + emit_conditional_jump::<{Condition::VS}>(cb, target.unwrap()); }, - Op::IncrCounter => { - ldaddal(cb, insn.opnds[1].into(), insn.opnds[1].into(), insn.opnds[0].into()); + Insn { op: Op::IncrCounter, opnds, .. } => { + ldaddal(cb, opnds[1].into(), opnds[1].into(), opnds[0].into()); }, - Op::Breakpoint => { + Insn { op: Op::Breakpoint, .. } => { brk(cb, A64Opnd::None); }, - Op::CSelZ | Op::CSelE => { - csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::EQ); + Insn { op: Op::CSelZ | Op::CSelE, opnds, out, .. } => { + csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::EQ); }, - Op::CSelNZ | Op::CSelNE => { - csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::NE); + Insn { op: Op::CSelNZ | Op::CSelNE, opnds, out, .. 
} => { + csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::NE); }, - Op::CSelL => { - csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::LT); + Insn { op: Op::CSelL, opnds, out, .. } => { + csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::LT); }, - Op::CSelLE => { - csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::LE); + Insn { op: Op::CSelLE, opnds, out, .. } => { + csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::LE); }, - Op::CSelG => { - csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::GT); + Insn { op: Op::CSelG, opnds, out, .. } => { + csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::GT); }, - Op::CSelGE => { - csel(cb, insn.out.into(), insn.opnds[0].into(), insn.opnds[1].into(), Condition::GE); + Insn { op: Op::CSelGE, opnds, out, .. } => { + csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::GE); } - Op::LiveReg => (), // just a reg alloc signal, no code + Insn { op: Op::LiveReg, .. } => (), // just a reg alloc signal, no code }; } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index d2b90c337370a1..985b8e0500ffef 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -286,16 +286,21 @@ impl Opnd } } - /// Get the size in bits for register/memory operands - pub fn rm_num_bits(&self) -> u8 { + /// Get the size in bits for this operand if there is one. + fn num_bits(&self) -> Option { match *self { - Opnd::Reg(reg) => reg.num_bits, - Opnd::Mem(mem) => mem.num_bits, - Opnd::InsnOut{ num_bits, .. } => num_bits, - _ => unreachable!() + Opnd::Reg(Reg { num_bits, .. }) => Some(num_bits), + Opnd::Mem(Mem { num_bits, .. }) => Some(num_bits), + Opnd::InsnOut { num_bits, .. } => Some(num_bits), + _ => None } } + /// Get the size in bits for register/memory operands. 
+ pub fn rm_num_bits(&self) -> u8 { + self.num_bits().unwrap() + } + /// Maps the indices from a previous list of instructions to a new list of /// instructions. pub fn map_index(self, indices: &Vec) -> Opnd { @@ -309,6 +314,27 @@ impl Opnd _ => self } } + + /// Determine the size in bits of the slice of the given operands. If any of + /// them are different sizes this will panic. + fn match_num_bits(opnds: &[Opnd]) -> u8 { + let mut value: Option = None; + + for opnd in opnds { + if let Some(num_bits) = opnd.num_bits() { + match value { + None => { + value = Some(num_bits); + }, + Some(value) => { + assert_eq!(value, num_bits, "operands of incompatible sizes"); + } + }; + } + } + + value.unwrap_or(64) + } } impl From for Opnd { @@ -470,30 +496,10 @@ impl Assembler /// given slice of operands. The operands are given to determine the number /// of bits necessary for the output operand. They should all be the same /// size. - fn next_opnd_out(&self, opnds: &[Opnd]) -> Opnd { - let mut out_num_bits: Option = None; - - for opnd in opnds { - match opnd { - Opnd::InsnOut { num_bits, .. } | - Opnd::Mem(Mem { num_bits, .. }) | - Opnd::Reg(Reg { num_bits, .. }) => { - match out_num_bits { - None => { - out_num_bits = Some(*num_bits); - }, - Some(out_num_bits) => { - assert_eq!(out_num_bits, *num_bits, "operands of incompatible sizes"); - } - }; - } - _ => {} - } - } - + pub(super) fn next_opnd_out(&self, opnds: &[Opnd]) -> Opnd { Opnd::InsnOut { idx: self.insns.len(), - num_bits: out_num_bits.unwrap_or(64) + num_bits: Opnd::match_num_bits(opnds) } } @@ -619,14 +625,14 @@ impl Assembler let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); let mut iterator = self.into_draining_iter(); - while let Some((index, insn)) = iterator.next_unmapped() { + while let Some((index, mut insn)) = iterator.next_unmapped() { // Check if this is the last instruction that uses an operand that // spans more than one instruction. 
In that case, return the // allocated register to the pool. for opnd in &insn.opnds { match opnd { - Opnd::InsnOut{idx, .. } | - Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { + Opnd::InsnOut{ idx, .. } | + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { // Since we have an InsnOut, we know it spans more that one // instruction. let start_index = *idx; @@ -643,7 +649,6 @@ impl Assembler } } } - _ => {} } } @@ -655,12 +660,23 @@ impl Assembler // If this instruction is used by another instruction, // we need to allocate a register to it - let mut out_reg = Opnd::None; if live_ranges[index] != index { + // If we get to this point where the end of the live range is + // not equal to the index of the instruction, then it must be + // true that we set an output operand for this instruction. If + // it's not true, something has gone wrong. + assert!( + !matches!(insn.out, Opnd::None), + "Instruction output reused but no output operand set" + ); + + // This is going to be the output operand that we will set on + // the instruction. + let mut out_reg: Option = None; // C return values need to be mapped to the C return register if insn.op == Op::CCall { - out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &C_RET_REG)) + out_reg = Some(take_reg(&mut pool, ®s, &C_RET_REG)); } // If this instruction's first operand maps to a register and @@ -672,50 +688,44 @@ impl Assembler if let Opnd::InsnOut{idx, ..} = insn.opnds[0] { if live_ranges[idx] == index { if let Opnd::Reg(reg) = asm.insns[idx].out { - out_reg = Opnd::Reg(take_reg(&mut pool, ®s, ®)) + out_reg = Some(take_reg(&mut pool, ®s, ®)); } } } } - // Allocate a new register for this instruction - if out_reg == Opnd::None { + // Allocate a new register for this instruction if one is not + // already allocated. 
+ if out_reg.is_none() { out_reg = if insn.op == Op::LiveReg { // Allocate a specific register let reg = insn.opnds[0].unwrap_reg(); - Opnd::Reg(take_reg(&mut pool, ®s, ®)) + Some(take_reg(&mut pool, ®s, ®)) } else { - Opnd::Reg(alloc_reg(&mut pool, ®s)) - } + Some(alloc_reg(&mut pool, ®s)) + }; } + + // Set the output operand on the instruction + let out_num_bits = Opnd::match_num_bits(&insn.opnds); + insn.out = Opnd::Reg(out_reg.unwrap().sub_reg(out_num_bits)); } // Replace InsnOut operands by their corresponding register - let reg_opnds: Vec = insn.opnds.into_iter().map(|opnd| - match opnd { - Opnd::InsnOut{idx, ..} => asm.insns[idx].out, + for opnd in &mut insn.opnds { + match *opnd { + Opnd::InsnOut { idx, .. } => { + *opnd = asm.insns[idx].out; + }, Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { - let out_reg = asm.insns[idx].out.unwrap_reg(); - Opnd::Mem(Mem { - base: MemBase::Reg(out_reg.reg_no), - disp, - num_bits - }) + let base = MemBase::Reg(asm.insns[idx].out.unwrap_reg().reg_no); + *opnd = Opnd::Mem(Mem { base, disp, num_bits }); } - _ => opnd, + _ => {}, } - ).collect(); - - asm.push_insn_parts(insn.op, reg_opnds, insn.target, insn.text, insn.pos_marker); - - // Set the output register for this instruction - let num_insns = asm.insns.len(); - let mut new_insn = &mut asm.insns[num_insns - 1]; - if let Opnd::Reg(reg) = out_reg { - let num_out_bits = new_insn.out.rm_num_bits(); - out_reg = Opnd::Reg(reg.sub_reg(num_out_bits)) } - new_insn.out = out_reg; + + asm.push_insn(insn); } assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 65e2206c490175..1c07bd54028fbe 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -122,7 +122,7 @@ impl Assembler // - Most instructions can't be encoded with 64-bit immediates. // - We look for Op::Load specifically when emiting to keep GC'ed // VALUEs alive. 
This is a sort of canonicalization. - let opnds: Vec = insn.opnds.iter().map(|opnd| { + let mapped_opnds: Vec = insn.opnds.iter().map(|opnd| { if insn.op == Op::Load { iterator.map_opnd(*opnd) } else if let Opnd::Value(value) = opnd { @@ -138,129 +138,128 @@ impl Assembler } }).collect(); - match insn.op { - Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test | Op::Xor => { - let (opnd0, opnd1) = match (insn.opnds[0], insn.opnds[1]) { + match insn { + Insn { op: Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test | Op::Xor, opnds, target, text, pos_marker, .. } => { + let (opnd0, opnd1) = match (opnds[0], opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { - (asm.load(opnds[0]), asm.load(opnds[1])) + (asm.load(mapped_opnds[0]), asm.load(mapped_opnds[1])) }, (Opnd::Mem(_), Opnd::UImm(value)) => { // 32-bit values will be sign-extended if imm_num_bits(value as i64) > 32 { - (asm.load(opnds[0]), asm.load(opnds[1])) + (asm.load(mapped_opnds[0]), asm.load(mapped_opnds[1])) } else { - (asm.load(opnds[0]), opnds[1]) + (asm.load(mapped_opnds[0]), mapped_opnds[1]) } }, (Opnd::Mem(_), Opnd::Imm(value)) => { if imm_num_bits(value) > 32 { - (asm.load(opnds[0]), asm.load(opnds[1])) + (asm.load(mapped_opnds[0]), asm.load(mapped_opnds[1])) } else { - (asm.load(opnds[0]), opnds[1]) + (asm.load(mapped_opnds[0]), mapped_opnds[1]) } }, // Instruction output whose live range spans beyond this instruction (Opnd::InsnOut { idx, .. 
}, _) => { if live_ranges[idx] > index { - (asm.load(opnds[0]), opnds[1]) + (asm.load(mapped_opnds[0]), mapped_opnds[1]) } else { - (opnds[0], opnds[1]) + (mapped_opnds[0], mapped_opnds[1]) } }, // We have to load memory operands to avoid corrupting them (Opnd::Mem(_) | Opnd::Reg(_), _) => { - (asm.load(opnds[0]), opnds[1]) + (asm.load(mapped_opnds[0]), mapped_opnds[1]) }, - _ => (opnds[0], opnds[1]) + _ => (mapped_opnds[0], mapped_opnds[1]) }; - asm.push_insn_parts(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnd0, opnd1], target, text, pos_marker); }, // These instructions modify their input operand in-place, so we // may need to load the input value to preserve it - Op::LShift | Op::RShift | Op::URShift => { - let (opnd0, opnd1) = match (insn.opnds[0], insn.opnds[1]) { + Insn { op: Op::LShift | Op::RShift | Op::URShift, opnds, target, text, pos_marker, .. } => { + let (opnd0, opnd1) = match (opnds[0], opnds[1]) { // Instruction output whose live range spans beyond this instruction (Opnd::InsnOut { idx, .. 
}, _) => { if live_ranges[idx] > index { - (asm.load(opnds[0]), opnds[1]) + (asm.load(mapped_opnds[0]), mapped_opnds[1]) } else { - (opnds[0], opnds[1]) + (mapped_opnds[0], mapped_opnds[1]) } }, // We have to load memory operands to avoid corrupting them (Opnd::Mem(_) | Opnd::Reg(_), _) => { - (asm.load(opnds[0]), opnds[1]) + (asm.load(mapped_opnds[0]), mapped_opnds[1]) }, - _ => (opnds[0], opnds[1]) + _ => (mapped_opnds[0], mapped_opnds[1]) }; - asm.push_insn_parts(insn.op, vec![opnd0, opnd1], insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, vec![opnd0, opnd1], target, text, pos_marker); }, - Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | - Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE => { - let new_opnds = opnds.into_iter().map(|opnd| { + Insn { op: Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE, target, text, pos_marker, .. } => { + let new_opnds = mapped_opnds.into_iter().map(|opnd| { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, _ => asm.load(opnd) } }).collect(); - asm.push_insn_parts(insn.op, new_opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, new_opnds, target, text, pos_marker); }, - Op::Mov => { - match (opnds[0], opnds[1]) { + Insn { op: Op::Mov, .. 
} => { + match (mapped_opnds[0], mapped_opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { // We load opnd1 because for mov, opnd0 is the output - let opnd1 = asm.load(opnds[1]); - asm.mov(opnds[0], opnd1); + let opnd1 = asm.load(mapped_opnds[1]); + asm.mov(mapped_opnds[0], opnd1); }, (Opnd::Mem(_), Opnd::UImm(value)) => { // 32-bit values will be sign-extended if imm_num_bits(value as i64) > 32 { - let opnd1 = asm.load(opnds[1]); - asm.mov(opnds[0], opnd1); + let opnd1 = asm.load(mapped_opnds[1]); + asm.mov(mapped_opnds[0], opnd1); } else { - asm.mov(opnds[0], opnds[1]); + asm.mov(mapped_opnds[0], mapped_opnds[1]); } }, (Opnd::Mem(_), Opnd::Imm(value)) => { if imm_num_bits(value) > 32 { - let opnd1 = asm.load(opnds[1]); - asm.mov(opnds[0], opnd1); + let opnd1 = asm.load(mapped_opnds[1]); + asm.mov(mapped_opnds[0], opnd1); } else { - asm.mov(opnds[0], opnds[1]); + asm.mov(mapped_opnds[0], mapped_opnds[1]); } }, _ => { - asm.mov(opnds[0], opnds[1]); + asm.mov(mapped_opnds[0], mapped_opnds[1]); } } }, - Op::Not => { - let opnd0 = match insn.opnds[0] { + Insn { op: Op::Not, opnds, .. } => { + let opnd0 = match opnds[0] { // If we have an instruction output whose live range // spans beyond this instruction, we have to load it. Opnd::InsnOut { idx, .. } => { if live_ranges[idx] > index { - asm.load(opnds[0]) + asm.load(mapped_opnds[0]) } else { - opnds[0] + mapped_opnds[0] } }, // We have to load memory and register operands to avoid // corrupting them. Opnd::Mem(_) | Opnd::Reg(_) => { - asm.load(opnds[0]) + asm.load(mapped_opnds[0]) }, // Otherwise we can just reuse the existing operand. 
- _ => opnds[0] + _ => mapped_opnds[0] }; asm.not(opnd0); }, _ => { - asm.push_insn_parts(insn.op, opnds, insn.target, insn.text, insn.pos_marker); + asm.push_insn_parts(insn.op, mapped_opnds, insn.target, insn.text, insn.pos_marker); } }; @@ -280,27 +279,27 @@ impl Assembler // For each instruction for insn in &self.insns { - match insn.op { - Op::Comment => { + match insn { + Insn { op: Op::Comment, text, .. } => { if cfg!(feature = "asm_comments") { - cb.add_comment(&insn.text.as_ref().unwrap()); + cb.add_comment(text.as_ref().unwrap()); } }, // Write the label at the current position - Op::Label => { - cb.write_label(insn.target.unwrap().unwrap_label_idx()); + Insn { op: Op::Label, target, .. } => { + cb.write_label(target.unwrap().unwrap_label_idx()); }, // Report back the current position in the generated code - Op::PosMarker => { + Insn { op: Op::PosMarker, pos_marker, .. } => { let pos = cb.get_write_ptr(); - let pos_marker_fn = insn.pos_marker.as_ref().unwrap(); + let pos_marker_fn = pos_marker.as_ref().unwrap(); pos_marker_fn(pos); - } + }, - Op::BakeString => { - for byte in insn.text.as_ref().unwrap().as_bytes() { + Insn { op: Op::BakeString, text, .. } => { + for byte in text.as_ref().unwrap().as_bytes() { cb.write_byte(*byte); } @@ -309,53 +308,55 @@ impl Assembler cb.write_byte(0); }, - Op::Add => { - add(cb, insn.opnds[0].into(), insn.opnds[1].into()) + Insn { op: Op::Add, opnds, .. } => { + add(cb, opnds[0].into(), opnds[1].into()) }, - Op::FrameSetup => {}, - Op::FrameTeardown => {}, + Insn { op: Op::FrameSetup, .. } => {}, + Insn { op: Op::FrameTeardown, .. } => {}, - Op::Sub => { - sub(cb, insn.opnds[0].into(), insn.opnds[1].into()) + Insn { op: Op::Sub, opnds, .. } => { + sub(cb, opnds[0].into(), opnds[1].into()) }, - Op::And => { - and(cb, insn.opnds[0].into(), insn.opnds[1].into()) + Insn { op: Op::And, opnds, .. 
} => { + and(cb, opnds[0].into(), opnds[1].into()) }, - Op::Or => { - or(cb, insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Or, opnds, .. } => { + or(cb, opnds[0].into(), opnds[1].into()); }, - Op::Xor => { - xor(cb, insn.opnds[0].into(), insn.opnds[1].into()); + Insn { op: Op::Xor, opnds, .. } => { + xor(cb, opnds[0].into(), opnds[1].into()); }, - Op::Not => { - not(cb, insn.opnds[0].into()) + Insn { op: Op::Not, opnds, .. } => { + not(cb, opnds[0].into()); }, - Op::LShift => { - shl(cb, insn.opnds[0].into(), insn.opnds[1].into()) + Insn { op: Op::LShift, opnds, .. } => { + shl(cb, opnds[0].into(), opnds[1].into()) }, - Op::RShift => { - sar(cb, insn.opnds[0].into(), insn.opnds[1].into()) + Insn { op: Op::RShift, opnds, .. } => { + sar(cb, opnds[0].into(), opnds[1].into()) }, - Op::URShift => { - shr(cb, insn.opnds[0].into(), insn.opnds[1].into()) + Insn { op: Op::URShift, opnds, .. } => { + shr(cb, opnds[0].into(), opnds[1].into()) }, - Op::Store => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Insn { op: Op::Store, opnds, .. } => { + mov(cb, opnds[0].into(), opnds[1].into()); + }, // This assumes only load instructions can contain references to GC'd Value operands - Op::Load => { - mov(cb, insn.out.into(), insn.opnds[0].into()); + Insn { op: Op::Load, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); // If the value being loaded is a heap object - if let Opnd::Value(val) = insn.opnds[0] { + if let Opnd::Value(val) = opnds[0] { if !val.special_const_p() { // The pointer immediate is encoded as the last part of the mov written out let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); @@ -364,35 +365,45 @@ impl Assembler } }, - Op::LoadSExt => { - movsx(cb, insn.out.into(), insn.opnds[0].into()) + Insn { op: Op::LoadSExt, opnds, out, .. } => { + movsx(cb, (*out).into(), opnds[0].into()); }, - Op::Mov => mov(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Insn { op: Op::Mov, opnds, .. 
} => { + mov(cb, opnds[0].into(), opnds[1].into()); + }, // Load effective address - Op::Lea => lea(cb, insn.out.into(), insn.opnds[0].into()), + Insn { op: Op::Lea, opnds, out, .. } => { + lea(cb, (*out).into(), opnds[0].into()); + }, // Load relative address - Op::LeaLabel => { - let label_idx = insn.target.unwrap().unwrap_label_idx(); + Insn { op: Op::LeaLabel, out, target, .. } => { + let label_idx = target.unwrap().unwrap_label_idx(); cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { let disp = dst_addr - src_addr; lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); }); - mov(cb, insn.out.into(), Self::SCRATCH0); + mov(cb, (*out).into(), Self::SCRATCH0); }, // Push and pop to/from the C stack - Op::CPush => push(cb, insn.opnds[0].into()), - Op::CPop => pop(cb, insn.out.into()), - Op::CPopInto => pop(cb, insn.opnds[0].into()), + Insn { op: Op::CPush, opnds, .. } => { + push(cb, opnds[0].into()); + }, + Insn { op: Op::CPop, out, .. } => { + pop(cb, (*out).into()); + }, + Insn { op: Op::CPopInto, opnds, .. } => { + pop(cb, opnds[0].into()); + }, // Push and pop to the C stack all caller-save registers and the // flags - Op::CPushAll => { + Insn { op: Op::CPushAll, .. } => { let regs = Assembler::get_caller_save_regs(); for reg in regs { @@ -400,7 +411,7 @@ impl Assembler } pushfq(cb); }, - Op::CPopAll => { + Insn { op: Op::CPopAll, .. } => { let regs = Assembler::get_caller_save_regs(); popfq(cb); @@ -410,95 +421,101 @@ impl Assembler }, // C function call - Op::CCall => { + Insn { op: Op::CCall, opnds, target, .. 
} => { // Temporary - assert!(insn.opnds.len() <= _C_ARG_OPNDS.len()); + assert!(opnds.len() <= _C_ARG_OPNDS.len()); // For each operand - for (idx, opnd) in insn.opnds.iter().enumerate() { - mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), insn.opnds[idx].into()); + for (idx, opnd) in opnds.iter().enumerate() { + mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), opnds[idx].into()); } - let ptr = insn.target.unwrap().unwrap_fun_ptr(); + let ptr = target.unwrap().unwrap_fun_ptr(); call_ptr(cb, RAX, ptr); }, - Op::CRet => { + Insn { op: Op::CRet, opnds, .. } => { // TODO: bias allocation towards return register - if insn.opnds[0] != Opnd::Reg(C_RET_REG) { - mov(cb, RAX, insn.opnds[0].into()); + if opnds[0] != Opnd::Reg(C_RET_REG) { + mov(cb, RAX, opnds[0].into()); } ret(cb); - } + }, // Compare - Op::Cmp => cmp(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Insn { op: Op::Cmp, opnds, .. } => { + cmp(cb, opnds[0].into(), opnds[1].into()); + } // Test and set flags - Op::Test => test(cb, insn.opnds[0].into(), insn.opnds[1].into()), + Insn { op: Op::Test, opnds, .. } => { + test(cb, opnds[0].into(), opnds[1].into()); + } - Op::JmpOpnd => jmp_rm(cb, insn.opnds[0].into()), + Insn { op: Op::JmpOpnd, opnds, .. } => { + jmp_rm(cb, opnds[0].into()); + } // Conditional jump to a label - Op::Jmp => { - match insn.target.unwrap() { + Insn { op: Op::Jmp, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label_idx) => jmp_label(cb, label_idx), _ => unreachable!() } } - Op::Je => { - match insn.target.unwrap() { + Insn { op: Op::Je, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label_idx) => je_label(cb, label_idx), _ => unreachable!() } } - Op::Jne => { - match insn.target.unwrap() { + Insn { op: Op::Jne, target, .. 
} => { + match target.unwrap() { Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label_idx) => jne_label(cb, label_idx), _ => unreachable!() } } - Op::Jl => { - match insn.target.unwrap() { + Insn { op: Op::Jl, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label_idx) => jl_label(cb, label_idx), _ => unreachable!() } }, - Op::Jbe => { - match insn.target.unwrap() { + Insn { op: Op::Jbe, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label_idx) => jbe_label(cb, label_idx), _ => unreachable!() } }, - Op::Jz => { - match insn.target.unwrap() { + Insn { op: Op::Jz, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label_idx) => jz_label(cb, label_idx), _ => unreachable!() } } - Op::Jnz => { - match insn.target.unwrap() { + Insn { op: Op::Jnz, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label_idx) => jnz_label(cb, label_idx), _ => unreachable!() } } - Op::Jo => { - match insn.target.unwrap() { + Insn { op: Op::Jo, target, .. } => { + match target.unwrap() { Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label_idx) => jo_label(cb, label_idx), _ => unreachable!() @@ -506,48 +523,48 @@ impl Assembler } // Atomically increment a counter at a given memory location - Op::IncrCounter => { - assert!(matches!(insn.opnds[0], Opnd::Mem(_))); - assert!(matches!(insn.opnds[1], Opnd::UImm(_) | Opnd::Imm(_) ) ); + Insn { op: Op::IncrCounter, opnds, .. } => { + assert!(matches!(opnds[0], Opnd::Mem(_))); + assert!(matches!(opnds[1], Opnd::UImm(_) | Opnd::Imm(_) ) ); write_lock_prefix(cb); - add(cb, insn.opnds[0].into(), insn.opnds[1].into()); + add(cb, opnds[0].into(), opnds[1].into()); }, - Op::Breakpoint => int3(cb), + Insn { op: Op::Breakpoint, .. 
} => int3(cb), - Op::CSelZ => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovnz(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelZ, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmovnz(cb, (*out).into(), opnds[1].into()); }, - Op::CSelNZ => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovz(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelNZ, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmovz(cb, (*out).into(), opnds[1].into()); }, - Op::CSelE => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovne(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelE, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmovne(cb, (*out).into(), opnds[1].into()); }, - Op::CSelNE => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmove(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelNE, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmove(cb, (*out).into(), opnds[1].into()); }, - Op::CSelL => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovge(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelL, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmovge(cb, (*out).into(), opnds[1].into()); }, - Op::CSelLE => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovg(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelLE, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmovg(cb, (*out).into(), opnds[1].into()); }, - Op::CSelG => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovle(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelG, opnds, out, .. } => { + mov(cb, (*out).into(), opnds[0].into()); + cmovle(cb, (*out).into(), opnds[1].into()); }, - Op::CSelGE => { - mov(cb, insn.out.into(), insn.opnds[0].into()); - cmovl(cb, insn.out.into(), insn.opnds[1].into()); + Insn { op: Op::CSelGE, opnds, out, .. 
} => { + mov(cb, (*out).into(), opnds[0].into()); + cmovl(cb, (*out).into(), opnds[1].into()); } - Op::LiveReg => (), // just a reg alloc signal, no code + Insn { op: Op::LiveReg, .. } => (), // just a reg alloc signal, no code // We want to keep the panic here because some instructions that // we feed to the backend could get lowered into other From e0e63b1a0142968e99c8a973907092b10f0d9b4b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 17 Aug 2022 13:43:00 -0700 Subject: [PATCH 492/546] Fix a bus error on regenerate_branch (https://github.com/Shopify/ruby/pull/408) * Fix a bus error on regenerate_branch * Fix pad_size --- yjit/src/backend/arm64/mod.rs | 30 +++++++++++++++++++++++------- yjit/src/backend/ir.rs | 7 +++++++ yjit/src/backend/x86_64/mod.rs | 8 ++++++++ yjit/src/codegen.rs | 7 +++++-- yjit/src/core.rs | 22 +++++++++------------- 5 files changed, 52 insertions(+), 22 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 501e0a6138302a..c1d8b773f1ed4a 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -440,39 +440,46 @@ impl Assembler /// Emit the required instructions to load the given value into the /// given register. Our goal here is to use as few instructions as /// possible to get this value into the register. - fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) { + fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 { let mut current = value; if current <= 0xffff { // If the value fits into a single movz // instruction, then we'll use that. movz(cb, rd, A64Opnd::new_uimm(current), 0); + return 1; } else if BitmaskImmediate::try_from(current).is_ok() { // Otherwise, if the immediate can be encoded // with the special bitmask immediate encoding, // we'll use that. 
mov(cb, rd, A64Opnd::new_uimm(current)); + return 1; } else { // Finally we'll fall back to encoding the value // using movz for the first 16 bits and movk for // each subsequent set of 16 bits as long we // they are necessary. movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); + let mut num_insns = 1; // (We're sure this is necessary since we // checked if it only fit into movz above). current >>= 16; movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); + num_insns += 1; if current > 0xffff { current >>= 16; movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); + num_insns += 1; } if current > 0xffff { current >>= 16; movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); + num_insns += 1; } + return num_insns; } } @@ -495,8 +502,11 @@ impl Assembler // next instruction that perform the direct jump. b(cb, A64Opnd::new_imm(2i64 + emit_load_size(dst_addr) as i64)); - emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr); br(cb, Assembler::SCRATCH0); + for _ in num_insns..4 { + nop(cb); + } /* // If the jump offset fits into the conditional jump as an @@ -568,6 +578,7 @@ impl Assembler let mut gc_offsets: Vec = Vec::new(); // For each instruction + let start_write_pos = cb.get_write_pos(); for insn in &self.insns { match insn { Insn { op: Op::Comment, text, .. } => { @@ -800,11 +811,10 @@ impl Assembler // branch instruction. Otherwise, we'll move the // destination into a register and use the branch // register instruction. - if b_offset_fits_bits(offset) { - b(cb, A64Opnd::new_imm(offset)); - } else { - emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); - br(cb, Self::SCRATCH0); + let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + br(cb, Self::SCRATCH0); + for _ in num_insns..4 { + nop(cb); } }, Target::Label(label_idx) => { @@ -866,6 +876,12 @@ impl Assembler csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::GE); } Insn { op: Op::LiveReg, .. 
} => (), // just a reg alloc signal, no code + Insn { op: Op::PadEntryExit, .. } => { + let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions + while (cb.get_write_pos() - start_write_pos) < jmp_len { + nop(cb); + } + } }; } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 985b8e0500ffef..db2bc7622c08cd 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -168,6 +168,9 @@ pub enum Op /// Take a specific register. Signal the register allocator to not use it. LiveReg, + + /// Pad nop instructions to accomodate Op::Jmp in case the block is invalidated. + PadEntryExit, } // Memory operand base @@ -1138,4 +1141,8 @@ impl Assembler { self.push_insn(Insn { op: Op::Xor, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); out } + + pub fn pad_entry_exit(&mut self) { + self.push_insn(Insn { op: Op::PadEntryExit, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 1c07bd54028fbe..f80e06ba9bf605 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -278,6 +278,7 @@ impl Assembler let mut gc_offsets: Vec = Vec::new(); // For each instruction + let start_write_pos = cb.get_write_pos(); for insn in &self.insns { match insn { Insn { op: Op::Comment, text, .. } => { @@ -565,6 +566,13 @@ impl Assembler cmovl(cb, (*out).into(), opnds[1].into()); } Insn { op: Op::LiveReg, .. } => (), // just a reg alloc signal, no code + Insn { op: Op::PadEntryExit, .. 
} => { + // We assume that our Op::Jmp usage that gets invalidated is <= 5 + let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap(); + if code_size < 5 { + nop(cb, 5 - code_size); + } + } // We want to keep the panic here because some instructions that // we feed to the backend could get lowered into other diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1399a92f142eeb..c6e69e0ad22749 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -843,11 +843,14 @@ pub fn gen_single_block( // Finish filling out the block { + let mut block = jit.block.borrow_mut(); + if block.entry_exit.is_some() { + asm.pad_entry_exit(); + } + // Compile code into the code block let gc_offsets = asm.compile(cb); - let mut block = jit.block.borrow_mut(); - // Add the GC offsets to the block for offset in gc_offsets { block.add_gc_obj_offset(offset) diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 354615db253472..7d07918228ab13 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1420,9 +1420,6 @@ fn gen_block_series_body( .incoming .push(last_branchref.clone()); - // This block should immediately follow the last branch - assert!(new_blockref.borrow().start_addr == last_branch.end_addr); - // Track the block batch.push(new_blockref.clone()); @@ -2078,8 +2075,10 @@ pub fn invalidate_block_version(blockref: &BlockRef) { asm.compile(&mut cb); assert!( - cb.get_write_ptr() < block_end, - "invalidation wrote past end of block" + cb.get_write_ptr() <= block_end, + "invalidation wrote past end of block (code_size: {:?}, new_size: {})", + block.code_size(), + cb.get_write_ptr().into_i64() - block_start.into_i64(), ); cb.set_write_ptr(cur_pos); } @@ -2133,17 +2132,14 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // Rewrite the branch with the new jump target address + let branch_end_addr = branch.end_addr; regenerate_branch(cb, &mut branch); if target_next && branch.end_addr > block.end_addr { - dbg!( - 
branch.block.borrow().blockid.idx, - block.blockid.idx, - branch.end_addr, - block.end_addr, - block.code_size() - ); - panic!("yjit invalidate rewrote branch past end of invalidated block"); + panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size()); + } + if !target_next && branch.end_addr > branch_end_addr { + panic!("invalidated branch grew in size: {:?}", branch); } } From a78bbef12feac4d4f5eca98718cf7418c1b8e584 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 17 Aug 2022 16:23:21 -0700 Subject: [PATCH 493/546] Use VALUE for block_iseq (https://github.com/Shopify/ruby/pull/417) Co-authored-by: Alan Wu --- yjit/src/codegen.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index c6e69e0ad22749..5be0f0f749610c 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3958,7 +3958,7 @@ fn gen_send_cfunc( // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases // with cfp->block_code. 
- asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), Opnd::UImm(block_iseq as u64)); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); } // Increment the stack pointer by 3 (in the callee) From 5114ddce3f3475aba8bead85f5fe9db774a4cc14 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 18 Aug 2022 07:49:27 -0700 Subject: [PATCH 494/546] Avoid marking op_type on gen_defined (https://github.com/Shopify/ruby/pull/419) --- .cirrus.yml | 2 +- yjit/src/codegen.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 35f908df048f75..21f3e386513bf3 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -170,6 +170,6 @@ yjit_task: make_rdoc_script: source $HOME/.cargo/env && make -j rdoc # Run John's YJIT instruction tests, and make sure we can load the test-all runner - test_yjit_script: source $HOME/.cargo/env && make test-all TESTS='test/ruby/test_yjit.rb' RUN_OPTS="--yjit-call-threshold=1" + test_yjit_script: source $HOME/.cargo/env && make -j test-all TESTS='test/ruby/test_method.rb test/ruby/test_yjit.rb' RUN_OPTS="--yjit-call-threshold=1" # TODO: check that we can we run all of test-all successfully diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 5be0f0f749610c..01ce65deb15dc8 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2186,7 +2186,7 @@ fn gen_defined( asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let op_type = jit_get_arg(jit, 0); + let op_type = jit_get_arg(jit, 0).as_u64(); let obj = jit_get_arg(jit, 1); let pushval = jit_get_arg(jit, 2); From 342459576d4dc57a4c2e92c95c5ff225bf9df763 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 18 Aug 2022 11:00:58 -0400 Subject: [PATCH 495/546] Use VALUE for callinfos that are on the heap (https://github.com/Shopify/ruby/pull/420) Yet another case of `jit_mov_gc_ptr()` being yanked out during the transition to the new backend, causing a crash after object movement. 
The intresting wrinkle with this one is that not all callinfos are GC'ed objects, so the old code had an implicit assumption. https://github.com/ruby/ruby/blob/b0b9f7201acab05c2a3ad92c3043a1f01df3e17f/yjit/src/codegen.rs#L4087-L4095 --- yjit/src/codegen.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 01ce65deb15dc8..b94ddc32d557da 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -4033,8 +4033,11 @@ fn gen_send_cfunc( if !kw_arg.is_null() { // Build a hash from all kwargs passed asm.comment("build_kwhash"); + let imemo_ci = VALUE(ci as usize); + assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) }, + "we assume all callinfos with kwargs are on the GC heap"); let sp = asm.lea(ctx.sp_opnd(0)); - let kwargs = asm.ccall(build_kwhash as *const u8, vec![Opnd::UImm(ci as u64), sp]); + let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]); // Replace the stack location at the start of kwargs with the new hash let stack_opnd = ctx.stack_opnd(argc - passed_argc); From 1c67e90bdecf9aec97eb3185b237d879207db465 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 18 Aug 2022 13:04:11 -0400 Subject: [PATCH 496/546] More work toward instruction enum (https://github.com/Shopify/ruby/pull/421) * Operand iterators There are a couple of times when we're dealing with instructions that we need to iterate through their operands. At the moment this is relatively easy because there's an opnds field and we can work with it directly. When the instructions become enums, however, the shape of each variant will be different so we'll need an iterator to make sense of the shape. This commit introduces two new iterators that are created from an instruction. 
One iterates over references to each operand (for instances where they don't need to be mutable like updating live ranges) and one iterates over mutable references to each operand (for instances where you need to mutate them like loading values in arm64). Note that because iterators can't have generic items (i.e., be associated with lifetimes) the mutable iterator forces you to use the `while let Some` syntax as opposed to the for-loop like we did with instructions. This commit eliminates the last reference to insn.opnds, which is going to make it much easier to transition to an enum. * Consolidate output operand fetching Currently we always look at the .out field on instructions whenever we want to access the output operand. When the instructions become an enum, this is not going to be possible since the shape of the variants will be different. Instead, this commit introduces two functions on Insn: out_opnd() and out_opnd_mut(). These return an Option containing a reference to the output operand and a mutable reference to the output operand, respectively. This commit then uses those functions to replace all instances of accessing the output operand. For the most part this was straightforward; when we previously checked if it was Opnd::None we now check that it's None, when we assumed there was an output operand we now unwrap. --- yjit/src/backend/arm64/mod.rs | 16 +- yjit/src/backend/ir.rs | 446 ++++++++++++++++++++++++++++----- yjit/src/backend/tests.rs | 12 +- yjit/src/backend/x86_64/mod.rs | 2 +- 4 files changed, 403 insertions(+), 73 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index c1d8b773f1ed4a..a32be6a6b2769b 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -192,12 +192,15 @@ impl Assembler // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integer. 
- for opnd in &mut insn.opnds { + let skip_load = matches!(insn, Insn { op: Op::Load, .. }); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { match opnd { Opnd::Value(value) => { if value.special_const_p() { *opnd = Opnd::UImm(value.as_u64()); - } else if insn.op != Op::Load { + } else if !skip_load { *opnd = asm.load(*opnd); } }, @@ -400,9 +403,14 @@ impl Assembler asm.test(opnd0, opnd1); }, _ => { - if insn.out.is_some() { - insn.out = asm.next_opnd_out(&insn.opnds); + // If we have an output operand, then we need to replace it + // with a new output operand from the new assembler. + if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); } + asm.push_insn(insn); } }; diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index db2bc7622c08cd..cea8dfb227120d 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -318,9 +318,13 @@ impl Opnd } } - /// Determine the size in bits of the slice of the given operands. If any of - /// them are different sizes this will panic. - fn match_num_bits(opnds: &[Opnd]) -> u8 { + /// When there aren't any operands to check against, this is the number of + /// bits that should be used for any given output variable. + const DEFAULT_NUM_BITS: u8 = 64; + + /// Determine the size in bits from the iterator of operands. If any of them + /// are different sizes this will panic. + pub fn match_num_bits_iter<'a>(opnds: impl Iterator) -> u8 { let mut value: Option = None; for opnd in opnds { @@ -336,7 +340,13 @@ impl Opnd } } - value.unwrap_or(64) + value.unwrap_or(Self::DEFAULT_NUM_BITS) + } + + /// Determine the size in bits of the slice of the given operands. If any of + /// them are different sizes this will panic. 
+ pub fn match_num_bits(opnds: &[Opnd]) -> u8 { + Self::match_num_bits_iter(opnds.iter()) } } @@ -441,12 +451,287 @@ pub struct Insn pub(super) pos_marker: Option, } +impl Insn { + /// Create an iterator that will yield a non-mutable reference to each + /// operand in turn for this instruction. + pub(super) fn opnd_iter(&self) -> InsnOpndIterator { + InsnOpndIterator::new(self) + } + + /// Create an iterator that will yield a mutable reference to each operand + /// in turn for this instruction. + pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator { + InsnOpndMutIterator::new(self) + } + + /// Return a non-mutable reference to the out operand for this instruction + /// if it has one. + pub fn out_opnd(&self) -> Option<&Opnd> { + match self { + Insn { op: Op::Add, out, .. } | + Insn { op: Op::And, out, .. } | + Insn { op: Op::CCall, out, .. } | + Insn { op: Op::CPop, out, .. } | + Insn { op: Op::CSelE, out, .. } | + Insn { op: Op::CSelG, out, .. } | + Insn { op: Op::CSelGE, out, .. } | + Insn { op: Op::CSelL, out, .. } | + Insn { op: Op::CSelLE, out, .. } | + Insn { op: Op::CSelNE, out, .. } | + Insn { op: Op::CSelNZ, out, .. } | + Insn { op: Op::CSelZ, out, .. } | + Insn { op: Op::Lea, out, .. } | + Insn { op: Op::LeaLabel, out, .. } | + Insn { op: Op::LiveReg, out, .. } | + Insn { op: Op::Load, out, .. } | + Insn { op: Op::LoadSExt, out, .. } | + Insn { op: Op::LShift, out, .. } | + Insn { op: Op::Not, out, .. } | + Insn { op: Op::Or, out, .. } | + Insn { op: Op::RShift, out, .. } | + Insn { op: Op::Sub, out, .. } | + Insn { op: Op::URShift, out, .. } | + Insn { op: Op::Xor, out, .. } => Some(out), + _ => None + } + } + + /// Return a mutable reference to the out operand for this instruction if it + /// has one. + pub fn out_opnd_mut(&mut self) -> Option<&mut Opnd> { + match self { + Insn { op: Op::Add, out, .. } | + Insn { op: Op::And, out, .. } | + Insn { op: Op::CCall, out, .. } | + Insn { op: Op::CPop, out, .. } | + Insn { op: Op::CSelE, out, .. 
} | + Insn { op: Op::CSelG, out, .. } | + Insn { op: Op::CSelGE, out, .. } | + Insn { op: Op::CSelL, out, .. } | + Insn { op: Op::CSelLE, out, .. } | + Insn { op: Op::CSelNE, out, .. } | + Insn { op: Op::CSelNZ, out, .. } | + Insn { op: Op::CSelZ, out, .. } | + Insn { op: Op::Lea, out, .. } | + Insn { op: Op::LeaLabel, out, .. } | + Insn { op: Op::LiveReg, out, .. } | + Insn { op: Op::Load, out, .. } | + Insn { op: Op::LoadSExt, out, .. } | + Insn { op: Op::LShift, out, .. } | + Insn { op: Op::Not, out, .. } | + Insn { op: Op::Or, out, .. } | + Insn { op: Op::RShift, out, .. } | + Insn { op: Op::Sub, out, .. } | + Insn { op: Op::URShift, out, .. } | + Insn { op: Op::Xor, out, .. } => Some(out), + _ => None + } + } +} + +/// An iterator that will yield a non-mutable reference to each operand in turn +/// for the given instruction. +pub(super) struct InsnOpndIterator<'a> { + insn: &'a Insn, + idx: usize, +} + +impl<'a> InsnOpndIterator<'a> { + fn new(insn: &'a Insn) -> Self { + Self { insn, idx: 0 } + } +} + +impl<'a> Iterator for InsnOpndIterator<'a> { + type Item = &'a Opnd; + + fn next(&mut self) -> Option { + match self.insn { + Insn { op: Op::BakeString, .. } | + Insn { op: Op::Breakpoint, .. } | + Insn { op: Op::Comment, .. } | + Insn { op: Op::CPop, .. } | + Insn { op: Op::CPopAll, .. } | + Insn { op: Op::CPushAll, .. } | + Insn { op: Op::FrameSetup, .. } | + Insn { op: Op::FrameTeardown, .. } | + Insn { op: Op::Jbe, .. } | + Insn { op: Op::Je, .. } | + Insn { op: Op::Jl, .. } | + Insn { op: Op::Jmp, .. } | + Insn { op: Op::Jne, .. } | + Insn { op: Op::Jnz, .. } | + Insn { op: Op::Jo, .. } | + Insn { op: Op::Jz, .. } | + Insn { op: Op::Label, .. } | + Insn { op: Op::LeaLabel, .. } | + Insn { op: Op::PadEntryExit, .. } | + Insn { op: Op::PosMarker, .. } => None, + Insn { op: Op::CPopInto, opnds, .. } | + Insn { op: Op::CPush, opnds, .. } | + Insn { op: Op::CRet, opnds, .. } | + Insn { op: Op::JmpOpnd, opnds, .. } | + Insn { op: Op::Lea, opnds, .. 
} | + Insn { op: Op::LiveReg, opnds, .. } | + Insn { op: Op::Load, opnds, .. } | + Insn { op: Op::LoadSExt, opnds, .. } | + Insn { op: Op::Not, opnds, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&opnds[0]) + }, + _ => None + } + }, + Insn { op: Op::Add, opnds, .. } | + Insn { op: Op::And, opnds, .. } | + Insn { op: Op::Cmp, opnds, .. } | + Insn { op: Op::CSelE, opnds, .. } | + Insn { op: Op::CSelG, opnds, .. } | + Insn { op: Op::CSelGE, opnds, .. } | + Insn { op: Op::CSelL, opnds, .. } | + Insn { op: Op::CSelLE, opnds, .. } | + Insn { op: Op::CSelNE, opnds, .. } | + Insn { op: Op::CSelNZ, opnds, .. } | + Insn { op: Op::CSelZ, opnds, .. } | + Insn { op: Op::IncrCounter, opnds, .. } | + Insn { op: Op::LShift, opnds, .. } | + Insn { op: Op::Mov, opnds, .. } | + Insn { op: Op::Or, opnds, .. } | + Insn { op: Op::RShift, opnds, .. } | + Insn { op: Op::Store, opnds, .. } | + Insn { op: Op::Sub, opnds, .. } | + Insn { op: Op::Test, opnds, .. } | + Insn { op: Op::URShift, opnds, .. } | + Insn { op: Op::Xor, opnds, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&opnds[0]) + } + 1 => { + self.idx += 1; + Some(&opnds[1]) + } + _ => None + } + }, + Insn { op: Op::CCall, opnds, .. } => { + if self.idx < opnds.len() { + let opnd = &opnds[self.idx]; + self.idx += 1; + Some(opnd) + } else { + None + } + } + } + } +} + +/// An iterator that will yield each operand in turn for the given instruction. +pub(super) struct InsnOpndMutIterator<'a> { + insn: &'a mut Insn, + idx: usize, +} + +impl<'a> InsnOpndMutIterator<'a> { + fn new(insn: &'a mut Insn) -> Self { + Self { insn, idx: 0 } + } + + pub(super) fn next(&mut self) -> Option<&mut Opnd> { + match self.insn { + Insn { op: Op::BakeString, .. } | + Insn { op: Op::Breakpoint, .. } | + Insn { op: Op::Comment, .. } | + Insn { op: Op::CPop, .. } | + Insn { op: Op::CPopAll, .. } | + Insn { op: Op::CPushAll, .. } | + Insn { op: Op::FrameSetup, .. } | + Insn { op: Op::FrameTeardown, .. 
} | + Insn { op: Op::Jbe, .. } | + Insn { op: Op::Je, .. } | + Insn { op: Op::Jl, .. } | + Insn { op: Op::Jmp, .. } | + Insn { op: Op::Jne, .. } | + Insn { op: Op::Jnz, .. } | + Insn { op: Op::Jo, .. } | + Insn { op: Op::Jz, .. } | + Insn { op: Op::Label, .. } | + Insn { op: Op::LeaLabel, .. } | + Insn { op: Op::PadEntryExit, .. } | + Insn { op: Op::PosMarker, .. } => None, + Insn { op: Op::CPopInto, opnds, .. } | + Insn { op: Op::CPush, opnds, .. } | + Insn { op: Op::CRet, opnds, .. } | + Insn { op: Op::JmpOpnd, opnds, .. } | + Insn { op: Op::Lea, opnds, .. } | + Insn { op: Op::LiveReg, opnds, .. } | + Insn { op: Op::Load, opnds, .. } | + Insn { op: Op::LoadSExt, opnds, .. } | + Insn { op: Op::Not, opnds, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&mut opnds[0]) + }, + _ => None + } + }, + Insn { op: Op::Add, opnds, .. } | + Insn { op: Op::And, opnds, .. } | + Insn { op: Op::Cmp, opnds, .. } | + Insn { op: Op::CSelE, opnds, .. } | + Insn { op: Op::CSelG, opnds, .. } | + Insn { op: Op::CSelGE, opnds, .. } | + Insn { op: Op::CSelL, opnds, .. } | + Insn { op: Op::CSelLE, opnds, .. } | + Insn { op: Op::CSelNE, opnds, .. } | + Insn { op: Op::CSelNZ, opnds, .. } | + Insn { op: Op::CSelZ, opnds, .. } | + Insn { op: Op::IncrCounter, opnds, .. } | + Insn { op: Op::LShift, opnds, .. } | + Insn { op: Op::Mov, opnds, .. } | + Insn { op: Op::Or, opnds, .. } | + Insn { op: Op::RShift, opnds, .. } | + Insn { op: Op::Store, opnds, .. } | + Insn { op: Op::Sub, opnds, .. } | + Insn { op: Op::Test, opnds, .. } | + Insn { op: Op::URShift, opnds, .. } | + Insn { op: Op::Xor, opnds, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&mut opnds[0]) + } + 1 => { + self.idx += 1; + Some(&mut opnds[1]) + } + _ => None + } + }, + Insn { op: Op::CCall, opnds, .. 
} => { + if self.idx < opnds.len() { + let opnd = &mut opnds[self.idx]; + self.idx += 1; + Some(opnd) + } else { + None + } + } + } + } +} + impl fmt::Debug for Insn { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { write!(fmt, "{:?}(", self.op)?; // Print list of operands - let mut opnd_iter = self.opnds.iter(); + let mut opnd_iter = self.opnd_iter(); if let Some(first_opnd) = opnd_iter.next() { write!(fmt, "{first_opnd:?}")?; } @@ -463,7 +748,7 @@ impl fmt::Debug for Insn { write!(fmt, " target={target:?}")?; } - write!(fmt, " -> {:?}", self.out) + write!(fmt, " -> {:?}", self.out_opnd().unwrap_or(&Opnd::None)) } } @@ -496,14 +781,9 @@ impl Assembler } /// Build an Opnd::InsnOut from the current index of the assembler and the - /// given slice of operands. The operands are given to determine the number - /// of bits necessary for the output operand. They should all be the same - /// size. - pub(super) fn next_opnd_out(&self, opnds: &[Opnd]) -> Opnd { - Opnd::InsnOut { - idx: self.insns.len(), - num_bits: Opnd::match_num_bits(opnds) - } + /// given number of bits. + pub(super) fn next_opnd_out(&self, num_bits: u8) -> Opnd { + Opnd::InsnOut { idx: self.insns.len(), num_bits } } /// Append an instruction onto the current list of instructions and update @@ -516,7 +796,7 @@ impl Assembler // If we find any InsnOut from previous instructions, we're going to // update the live range of the previous instruction to point to this // one. - for opnd in &insn.opnds { + for opnd in insn.opnd_iter() { match opnd { Opnd::InsnOut { idx, .. } => { assert!(*idx < self.insns.len()); @@ -546,7 +826,7 @@ impl Assembler pos_marker: Option ) -> Opnd { - let out = self.next_opnd_out(&opnds); + let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); self.push_insn(Insn { op, text, opnds, out, target, pos_marker }); out } @@ -632,9 +912,9 @@ impl Assembler // Check if this is the last instruction that uses an operand that // spans more than one instruction. 
In that case, return the // allocated register to the pool. - for opnd in &insn.opnds { + for opnd in insn.opnd_iter() { match opnd { - Opnd::InsnOut{ idx, .. } | + Opnd::InsnOut { idx, .. } | Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { // Since we have an InsnOut, we know it spans more that one // instruction. @@ -645,8 +925,8 @@ impl Assembler // uses this operand. If it is, we can return the allocated // register to the pool. if live_ranges[start_index] == index { - if let Opnd::Reg(reg) = asm.insns[start_index].out { - dealloc_reg(&mut pool, ®s, ®); + if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() { + dealloc_reg(&mut pool, ®s, reg); } else { unreachable!("no register allocated for insn {:?}", insn.op); } @@ -669,7 +949,7 @@ impl Assembler // true that we set an output operand for this instruction. If // it's not true, something has gone wrong. assert!( - !matches!(insn.out, Opnd::None), + !matches!(insn.out_opnd(), None), "Instruction output reused but no output operand set" ); @@ -687,11 +967,13 @@ impl Assembler // We do this to improve register allocation on x86 // e.g. out = add(reg0, reg1) // reg0 = add(reg0, reg1) - else if insn.opnds.len() > 0 { - if let Opnd::InsnOut{idx, ..} = insn.opnds[0] { - if live_ranges[idx] == index { - if let Opnd::Reg(reg) = asm.insns[idx].out { - out_reg = Some(take_reg(&mut pool, ®s, ®)); + if out_reg.is_none() { + let mut opnd_iter = insn.opnd_iter(); + + if let Some(Opnd::InsnOut{ idx, .. }) = opnd_iter.next() { + if live_ranges[*idx] == index { + if let Some(Opnd::Reg(reg)) = asm.insns[*idx].out_opnd() { + out_reg = Some(take_reg(&mut pool, ®s, reg)); } } } @@ -700,28 +982,37 @@ impl Assembler // Allocate a new register for this instruction if one is not // already allocated. 
if out_reg.is_none() { - out_reg = if insn.op == Op::LiveReg { - // Allocate a specific register - let reg = insn.opnds[0].unwrap_reg(); - Some(take_reg(&mut pool, ®s, ®)) - } else { - Some(alloc_reg(&mut pool, ®s)) + out_reg = match &insn { + Insn { op: Op::LiveReg, opnds, .. } => { + // Allocate a specific register + let reg = opnds[0].unwrap_reg(); + Some(take_reg(&mut pool, ®s, ®)) + }, + _ => { + Some(alloc_reg(&mut pool, ®s)) + } }; } // Set the output operand on the instruction - let out_num_bits = Opnd::match_num_bits(&insn.opnds); - insn.out = Opnd::Reg(out_reg.unwrap().sub_reg(out_num_bits)); + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + + // If we have gotten to this point, then we're sure we have an + // output operand on this instruction because the live range + // extends beyond the index of the instruction. + let out = insn.out_opnd_mut().unwrap(); + *out = Opnd::Reg(out_reg.unwrap().sub_reg(out_num_bits)); } // Replace InsnOut operands by their corresponding register - for opnd in &mut insn.opnds { + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { match *opnd { Opnd::InsnOut { idx, .. } => { - *opnd = asm.insns[idx].out; + *opnd = *asm.insns[idx].out_opnd().unwrap(); }, Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { - let base = MemBase::Reg(asm.insns[idx].out.unwrap_reg().reg_no); + let base = MemBase::Reg(asm.insns[idx].out_opnd().unwrap().unwrap_reg().reg_no); *opnd = Opnd::Mem(Mem { base, disp, num_bits }); } _ => {}, @@ -800,7 +1091,7 @@ impl AssemblerDrainingIterator { /// to the next list of instructions. 
pub fn next_mapped(&mut self) -> Option<(usize, Insn)> { self.next_unmapped().map(|(index, insn)| { - let opnds = insn.opnds.into_iter().map(|opnd| opnd.map_index(&self.indices)).collect(); + let opnds = insn.opnd_iter().map(|opnd| opnd.map_index(&self.indices)).collect(); (index, Insn { opnds, ..insn }) }) } @@ -875,14 +1166,14 @@ impl fmt::Debug for Assembler { impl Assembler { #[must_use] pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd { - let out = self.next_opnd_out(&[left, right]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); self.push_insn(Insn { op: Op::Add, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd { - let out = self.next_opnd_out(&[left, right]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); self.push_insn(Insn { op: Op::And, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); out } @@ -897,7 +1188,7 @@ impl Assembler { #[must_use] pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { - let out = self.next_opnd_out(&opnds); + let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); self.push_insn(Insn { op: Op::CCall, opnds, out, text: None, target: Some(Target::FunPtr(fptr)), pos_marker: None }); out } @@ -912,7 +1203,7 @@ impl Assembler { #[must_use] pub fn cpop(&mut self) -> Opnd { - let out = self.next_opnd_out(&[]); + let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); self.push_insn(Insn { op: Op::CPop, opnds: vec![], out, text: None, target: None, pos_marker: None }); out } @@ -939,56 +1230,56 @@ impl Assembler { #[must_use] pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn 
csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelG, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelGE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelL, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelLE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelNE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelNZ, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { - let out = 
self.next_opnd_out(&[truthy, falsy]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); self.push_insn(Insn { op: Op::CSelZ, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); out } @@ -1043,42 +1334,42 @@ impl Assembler { #[must_use] pub fn lea(&mut self, opnd: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); self.push_insn(Insn { op: Op::Lea, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn lea_label(&mut self, target: Target) -> Opnd { - let out = self.next_opnd_out(&[]); + let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); self.push_insn(Insn { op: Op::LeaLabel, opnds: vec![], out, text: None, target: Some(target), pos_marker: None }); out } #[must_use] pub fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); self.push_insn(Insn { op: Op::LiveReg, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn load(&mut self, opnd: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); self.push_insn(Insn { op: Op::Load, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); self.push_insn(Insn { op: Op::LoadSExt, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd, shift]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); self.push_insn(Insn { op: Op::LShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); out } @@ -1089,18 
+1380,22 @@ impl Assembler { #[must_use] pub fn not(&mut self, opnd: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); self.push_insn(Insn { op: Op::Not, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd { - let out = self.next_opnd_out(&[left, right]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); self.push_insn(Insn { op: Op::Or, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); out } + pub fn pad_entry_exit(&mut self) { + self.push_insn(Insn { op: Op::PadEntryExit, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + } + //pub fn pos_marker(&mut self, marker_fn: F) pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { self.push_insn(Insn { op: Op::PosMarker, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: Some(Box::new(marker_fn)) }); @@ -1108,7 +1403,7 @@ impl Assembler { #[must_use] pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd, shift]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); self.push_insn(Insn { op: Op::RShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); out } @@ -1119,7 +1414,7 @@ impl Assembler { #[must_use] pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd { - let out = self.next_opnd_out(&[left, right]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); self.push_insn(Insn { op: Op::Sub, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); out } @@ -1130,19 +1425,42 @@ impl Assembler { #[must_use] pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { - let out = self.next_opnd_out(&[opnd, shift]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); self.push_insn(Insn { op: Op::URShift, 
opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); out } #[must_use] pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd { - let out = self.next_opnd_out(&[left, right]); + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); self.push_insn(Insn { op: Op::Xor, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); out } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_opnd_iter() { + let insn = Insn { op: Op::Add, opnds: vec![Opnd::None, Opnd::None], out: Opnd::None, text: None, target: None, pos_marker: None }; + + let mut opnd_iter = insn.opnd_iter(); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + + assert!(matches!(opnd_iter.next(), None)); + } - pub fn pad_entry_exit(&mut self) { - self.push_insn(Insn { op: Op::PadEntryExit, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + #[test] + fn test_opnd_iter_mut() { + let mut insn = Insn { op: Op::Add, opnds: vec![Opnd::None, Opnd::None], out: Opnd::None, text: None, target: None, pos_marker: None }; + + let mut opnd_iter = insn.opnd_iter_mut(); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + + assert!(matches!(opnd_iter.next(), None)); } } diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index e4ab95d4ee721e..b89b7eb6485d71 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -74,9 +74,12 @@ fn test_alloc_regs() { // Now we're going to verify that the out field has been appropriately // updated for each of the instructions that needs it. 
let regs = Assembler::get_alloc_regs(); - assert_eq!(result.insns[0].out, Opnd::Reg(regs[0])); - assert_eq!(result.insns[2].out, Opnd::Reg(regs[1])); - assert_eq!(result.insns[5].out, Opnd::Reg(regs[0])); + let reg0 = regs[0]; + let reg1 = regs[1]; + + assert!(matches!(result.insns[0].out_opnd(), Some(Opnd::Reg(reg0)))); + assert!(matches!(result.insns[2].out_opnd(), Some(Opnd::Reg(reg1)))); + assert!(matches!(result.insns[5].out_opnd(), Some(Opnd::Reg(reg0)))); } fn setup_asm() -> (Assembler, CodeBlock) { @@ -332,7 +335,8 @@ fn test_lookback_iterator() { while let Some((index, insn)) = iter.next_unmapped() { if index > 0 { - assert_eq!(iter.get_previous().unwrap().opnds[0], Opnd::None); + let opnd_iter = iter.get_previous().unwrap().opnd_iter(); + assert_eq!(opnd_iter.take(1).next(), Some(&Opnd::None)); assert_eq!(insn.op, Op::Store); } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index f80e06ba9bf605..0c994144d067be 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -122,7 +122,7 @@ impl Assembler // - Most instructions can't be encoded with 64-bit immediates. // - We look for Op::Load specifically when emiting to keep GC'ed // VALUEs alive. This is a sort of canonicalization. - let mapped_opnds: Vec = insn.opnds.iter().map(|opnd| { + let mapped_opnds: Vec = insn.opnd_iter().map(|opnd| { if insn.op == Op::Load { iterator.map_opnd(*opnd) } else if let Opnd::Value(value) = opnd { From ea9ee31744a905d7bafdd064ed97f68b5b1f21fa Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 18 Aug 2022 13:10:37 -0400 Subject: [PATCH 497/546] A64 Linux reports aarach64 in RUBY_PLATFORM This should fix a version string test --- test/lib/jit_support.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lib/jit_support.rb b/test/lib/jit_support.rb index ccd3b83be76b5f..3846749016e8f6 100644 --- a/test/lib/jit_support.rb +++ b/test/lib/jit_support.rb @@ -64,7 +64,7 @@ def supported? 
def yjit_supported? # e.g. x86_64-linux, x64-mswin64_140, x64-mingw32, x64-mingw-ucrt - RUBY_PLATFORM.match?(/^(x86_64|x64|arm64)-/) + RUBY_PLATFORM.match?(/^(x86_64|x64|arm64|aarch64)-/) end # AppVeyor's Visual Studio 2013 / 2015 are known to spuriously generate broken pch / pdb, like: From f883aabc13d334771da926e632dca5758bb506c8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 18 Aug 2022 15:39:18 -0400 Subject: [PATCH 498/546] Instruction enum (https://github.com/Shopify/ruby/pull/423) * Remove references to explicit instruction parts Previously we would reference individual instruction fields manually. We can't do that with instructions that are enums, so this commit removes those references. As a side effect, we can remove the push_insn_parts() function from the assembler because we now explicitly push instruction structs every time. * Switch instructions to enum Instructions are now no longer a large struct with a bunch of optional fields. Instead they are an enum with individual shapes for the variants. In terms of size, the instruction struct was 120 bytes while the new instruction enum is 106 bytes. The bigger win however is that we're not allocating any vectors for instruction operands (except for CCall), which should help cut down on memory usage. Adding new instructions will be a little more complicated going forward, but every mission-critical function that needs to be touched will have an exhaustive match, so the compiler should guide any additions. 
--- yjit/src/backend/arm64/mod.rs | 426 +++++++++------- yjit/src/backend/ir.rs | 888 +++++++++++++++++---------------- yjit/src/backend/tests.rs | 8 +- yjit/src/backend/x86_64/mod.rs | 431 +++++++++------- 4 files changed, 960 insertions(+), 793 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index a32be6a6b2769b..60cdf2b9d1eaf3 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -59,6 +59,13 @@ impl From for A64Opnd { } } +/// Also implement going from a reference to an operand for convenience. +impl From<&Opnd> for A64Opnd { + fn from(opnd: &Opnd) -> Self { + A64Opnd::from(*opnd) + } +} + impl Assembler { // A special scratch register for intermediate processing. @@ -182,6 +189,41 @@ impl Assembler } } + /// Returns the operands that should be used for a boolean logic + /// instruction. + fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + match (opnd0, opnd1) { + (Opnd::Reg(_), Opnd::Reg(_)) => { + (opnd0, opnd1) + }, + (reg_opnd @ Opnd::Reg(_), other_opnd) | + (other_opnd, reg_opnd @ Opnd::Reg(_)) => { + let opnd1 = split_bitmask_immediate(asm, other_opnd); + (reg_opnd, opnd1) + }, + _ => { + let opnd0 = split_load_operand(asm, opnd0); + let opnd1 = split_bitmask_immediate(asm, opnd1); + (opnd0, opnd1) + } + } + } + + /// Returns the operands that should be used for a csel instruction. + fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + let opnd0 = match opnd0 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0, + _ => split_load_operand(asm, opnd0) + }; + + let opnd1 = match opnd1 { + Opnd::Reg(_) | Opnd::InsnOut { .. 
} => opnd1, + _ => split_load_operand(asm, opnd1) + }; + + (opnd0, opnd1) + } + let mut asm_local = Assembler::new_with_label_names(std::mem::take(&mut self.label_names)); let asm = &mut asm_local; let mut iterator = self.into_draining_iter(); @@ -192,7 +234,7 @@ impl Assembler // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integer. - let skip_load = matches!(insn, Insn { op: Op::Load, .. }); + let skip_load = matches!(insn, Insn::Load { .. }); let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { @@ -209,10 +251,10 @@ impl Assembler } match insn { - Insn { op: Op::Add, opnds, .. } => { - match (opnds[0], opnds[1]) { + Insn::Add { left, right, .. } => { + match (left, right) { (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { - asm.add(opnds[0], opnds[1]); + asm.add(left, right); }, (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => { @@ -220,30 +262,25 @@ impl Assembler asm.add(reg_opnd, opnd1); }, _ => { - let opnd0 = split_load_operand(asm, opnds[0]); - let opnd1 = split_shifted_immediate(asm, opnds[1]); + let opnd0 = split_load_operand(asm, left); + let opnd1 = split_shifted_immediate(asm, right); asm.add(opnd0, opnd1); } } }, - Insn { op: Op::And | Op::Or | Op::Xor, opnds, target, text, pos_marker, .. 
} => { - match (opnds[0], opnds[1]) { - (Opnd::Reg(_), Opnd::Reg(_)) => { - asm.push_insn_parts(insn.op, vec![opnds[0], opnds[1]], target, text, pos_marker); - }, - (reg_opnd @ Opnd::Reg(_), other_opnd) | - (other_opnd, reg_opnd @ Opnd::Reg(_)) => { - let opnd1 = split_bitmask_immediate(asm, other_opnd); - asm.push_insn_parts(insn.op, vec![reg_opnd, opnd1], target, text, pos_marker); - }, - _ => { - let opnd0 = split_load_operand(asm, opnds[0]); - let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.push_insn_parts(insn.op, vec![opnd0, opnd1], target, text, pos_marker); - } - } + Insn::And { left, right, .. } => { + let (opnd0, opnd1) = split_boolean_operands(asm, left, right); + asm.and(opnd0, opnd1); + }, + Insn::Or { left, right, .. } => { + let (opnd0, opnd1) = split_boolean_operands(asm, left, right); + asm.or(opnd0, opnd1); }, - Insn { op: Op::CCall, opnds, target, .. } => { + Insn::Xor { left, right, .. } => { + let (opnd0, opnd1) = split_boolean_operands(asm, left, right); + asm.xor(opnd0, opnd1); + }, + Insn::CCall { opnds, target, .. } => { assert!(opnds.len() <= C_ARG_OPNDS.len()); // For each of the operands we're going to first load them @@ -258,60 +295,82 @@ impl Assembler // Now we push the CCall without any arguments so that it // just performs the call. - asm.ccall(target.unwrap().unwrap_fun_ptr(), vec![]); + asm.ccall(target.unwrap_fun_ptr(), vec![]); }, - Insn { op: Op::Cmp, opnds, .. } => { - let opnd0 = match opnds[0] { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => split_load_operand(asm, opnds[0]) + Insn::Cmp { left, right } => { + let opnd0 = match left { + Opnd::Reg(_) | Opnd::InsnOut { .. } => left, + _ => split_load_operand(asm, left) }; - let opnd1 = split_shifted_immediate(asm, opnds[1]); + let opnd1 = split_shifted_immediate(asm, right); asm.cmp(opnd0, opnd1); }, - Insn { op: Op::CRet, opnds, .. 
} => { - if opnds[0] != Opnd::Reg(C_RET_REG) { - let value = split_load_operand(asm, opnds[0]); + Insn::CRet(opnd) => { + if opnd != Opnd::Reg(C_RET_REG) { + let value = split_load_operand(asm, opnd); asm.mov(C_RET_OPND, value); } asm.cret(C_RET_OPND); }, - Insn { op: Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE, opnds, target, text, pos_marker, .. } => { - let new_opnds = opnds.into_iter().map(|opnd| { - match opnd { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - _ => split_load_operand(asm, opnd) - } - }).collect(); - - asm.push_insn_parts(insn.op, new_opnds, target, text, pos_marker); + Insn::CSelZ { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_z(opnd0, opnd1); }, - Insn { op: Op::IncrCounter, opnds, .. } => { + Insn::CSelNZ { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_nz(opnd0, opnd1); + }, + Insn::CSelE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_e(opnd0, opnd1); + }, + Insn::CSelNE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_ne(opnd0, opnd1); + }, + Insn::CSelL { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_l(opnd0, opnd1); + }, + Insn::CSelLE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_le(opnd0, opnd1); + }, + Insn::CSelG { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_g(opnd0, opnd1); + }, + Insn::CSelGE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_ge(opnd0, opnd1); + }, + Insn::IncrCounter { mem, value } => { // We'll use LDADD later which only works with registers // ... 
Load pointer into register - let counter_addr = split_lea_operand(asm, opnds[0]); + let counter_addr = split_lea_operand(asm, mem); // Load immediates into a register - let addend = match opnds[1] { + let addend = match value { opnd @ Opnd::Imm(_) | opnd @ Opnd::UImm(_) => asm.load(opnd), opnd => opnd, }; asm.incr_counter(counter_addr, addend); }, - Insn { op: Op::JmpOpnd, opnds, .. } => { - if let Opnd::Mem(_) = opnds[0] { - let opnd0 = split_load_operand(asm, opnds[0]); + Insn::JmpOpnd(opnd) => { + if let Opnd::Mem(_) = opnd { + let opnd0 = split_load_operand(asm, opnd); asm.jmp_opnd(opnd0); } else { - asm.jmp_opnd(opnds[0]); + asm.jmp_opnd(opnd); } }, - Insn { op: Op::Load, opnds, .. } => { - split_load_operand(asm, opnds[0]); + Insn::Load { opnd, .. } => { + split_load_operand(asm, opnd); }, - Insn { op: Op::LoadSExt, opnds, .. } => { - match opnds[0] { + Insn::LoadSExt { opnd, .. } => { + match opnd { // We only want to sign extend if the operand is a // register, instruction output, or memory address that // is 32 bits. Otherwise we'll just load the value @@ -319,87 +378,87 @@ impl Assembler Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::InsnOut { num_bits: 32, .. } | Opnd::Mem(Mem { num_bits: 32, .. }) => { - asm.load_sext(opnds[0]); + asm.load_sext(opnd); }, _ => { - asm.load(opnds[0]); + asm.load(opnd); } }; }, - Insn { op: Op::Mov, opnds, .. } => { - let value = match (opnds[0], opnds[1]) { + Insn::Mov { dest, src } => { + let value = match (dest, src) { // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll // need to load this anyway. - (Opnd::Mem(_), Opnd::UImm(_)) => asm.load(opnds[1]), + (Opnd::Mem(_), Opnd::UImm(_)) => asm.load(src), // The value that is being moved must be either a // register or an immediate that can be encoded as a // bitmask immediate. Otherwise, we'll need to split the // move into multiple instructions. 
- _ => split_bitmask_immediate(asm, opnds[1]) + _ => split_bitmask_immediate(asm, src) }; // If we're attempting to load into a memory operand, then // we'll switch over to the store instruction. Otherwise // we'll use the normal mov instruction. - match opnds[0] { + match dest { Opnd::Mem(_) => { - let opnd0 = split_memory_address(asm, opnds[0]); + let opnd0 = split_memory_address(asm, dest); asm.store(opnd0, value); }, Opnd::Reg(_) => { - asm.mov(opnds[0], value); + asm.mov(dest, value); }, _ => unreachable!() }; }, - Insn { op: Op::Not, opnds, .. } => { + Insn::Not { opnd, .. } => { // The value that is being negated must be in a register, so // if we get anything else we need to load it first. - let opnd0 = match opnds[0] { - Opnd::Mem(_) => split_load_operand(asm, opnds[0]), - _ => opnds[0] + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, opnd), + _ => opnd }; asm.not(opnd0); }, - Insn { op: Op::Store, opnds, .. } => { + Insn::Store { dest, src } => { // The displacement for the STUR instruction can't be more // than 9 bits long. If it's longer, we need to load the // memory address into a register first. - let opnd0 = split_memory_address(asm, opnds[0]); + let opnd0 = split_memory_address(asm, dest); // The value being stored must be in a register, so if it's // not already one we'll load it first. - let opnd1 = match opnds[1] { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], - _ => split_load_operand(asm, opnds[1]) + let opnd1 = match src { + Opnd::Reg(_) | Opnd::InsnOut { .. } => src, + _ => split_load_operand(asm, src) }; asm.store(opnd0, opnd1); }, - Insn { op: Op::Sub, opnds, .. } => { - let opnd0 = match opnds[0] { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => split_load_operand(asm, opnds[0]) + Insn::Sub { left, right, .. } => { + let opnd0 = match left { + Opnd::Reg(_) | Opnd::InsnOut { .. 
} => left, + _ => split_load_operand(asm, left) }; - let opnd1 = split_shifted_immediate(asm, opnds[1]); + let opnd1 = split_shifted_immediate(asm, right); asm.sub(opnd0, opnd1); }, - Insn { op: Op::Test, opnds, .. } => { + Insn::Test { left, right } => { // The value being tested must be in a register, so if it's // not already one we'll load it first. - let opnd0 = match opnds[0] { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => split_load_operand(asm, opnds[0]) + let opnd0 = match left { + Opnd::Reg(_) | Opnd::InsnOut { .. } => left, + _ => split_load_operand(asm, left) }; // The second value must be either a register or an // unsigned immediate that can be encoded as a bitmask // immediate. If it's not one of those, we'll need to load // it first. - let opnd1 = split_bitmask_immediate(asm, opnds[1]); + let opnd1 = split_bitmask_immediate(asm, right); asm.test(opnd0, opnd1); }, _ => { @@ -589,23 +648,20 @@ impl Assembler let start_write_pos = cb.get_write_pos(); for insn in &self.insns { match insn { - Insn { op: Op::Comment, text, .. } => { + Insn::Comment(text) => { if cfg!(feature = "asm_comments") { - cb.add_comment(text.as_ref().unwrap()); + cb.add_comment(text); } }, - Insn { op: Op::Label, target, .. } => { - cb.write_label(target.unwrap().unwrap_label_idx()); + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); }, // Report back the current position in the generated code - Insn { op: Op::PosMarker, pos_marker, .. } => { - let pos = cb.get_write_ptr(); - let pos_marker_fn = pos_marker.as_ref().unwrap(); - pos_marker_fn(pos); + Insn::PosMarker(pos_marker) => { + pos_marker(cb.get_write_ptr()); } - Insn { op: Op::BakeString, text, .. } => { - let str = text.as_ref().unwrap(); - for byte in str.as_bytes() { + Insn::BakeString(text) => { + for byte in text.as_bytes() { cb.write_byte(*byte); } @@ -615,69 +671,69 @@ impl Assembler // Pad out the string to the next 4-byte boundary so that // it's easy to jump past. 
- for _ in 0..(4 - ((str.len() + 1) % 4)) { + for _ in 0..(4 - ((text.len() + 1) % 4)) { cb.write_byte(0); } }, - Insn { op: Op::Add, opnds, out, .. } => { - adds(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::Add { left, right, out } => { + adds(cb, out.into(), left.into(), right.into()); }, - Insn { op: Op::FrameSetup, .. } => { + Insn::FrameSetup => { stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); // X29 (frame_pointer) = SP mov(cb, X29, C_SP_REG); }, - Insn { op: Op::FrameTeardown, .. } => { + Insn::FrameTeardown => { // SP = X29 (frame pointer) mov(cb, C_SP_REG, X29); ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, - Insn { op: Op::Sub, opnds, out, .. } => { - subs(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::Sub { left, right, out } => { + subs(cb, out.into(), left.into(), right.into()); }, - Insn { op: Op::And, opnds, out, .. } => { - and(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::And { left, right, out } => { + and(cb, out.into(), left.into(), right.into()); }, - Insn { op: Op::Or, opnds, out, .. } => { - orr(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::Or { left, right, out } => { + orr(cb, out.into(), left.into(), right.into()); }, - Insn { op: Op::Xor, opnds, out, .. } => { - eor(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::Xor { left, right, out } => { + eor(cb, out.into(), left.into(), right.into()); }, - Insn { op: Op::Not, opnds, out, .. } => { - mvn(cb, (*out).into(), opnds[0].into()); + Insn::Not { opnd, out } => { + mvn(cb, out.into(), opnd.into()); }, - Insn { op: Op::RShift, opnds, out, .. } => { - asr(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::RShift { opnd, shift, out } => { + asr(cb, out.into(), opnd.into(), shift.into()); }, - Insn { op: Op::URShift, opnds, out, .. 
} => { - lsr(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::URShift { opnd, shift, out } => { + lsr(cb, out.into(), opnd.into(), shift.into()); }, - Insn { op: Op::LShift, opnds, out, .. } => { - lsl(cb, (*out).into(), opnds[0].into(), opnds[1].into()); + Insn::LShift { opnd, shift, out } => { + lsl(cb, out.into(), opnd.into(), shift.into()); }, - Insn { op: Op::Store, opnds, .. } => { + Insn::Store { dest, src } => { // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. However in // our IR we have the address first and the register second. - stur(cb, opnds[1].into(), opnds[0].into()); + stur(cb, src.into(), dest.into()); }, - Insn { op: Op::Load, opnds, out, .. } => { - match opnds[0] { + Insn::Load { opnd, out } => { + match *opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => { - mov(cb, (*out).into(), opnds[0].into()); + mov(cb, out.into(), opnd.into()); }, Opnd::UImm(uimm) => { - emit_load_value(cb, (*out).into(), uimm); + emit_load_value(cb, out.into(), uimm); }, Opnd::Imm(imm) => { - emit_load_value(cb, (*out).into(), imm as u64); + emit_load_value(cb, out.into(), imm as u64); }, Opnd::Mem(_) => { - ldur(cb, (*out).into(), opnds[0].into()); + ldur(cb, out.into(), opnd.into()); }, Opnd::Value(value) => { // We dont need to check if it's a special const @@ -689,7 +745,7 @@ impl Assembler // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. - ldr_literal(cb, (*out).into(), 2); + ldr_literal(cb, out.into(), 2); b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); @@ -701,29 +757,29 @@ impl Assembler } }; }, - Insn { op: Op::LoadSExt, opnds, out, .. } => { - match opnds[0] { + Insn::LoadSExt { opnd, out } => { + match *opnd { Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::InsnOut { num_bits: 32, .. 
} => { - sxtw(cb, (*out).into(), opnds[0].into()); + sxtw(cb, out.into(), opnd.into()); }, Opnd::Mem(Mem { num_bits: 32, .. }) => { - ldursw(cb, (*out).into(), opnds[0].into()); + ldursw(cb, out.into(), opnd.into()); }, _ => unreachable!() }; }, - Insn { op: Op::Mov, opnds, .. } => { - mov(cb, opnds[0].into(), opnds[1].into()); + Insn::Mov { dest, src } => { + mov(cb, dest.into(), src.into()); }, - Insn { op: Op::Lea, opnds, out, .. } => { - let opnd: A64Opnd = opnds[0].into(); + Insn::Lea { opnd, out } => { + let opnd: A64Opnd = opnd.into(); match opnd { A64Opnd::Mem(mem) => { add( cb, - (*out).into(), + out.into(), A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), A64Opnd::new_imm(mem.disp.into()) ); @@ -733,25 +789,25 @@ impl Assembler } }; }, - Insn { op: Op::LeaLabel, out, target, .. } => { - let label_idx = target.unwrap().unwrap_label_idx(); + Insn::LeaLabel { out, target, .. } => { + let label_idx = target.unwrap_label_idx(); cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| { adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); - mov(cb, (*out).into(), Self::SCRATCH0); + mov(cb, out.into(), Self::SCRATCH0); }, - Insn { op: Op::CPush, opnds, .. } => { - emit_push(cb, opnds[0].into()); + Insn::CPush(opnd) => { + emit_push(cb, opnd.into()); }, - Insn { op: Op::CPop, out, .. } => { - emit_pop(cb, (*out).into()); + Insn::CPop { out } => { + emit_pop(cb, out.into()); }, - Insn { op: Op::CPopInto, opnds, .. } => { - emit_pop(cb, opnds[0].into()); + Insn::CPopInto(opnd) => { + emit_pop(cb, opnd.into()); }, - Insn { op: Op::CPushAll, .. } => { + Insn::CPushAll => { let regs = Assembler::get_caller_save_regs(); for reg in regs { @@ -762,7 +818,7 @@ impl Assembler mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); emit_push(cb, Self::SCRATCH0); }, - Insn { op: Op::CPopAll, .. 
} => { + Insn::CPopAll => { let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register @@ -773,10 +829,10 @@ impl Assembler emit_pop(cb, A64Opnd::Reg(reg)); } }, - Insn { op: Op::CCall, target, .. } => { + Insn::CCall { target, .. } => { // The offset to the call target in bytes let src_addr = cb.get_write_ptr().into_i64(); - let dst_addr = target.unwrap().unwrap_fun_ptr() as i64; + let dst_addr = target.unwrap_fun_ptr() as i64; let offset = dst_addr - src_addr; // The offset in instruction count for BL's immediate let offset = offset / 4; @@ -790,20 +846,20 @@ impl Assembler blr(cb, Self::SCRATCH0); } }, - Insn { op: Op::CRet, .. } => { + Insn::CRet { .. } => { ret(cb, A64Opnd::None); }, - Insn { op: Op::Cmp, opnds, .. } => { - cmp(cb, opnds[0].into(), opnds[1].into()); + Insn::Cmp { left, right } => { + cmp(cb, left.into(), right.into()); }, - Insn { op: Op::Test, opnds, .. } => { - tst(cb, opnds[0].into(), opnds[1].into()); + Insn::Test { left, right } => { + tst(cb, left.into(), right.into()); }, - Insn { op: Op::JmpOpnd, opnds, .. } => { - br(cb, opnds[0].into()); + Insn::JmpOpnd(opnd) => { + br(cb, opnd.into()); }, - Insn { op: Op::Jmp, target, .. } => { - match target.unwrap() { + Insn::Jmp(target) => { + match target { Target::CodePtr(dst_ptr) => { let src_addr = cb.get_write_ptr().into_i64(); let dst_addr = dst_ptr.into_i64(); @@ -831,60 +887,62 @@ impl Assembler // instruction once we know the offset. We're going // to assume we can fit into a single b instruction. // It will panic otherwise. - cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + cb.label_ref(*label_idx, 4, |cb, src_addr, dst_addr| { b(cb, A64Opnd::new_imm((dst_addr - (src_addr - 4)) / 4)); }); }, _ => unreachable!() }; }, - Insn { op: Op::Je, target, .. } => { - emit_conditional_jump::<{Condition::EQ}>(cb, target.unwrap()); + Insn::Je(target) => { + emit_conditional_jump::<{Condition::EQ}>(cb, *target); }, - Insn { op: Op::Jne, target, .. 
} => { - emit_conditional_jump::<{Condition::NE}>(cb, target.unwrap()); + Insn::Jne(target) => { + emit_conditional_jump::<{Condition::NE}>(cb, *target); }, - Insn { op: Op::Jl, target, .. } => { - emit_conditional_jump::<{Condition::LT}>(cb, target.unwrap()); + Insn::Jl(target) => { + emit_conditional_jump::<{Condition::LT}>(cb, *target); }, - Insn { op: Op::Jbe, target, .. } => { - emit_conditional_jump::<{Condition::LS}>(cb, target.unwrap()); + Insn::Jbe(target) => { + emit_conditional_jump::<{Condition::LS}>(cb, *target); }, - Insn { op: Op::Jz, target, .. } => { - emit_conditional_jump::<{Condition::EQ}>(cb, target.unwrap()); + Insn::Jz(target) => { + emit_conditional_jump::<{Condition::EQ}>(cb, *target); }, - Insn { op: Op::Jnz, target, .. } => { - emit_conditional_jump::<{Condition::NE}>(cb, target.unwrap()); + Insn::Jnz(target) => { + emit_conditional_jump::<{Condition::NE}>(cb, *target); }, - Insn { op: Op::Jo, target, .. } => { - emit_conditional_jump::<{Condition::VS}>(cb, target.unwrap()); + Insn::Jo(target) => { + emit_conditional_jump::<{Condition::VS}>(cb, *target); }, - Insn { op: Op::IncrCounter, opnds, .. } => { - ldaddal(cb, opnds[1].into(), opnds[1].into(), opnds[0].into()); + Insn::IncrCounter { mem, value } => { + ldaddal(cb, value.into(), value.into(), mem.into()); }, - Insn { op: Op::Breakpoint, .. } => { + Insn::Breakpoint => { brk(cb, A64Opnd::None); }, - Insn { op: Op::CSelZ | Op::CSelE, opnds, out, .. } => { - csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::EQ); + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); }, - Insn { op: Op::CSelNZ | Op::CSelNE, opnds, out, .. 
} => { - csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::NE); + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE); }, - Insn { op: Op::CSelL, opnds, out, .. } => { - csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::LT); + Insn::CSelL { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT); }, - Insn { op: Op::CSelLE, opnds, out, .. } => { - csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::LE); + Insn::CSelLE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE); }, - Insn { op: Op::CSelG, opnds, out, .. } => { - csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::GT); + Insn::CSelG { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT); }, - Insn { op: Op::CSelGE, opnds, out, .. } => { - csel(cb, (*out).into(), opnds[0].into(), opnds[1].into(), Condition::GE); + Insn::CSelGE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); } - Insn { op: Op::LiveReg, .. } => (), // just a reg alloc signal, no code - Insn { op: Op::PadEntryExit, .. } => { + Insn::LiveReg { .. } => (), // just a reg alloc signal, no code + Insn::PadEntryExit => { let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions while (cb.get_write_pos() - start_write_pos) < jmp_len { nop(cb); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index cea8dfb227120d..fe525cf31d3d70 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -24,155 +24,6 @@ pub const SP: Opnd = _SP; pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; pub const C_RET_OPND: Opnd = _C_RET_OPND; -/// Instruction opcodes -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum Op -{ - // Add a comment into the IR at the point that this instruction is added. 
- // It won't have any impact on that actual compiled code. - Comment, - - // Add a label into the IR at the point that this instruction is added. - Label, - - // Mark a position in the generated code - PosMarker, - - // Bake a string directly into the instruction stream. - BakeString, - - // Add two operands together, and return the result as a new operand. This - // operand can then be used as the operand on another instruction. It - // accepts two operands, which can be of any type - // - // Under the hood when allocating registers, the IR will determine the most - // efficient way to get these values into memory. For example, if both - // operands are immediates, then it will load the first one into a register - // first with a mov instruction and then add them together. If one of them - // is a register, however, it will just perform a single add instruction. - Add, - - // This is the same as the OP_ADD instruction, except for subtraction. - Sub, - - // This is the same as the OP_ADD instruction, except that it performs the - // binary AND operation. - And, - - // This is the same as the OP_ADD instruction, except that it performs the - // binary OR operation. - Or, - - // This is the same as the OP_ADD instruction, except that it performs the - // binary XOR operation. - Xor, - - // Perform the NOT operation on an individual operand, and return the result - // as a new operand. This operand can then be used as the operand on another - // instruction. - Not, - - /// Shift a value right by a certain amount (signed). - RShift, - - /// Shift a value right by a certain amount (unsigned). - URShift, - - /// Shift a value left by a certain amount. - LShift, - - // - // Low-level instructions - // - - // A low-level instruction that loads a value into a register. - Load, - - // A low-level instruction that loads a value into a register and - // sign-extends it to a 64-bit value. - LoadSExt, - - // Low-level instruction to store a value to memory. 
- Store, - - // Load effective address - Lea, - - // Load effective address relative to the current instruction pointer. It - // accepts a single signed immediate operand. - LeaLabel, - - // A low-level mov instruction. It accepts two operands. - Mov, - - // Bitwise AND test instruction - Test, - - // Compare two operands - Cmp, - - // Unconditional jump to a branch target - Jmp, - - // Unconditional jump which takes a reg/mem address operand - JmpOpnd, - - // Low-level conditional jump instructions - Jl, - Jbe, - Je, - Jne, - Jz, - Jnz, - Jo, - - // Conditional select instructions - CSelZ, - CSelNZ, - CSelE, - CSelNE, - CSelL, - CSelLE, - CSelG, - CSelGE, - - // Push and pop registers to/from the C stack - CPush, - CPop, - CPopInto, - - // Push and pop all of the caller-save registers and the flags to/from the C - // stack - CPushAll, - CPopAll, - - // C function call with N arguments (variadic) - CCall, - - // C function return - CRet, - - // Atomically increment a counter - // Input: memory operand, increment value - // Produces no output - IncrCounter, - - // Trigger a debugger breakpoint - Breakpoint, - - /// Set up the frame stack as necessary per the architecture. - FrameSetup, - - /// Tear down the frame stack as necessary per the architecture. - FrameTeardown, - - /// Take a specific register. Signal the register allocator to not use it. - LiveReg, - - /// Pad nop instructions to accomodate Op::Jmp in case the block is invalidated. - PadEntryExit, -} - // Memory operand base #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum MemBase @@ -429,26 +280,170 @@ impl From for Target { type PosMarkerFn = Box; /// YJIT IR instruction -pub struct Insn -{ - // Opcode for the instruction - pub(super) op: Op, +pub enum Insn { + /// Add two operands together, and return the result as a new operand. + Add { left: Opnd, right: Opnd, out: Opnd }, + + /// This is the same as the OP_ADD instruction, except that it performs the + /// binary AND operation. 
+ And { left: Opnd, right: Opnd, out: Opnd }, + + /// Bake a string directly into the instruction stream. + BakeString(String), + + // Trigger a debugger breakpoint + Breakpoint, + + /// Add a comment into the IR at the point that this instruction is added. + /// It won't have any impact on that actual compiled code. + Comment(String), + + /// Compare two operands + Cmp { left: Opnd, right: Opnd }, + + /// Pop a register from the C stack + CPop { out: Opnd }, + + /// Pop all of the caller-save registers and the flags from the C stack + CPopAll, + + /// Pop a register from the C stack and store it into another register + CPopInto(Opnd), + + /// Push a register onto the C stack + CPush(Opnd), + + /// Push all of the caller-save registers and the flags to the C stack + CPushAll, + + // C function call with N arguments (variadic) + CCall { opnds: Vec, target: Target, out: Opnd }, + + // C function return + CRet(Opnd), + + /// Conditionally select if equal + CSelE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if greater + CSelG { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if greater or equal + CSelGE { truthy: Opnd, falsy: Opnd, out: Opnd }, - // Optional string for comments and labels - pub(super) text: Option, + /// Conditionally select if less + CSelL { truthy: Opnd, falsy: Opnd, out: Opnd }, - // List of input operands/values - pub(super) opnds: Vec, + /// Conditionally select if less or equal + CSelLE { truthy: Opnd, falsy: Opnd, out: Opnd }, - // Output operand for this instruction - pub(super) out: Opnd, + /// Conditionally select if not equal + CSelNE { truthy: Opnd, falsy: Opnd, out: Opnd }, - // List of branch targets (branch instructions only) - pub(super) target: Option, + /// Conditionally select if not zero + CSelNZ { truthy: Opnd, falsy: Opnd, out: Opnd }, - // Callback to mark the position of this instruction - // in the generated code - pub(super) pos_marker: Option, + /// Conditionally select if zero + 
CSelZ { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Set up the frame stack as necessary per the architecture. + FrameSetup, + + /// Tear down the frame stack as necessary per the architecture. + FrameTeardown, + + // Atomically increment a counter + // Input: memory operand, increment value + // Produces no output + IncrCounter { mem: Opnd, value: Opnd }, + + /// Jump if below or equal + Jbe(Target), + + /// Jump if equal + Je(Target), + + /// Jump if lower + Jl(Target), + + // Unconditional jump to a branch target + Jmp(Target), + + // Unconditional jump which takes a reg/mem address operand + JmpOpnd(Opnd), + + /// Jump if not equal + Jne(Target), + + /// Jump if not zero + Jnz(Target), + + /// Jump if overflow + Jo(Target), + + /// Jump if zero + Jz(Target), + + // Add a label into the IR at the point that this instruction is added. + Label(Target), + + // Load effective address relative to the current instruction pointer. It + // accepts a single signed immediate operand. + LeaLabel { target: Target, out: Opnd }, + + // Load effective address + Lea { opnd: Opnd, out: Opnd }, + + /// Take a specific register. Signal the register allocator to not use it. + LiveReg { opnd: Opnd, out: Opnd }, + + // A low-level instruction that loads a value into a register. + Load { opnd: Opnd, out: Opnd }, + + // A low-level instruction that loads a value into a register and + // sign-extends it to a 64-bit value. + LoadSExt { opnd: Opnd, out: Opnd }, + + /// Shift a value left by a certain amount. + LShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // A low-level mov instruction. It accepts two operands. + Mov { dest: Opnd, src: Opnd }, + + // Perform the NOT operation on an individual operand, and return the result + // as a new operand. This operand can then be used as the operand on another + // instruction. + Not { opnd: Opnd, out: Opnd }, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary OR operation. 
+ Or { left: Opnd, right: Opnd, out: Opnd }, + + /// Pad nop instructions to accomodate Op::Jmp in case the block is + /// invalidated. + PadEntryExit, + + // Mark a position in the generated code + PosMarker(PosMarkerFn), + + /// Shift a value right by a certain amount (signed). + RShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // Low-level instruction to store a value to memory. + Store { dest: Opnd, src: Opnd }, + + // This is the same as the OP_ADD instruction, except for subtraction. + Sub { left: Opnd, right: Opnd, out: Opnd }, + + // Bitwise AND test instruction + Test { left: Opnd, right: Opnd }, + + /// Shift a value right by a certain amount (unsigned). + URShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary XOR operation. + Xor { left: Opnd, right: Opnd, out: Opnd } } impl Insn { @@ -464,34 +459,92 @@ impl Insn { InsnOpndMutIterator::new(self) } + /// Returns a string that describes which operation this instruction is + /// performing. This is used for debugging. + fn op(&self) -> &'static str { + match self { + Insn::Add { .. } => "Add", + Insn::And { .. } => "And", + Insn::BakeString(_) => "BakeString", + Insn::Breakpoint => "Breakpoint", + Insn::Comment(_) => "Comment", + Insn::Cmp { .. } => "Cmp", + Insn::CPop { .. } => "CPop", + Insn::CPopAll => "CPopAll", + Insn::CPopInto(_) => "CPopInto", + Insn::CPush(_) => "CPush", + Insn::CPushAll => "CPushAll", + Insn::CCall { .. } => "CCall", + Insn::CRet(_) => "CRet", + Insn::CSelE { .. } => "CSelE", + Insn::CSelG { .. } => "CSelG", + Insn::CSelGE { .. } => "CSelGE", + Insn::CSelL { .. } => "CSelL", + Insn::CSelLE { .. } => "CSelLE", + Insn::CSelNE { .. } => "CSelNE", + Insn::CSelNZ { .. } => "CSelNZ", + Insn::CSelZ { .. } => "CSelZ", + Insn::FrameSetup => "FrameSetup", + Insn::FrameTeardown => "FrameTeardown", + Insn::IncrCounter { .. 
} => "IncrCounter", + Insn::Jbe(_) => "Jbe", + Insn::Je(_) => "Je", + Insn::Jl(_) => "Jl", + Insn::Jmp(_) => "Jmp", + Insn::JmpOpnd(_) => "JmpOpnd", + Insn::Jne(_) => "Jne", + Insn::Jnz(_) => "Jnz", + Insn::Jo(_) => "Jo", + Insn::Jz(_) => "Jz", + Insn::Label(_) => "Label", + Insn::LeaLabel { .. } => "LeaLabel", + Insn::Lea { .. } => "Lea", + Insn::LiveReg { .. } => "LiveReg", + Insn::Load { .. } => "Load", + Insn::LoadSExt { .. } => "LoadSExt", + Insn::LShift { .. } => "LShift", + Insn::Mov { .. } => "Mov", + Insn::Not { .. } => "Not", + Insn::Or { .. } => "Or", + Insn::PadEntryExit => "PadEntryExit", + Insn::PosMarker(_) => "PosMarker", + Insn::RShift { .. } => "RShift", + Insn::Store { .. } => "Store", + Insn::Sub { .. } => "Sub", + Insn::Test { .. } => "Test", + Insn::URShift { .. } => "URShift", + Insn::Xor { .. } => "Xor" + } + } + /// Return a non-mutable reference to the out operand for this instruction /// if it has one. pub fn out_opnd(&self) -> Option<&Opnd> { match self { - Insn { op: Op::Add, out, .. } | - Insn { op: Op::And, out, .. } | - Insn { op: Op::CCall, out, .. } | - Insn { op: Op::CPop, out, .. } | - Insn { op: Op::CSelE, out, .. } | - Insn { op: Op::CSelG, out, .. } | - Insn { op: Op::CSelGE, out, .. } | - Insn { op: Op::CSelL, out, .. } | - Insn { op: Op::CSelLE, out, .. } | - Insn { op: Op::CSelNE, out, .. } | - Insn { op: Op::CSelNZ, out, .. } | - Insn { op: Op::CSelZ, out, .. } | - Insn { op: Op::Lea, out, .. } | - Insn { op: Op::LeaLabel, out, .. } | - Insn { op: Op::LiveReg, out, .. } | - Insn { op: Op::Load, out, .. } | - Insn { op: Op::LoadSExt, out, .. } | - Insn { op: Op::LShift, out, .. } | - Insn { op: Op::Not, out, .. } | - Insn { op: Op::Or, out, .. } | - Insn { op: Op::RShift, out, .. } | - Insn { op: Op::Sub, out, .. } | - Insn { op: Op::URShift, out, .. } | - Insn { op: Op::Xor, out, .. } => Some(out), + Insn::Add { out, .. } | + Insn::And { out, .. } | + Insn::CCall { out, .. } | + Insn::CPop { out, .. 
} | + Insn::CSelE { out, .. } | + Insn::CSelG { out, .. } | + Insn::CSelGE { out, .. } | + Insn::CSelL { out, .. } | + Insn::CSelLE { out, .. } | + Insn::CSelNE { out, .. } | + Insn::CSelNZ { out, .. } | + Insn::CSelZ { out, .. } | + Insn::Lea { out, .. } | + Insn::LeaLabel { out, .. } | + Insn::LiveReg { out, .. } | + Insn::Load { out, .. } | + Insn::LoadSExt { out, .. } | + Insn::LShift { out, .. } | + Insn::Not { out, .. } | + Insn::Or { out, .. } | + Insn::RShift { out, .. } | + Insn::Sub { out, .. } | + Insn::URShift { out, .. } | + Insn::Xor { out, .. } => Some(out), _ => None } } @@ -500,30 +553,55 @@ impl Insn { /// has one. pub fn out_opnd_mut(&mut self) -> Option<&mut Opnd> { match self { - Insn { op: Op::Add, out, .. } | - Insn { op: Op::And, out, .. } | - Insn { op: Op::CCall, out, .. } | - Insn { op: Op::CPop, out, .. } | - Insn { op: Op::CSelE, out, .. } | - Insn { op: Op::CSelG, out, .. } | - Insn { op: Op::CSelGE, out, .. } | - Insn { op: Op::CSelL, out, .. } | - Insn { op: Op::CSelLE, out, .. } | - Insn { op: Op::CSelNE, out, .. } | - Insn { op: Op::CSelNZ, out, .. } | - Insn { op: Op::CSelZ, out, .. } | - Insn { op: Op::Lea, out, .. } | - Insn { op: Op::LeaLabel, out, .. } | - Insn { op: Op::LiveReg, out, .. } | - Insn { op: Op::Load, out, .. } | - Insn { op: Op::LoadSExt, out, .. } | - Insn { op: Op::LShift, out, .. } | - Insn { op: Op::Not, out, .. } | - Insn { op: Op::Or, out, .. } | - Insn { op: Op::RShift, out, .. } | - Insn { op: Op::Sub, out, .. } | - Insn { op: Op::URShift, out, .. } | - Insn { op: Op::Xor, out, .. } => Some(out), + Insn::Add { out, .. } | + Insn::And { out, .. } | + Insn::CCall { out, .. } | + Insn::CPop { out, .. } | + Insn::CSelE { out, .. } | + Insn::CSelG { out, .. } | + Insn::CSelGE { out, .. } | + Insn::CSelL { out, .. } | + Insn::CSelLE { out, .. } | + Insn::CSelNE { out, .. } | + Insn::CSelNZ { out, .. } | + Insn::CSelZ { out, .. } | + Insn::Lea { out, .. } | + Insn::LeaLabel { out, .. 
} | + Insn::LiveReg { out, .. } | + Insn::Load { out, .. } | + Insn::LoadSExt { out, .. } | + Insn::LShift { out, .. } | + Insn::Not { out, .. } | + Insn::Or { out, .. } | + Insn::RShift { out, .. } | + Insn::Sub { out, .. } | + Insn::URShift { out, .. } | + Insn::Xor { out, .. } => Some(out), + _ => None + } + } + + /// Returns the target for this instruction if there is one. + pub fn target(&self) -> Option<&Target> { + match self { + Insn::Jbe(target) | + Insn::Je(target) | + Insn::Jl(target) | + Insn::Jmp(target) | + Insn::Jne(target) | + Insn::Jnz(target) | + Insn::Jo(target) | + Insn::Jz(target) | + Insn::LeaLabel { target, .. } => Some(target), + _ => None + } + } + + /// Returns the text associated with this instruction if there is some. + pub fn text(&self) -> Option<&String> { + match self { + Insn::BakeString(text) | + Insn::Comment(text) => Some(text), _ => None } } @@ -547,77 +625,77 @@ impl<'a> Iterator for InsnOpndIterator<'a> { fn next(&mut self) -> Option { match self.insn { - Insn { op: Op::BakeString, .. } | - Insn { op: Op::Breakpoint, .. } | - Insn { op: Op::Comment, .. } | - Insn { op: Op::CPop, .. } | - Insn { op: Op::CPopAll, .. } | - Insn { op: Op::CPushAll, .. } | - Insn { op: Op::FrameSetup, .. } | - Insn { op: Op::FrameTeardown, .. } | - Insn { op: Op::Jbe, .. } | - Insn { op: Op::Je, .. } | - Insn { op: Op::Jl, .. } | - Insn { op: Op::Jmp, .. } | - Insn { op: Op::Jne, .. } | - Insn { op: Op::Jnz, .. } | - Insn { op: Op::Jo, .. } | - Insn { op: Op::Jz, .. } | - Insn { op: Op::Label, .. } | - Insn { op: Op::LeaLabel, .. } | - Insn { op: Op::PadEntryExit, .. } | - Insn { op: Op::PosMarker, .. } => None, - Insn { op: Op::CPopInto, opnds, .. } | - Insn { op: Op::CPush, opnds, .. } | - Insn { op: Op::CRet, opnds, .. } | - Insn { op: Op::JmpOpnd, opnds, .. } | - Insn { op: Op::Lea, opnds, .. } | - Insn { op: Op::LiveReg, opnds, .. } | - Insn { op: Op::Load, opnds, .. } | - Insn { op: Op::LoadSExt, opnds, .. } | - Insn { op: Op::Not, opnds, .. 
} => { + Insn::BakeString(_) | + Insn::Breakpoint | + Insn::Comment(_) | + Insn::CPop { .. } | + Insn::CPopAll | + Insn::CPushAll | + Insn::FrameSetup | + Insn::FrameTeardown | + Insn::Jbe(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jmp(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::Jz(_) | + Insn::Label(_) | + Insn::LeaLabel { .. } | + Insn::PadEntryExit | + Insn::PosMarker(_) => None, + Insn::CPopInto(opnd) | + Insn::CPush(opnd) | + Insn::CRet(opnd) | + Insn::JmpOpnd(opnd) | + Insn::Lea { opnd, .. } | + Insn::LiveReg { opnd, .. } | + Insn::Load { opnd, .. } | + Insn::LoadSExt { opnd, .. } | + Insn::Not { opnd, .. } => { match self.idx { 0 => { self.idx += 1; - Some(&opnds[0]) + Some(&opnd) }, _ => None } }, - Insn { op: Op::Add, opnds, .. } | - Insn { op: Op::And, opnds, .. } | - Insn { op: Op::Cmp, opnds, .. } | - Insn { op: Op::CSelE, opnds, .. } | - Insn { op: Op::CSelG, opnds, .. } | - Insn { op: Op::CSelGE, opnds, .. } | - Insn { op: Op::CSelL, opnds, .. } | - Insn { op: Op::CSelLE, opnds, .. } | - Insn { op: Op::CSelNE, opnds, .. } | - Insn { op: Op::CSelNZ, opnds, .. } | - Insn { op: Op::CSelZ, opnds, .. } | - Insn { op: Op::IncrCounter, opnds, .. } | - Insn { op: Op::LShift, opnds, .. } | - Insn { op: Op::Mov, opnds, .. } | - Insn { op: Op::Or, opnds, .. } | - Insn { op: Op::RShift, opnds, .. } | - Insn { op: Op::Store, opnds, .. } | - Insn { op: Op::Sub, opnds, .. } | - Insn { op: Op::Test, opnds, .. } | - Insn { op: Op::URShift, opnds, .. } | - Insn { op: Op::Xor, opnds, .. } => { + Insn::Add { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::And { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::Cmp { left: opnd0 @ _, right: opnd1 @ _ } | + Insn::CSelE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelG { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelGE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelL { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelLE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. 
} | + Insn::CSelNE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelNZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::IncrCounter { mem: opnd0 @ _, value: opnd1 @ _, .. } | + Insn::LShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | + Insn::Mov { dest: opnd0 @ _, src: opnd1 @ _ } | + Insn::Or { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::RShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | + Insn::Store { dest: opnd0 @ _, src: opnd1 @ _ } | + Insn::Sub { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::Test { left: opnd0 @ _, right: opnd1 @ _ } | + Insn::URShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | + Insn::Xor { left: opnd0 @ _, right: opnd1 @ _, .. } => { match self.idx { 0 => { self.idx += 1; - Some(&opnds[0]) + Some(&opnd0) } 1 => { self.idx += 1; - Some(&opnds[1]) + Some(&opnd1) } _ => None } }, - Insn { op: Op::CCall, opnds, .. } => { + Insn::CCall { opnds, .. } => { if self.idx < opnds.len() { let opnd = &opnds[self.idx]; self.idx += 1; @@ -643,77 +721,77 @@ impl<'a> InsnOpndMutIterator<'a> { pub(super) fn next(&mut self) -> Option<&mut Opnd> { match self.insn { - Insn { op: Op::BakeString, .. } | - Insn { op: Op::Breakpoint, .. } | - Insn { op: Op::Comment, .. } | - Insn { op: Op::CPop, .. } | - Insn { op: Op::CPopAll, .. } | - Insn { op: Op::CPushAll, .. } | - Insn { op: Op::FrameSetup, .. } | - Insn { op: Op::FrameTeardown, .. } | - Insn { op: Op::Jbe, .. } | - Insn { op: Op::Je, .. } | - Insn { op: Op::Jl, .. } | - Insn { op: Op::Jmp, .. } | - Insn { op: Op::Jne, .. } | - Insn { op: Op::Jnz, .. } | - Insn { op: Op::Jo, .. } | - Insn { op: Op::Jz, .. } | - Insn { op: Op::Label, .. } | - Insn { op: Op::LeaLabel, .. } | - Insn { op: Op::PadEntryExit, .. } | - Insn { op: Op::PosMarker, .. } => None, - Insn { op: Op::CPopInto, opnds, .. } | - Insn { op: Op::CPush, opnds, .. } | - Insn { op: Op::CRet, opnds, .. } | - Insn { op: Op::JmpOpnd, opnds, .. 
} | - Insn { op: Op::Lea, opnds, .. } | - Insn { op: Op::LiveReg, opnds, .. } | - Insn { op: Op::Load, opnds, .. } | - Insn { op: Op::LoadSExt, opnds, .. } | - Insn { op: Op::Not, opnds, .. } => { + Insn::BakeString(_) | + Insn::Breakpoint | + Insn::Comment(_) | + Insn::CPop { .. } | + Insn::CPopAll | + Insn::CPushAll | + Insn::FrameSetup | + Insn::FrameTeardown | + Insn::Jbe(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jmp(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::Jz(_) | + Insn::Label(_) | + Insn::LeaLabel { .. } | + Insn::PadEntryExit | + Insn::PosMarker(_) => None, + Insn::CPopInto(opnd) | + Insn::CPush(opnd) | + Insn::CRet(opnd) | + Insn::JmpOpnd(opnd) | + Insn::Lea { opnd, .. } | + Insn::LiveReg { opnd, .. } | + Insn::Load { opnd, .. } | + Insn::LoadSExt { opnd, .. } | + Insn::Not { opnd, .. } => { match self.idx { 0 => { self.idx += 1; - Some(&mut opnds[0]) + Some(opnd) }, _ => None } }, - Insn { op: Op::Add, opnds, .. } | - Insn { op: Op::And, opnds, .. } | - Insn { op: Op::Cmp, opnds, .. } | - Insn { op: Op::CSelE, opnds, .. } | - Insn { op: Op::CSelG, opnds, .. } | - Insn { op: Op::CSelGE, opnds, .. } | - Insn { op: Op::CSelL, opnds, .. } | - Insn { op: Op::CSelLE, opnds, .. } | - Insn { op: Op::CSelNE, opnds, .. } | - Insn { op: Op::CSelNZ, opnds, .. } | - Insn { op: Op::CSelZ, opnds, .. } | - Insn { op: Op::IncrCounter, opnds, .. } | - Insn { op: Op::LShift, opnds, .. } | - Insn { op: Op::Mov, opnds, .. } | - Insn { op: Op::Or, opnds, .. } | - Insn { op: Op::RShift, opnds, .. } | - Insn { op: Op::Store, opnds, .. } | - Insn { op: Op::Sub, opnds, .. } | - Insn { op: Op::Test, opnds, .. } | - Insn { op: Op::URShift, opnds, .. } | - Insn { op: Op::Xor, opnds, .. } => { + Insn::Add { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::And { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::Cmp { left: opnd0 @ _, right: opnd1 @ _ } | + Insn::CSelE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. 
} | + Insn::CSelG { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelGE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelL { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelLE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelNE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelNZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::CSelZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | + Insn::IncrCounter { mem: opnd0 @ _, value: opnd1 @ _, .. } | + Insn::LShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | + Insn::Mov { dest: opnd0 @ _, src: opnd1 @ _ } | + Insn::Or { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::RShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | + Insn::Store { dest: opnd0 @ _, src: opnd1 @ _ } | + Insn::Sub { left: opnd0 @ _, right: opnd1 @ _, .. } | + Insn::Test { left: opnd0 @ _, right: opnd1 @ _ } | + Insn::URShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | + Insn::Xor { left: opnd0 @ _, right: opnd1 @ _, .. } => { match self.idx { 0 => { self.idx += 1; - Some(&mut opnds[0]) + Some(opnd0) } 1 => { self.idx += 1; - Some(&mut opnds[1]) + Some(opnd1) } _ => None } }, - Insn { op: Op::CCall, opnds, .. } => { + Insn::CCall { opnds, .. } => { if self.idx < opnds.len() { let opnd = &mut opnds[self.idx]; self.idx += 1; @@ -728,7 +806,7 @@ impl<'a> InsnOpndMutIterator<'a> { impl fmt::Debug for Insn { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "{:?}(", self.op)?; + write!(fmt, "{}(", self.op())?; // Print list of operands let mut opnd_iter = self.opnd_iter(); @@ -741,10 +819,10 @@ impl fmt::Debug for Insn { write!(fmt, ")")?; // Print text, target, and pos if they are present - if let Some(text) = &self.text { + if let Some(text) = self.text() { write!(fmt, " {text:?}")? 
} - if let Some(target) = self.target { + if let Some(target) = self.target() { write!(fmt, " target={target:?}")?; } @@ -814,23 +892,6 @@ impl Assembler self.live_ranges.push(insn_idx); } - /// Append an instruction to the list by creating a new instruction from the - /// component parts given to this function. This will also create a new - /// output operand from the given operands for the new instruction. - pub(super) fn push_insn_parts( - &mut self, - op: Op, - opnds: Vec, - target: Option, - text: Option, - pos_marker: Option - ) -> Opnd - { - let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); - self.push_insn(Insn { op, text, opnds, out, target, pos_marker }); - out - } - /// Create a new label instance that we can jump to pub fn new_label(&mut self, name: &str) -> Target { @@ -841,23 +902,6 @@ impl Assembler Target::Label(label_idx) } - /// Add a label at the current position - pub fn write_label(&mut self, label: Target) - { - assert!(label.unwrap_label_idx() < self.label_names.len()); - - let insn = Insn { - op: Op::Label, - text: None, - opnds: vec![], - out: Opnd::None, - target: Some(label), - pos_marker: None, - }; - self.insns.push(insn); - self.live_ranges.push(self.insns.len()); - } - /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. @@ -928,7 +972,7 @@ impl Assembler if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() { dealloc_reg(&mut pool, ®s, reg); } else { - unreachable!("no register allocated for insn {:?}", insn.op); + unreachable!("no register allocated for insn {:?}", insn); } } } @@ -937,7 +981,7 @@ impl Assembler } // C return values need to be mapped to the C return register - if insn.op == Op::CCall { + if matches!(insn, Insn::CCall { .. 
}) { assert_eq!(pool, 0, "register lives past C function call"); } @@ -958,7 +1002,7 @@ impl Assembler let mut out_reg: Option = None; // C return values need to be mapped to the C return register - if insn.op == Op::CCall { + if matches!(insn, Insn::CCall { .. }) { out_reg = Some(take_reg(&mut pool, ®s, &C_RET_REG)); } @@ -983,9 +1027,9 @@ impl Assembler // already allocated. if out_reg.is_none() { out_reg = match &insn { - Insn { op: Op::LiveReg, opnds, .. } => { + Insn::LiveReg { opnd, .. } => { // Allocate a specific register - let reg = opnds[0].unwrap_reg(); + let reg = opnd.unwrap_reg(); Some(take_reg(&mut pool, ®s, ®)) }, _ => { @@ -1090,9 +1134,13 @@ impl AssemblerDrainingIterator { /// Returns the next instruction in the list with the indices corresponding /// to the next list of instructions. pub fn next_mapped(&mut self) -> Option<(usize, Insn)> { - self.next_unmapped().map(|(index, insn)| { - let opnds = insn.opnd_iter().map(|opnd| opnd.map_index(&self.indices)).collect(); - (index, Insn { opnds, ..insn }) + self.next_unmapped().map(|(index, mut insn)| { + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + *opnd = opnd.map_index(&self.indices); + } + + (index, insn) }) } @@ -1167,273 +1215,279 @@ impl Assembler { #[must_use] pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); - self.push_insn(Insn { op: Op::Add, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Add { left, right, out }); out } #[must_use] pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); - self.push_insn(Insn { op: Op::And, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::And { left, right, out }); out } pub fn bake_string(&mut self, text: &str) { - self.push_insn(Insn { op: Op::BakeString, opnds: 
vec![], out: Opnd::None, text: Some(text.to_string()), target: None, pos_marker: None }); + self.push_insn(Insn::BakeString(text.to_string())); } pub fn breakpoint(&mut self) { - self.push_insn(Insn { op: Op::Breakpoint, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Breakpoint); } #[must_use] pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); - self.push_insn(Insn { op: Op::CCall, opnds, out, text: None, target: Some(Target::FunPtr(fptr)), pos_marker: None }); + self.push_insn(Insn::CCall { target: Target::FunPtr(fptr), opnds, out }); out } pub fn cmp(&mut self, left: Opnd, right: Opnd) { - self.push_insn(Insn { op: Op::Cmp, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Cmp { left, right }); } pub fn comment(&mut self, text: &str) { - self.push_insn(Insn { op: Op::Comment, opnds: vec![], out: Opnd::None, text: Some(text.to_string()), target: None, pos_marker: None }); + self.push_insn(Insn::Comment(text.to_string())); } #[must_use] pub fn cpop(&mut self) -> Opnd { let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); - self.push_insn(Insn { op: Op::CPop, opnds: vec![], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CPop { out }); out } pub fn cpop_all(&mut self) { - self.push_insn(Insn { op: Op::CPopAll, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CPopAll); } pub fn cpop_into(&mut self, opnd: Opnd) { - self.push_insn(Insn { op: Op::CPopInto, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CPopInto(opnd)); } pub fn cpush(&mut self, opnd: Opnd) { - self.push_insn(Insn { op: Op::CPush, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CPush(opnd)); } pub fn cpush_all(&mut self) { 
- self.push_insn(Insn { op: Op::CPushAll, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CPushAll); } pub fn cret(&mut self, opnd: Opnd) { - self.push_insn(Insn { op: Op::CRet, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CRet(opnd)); } #[must_use] pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelE { truthy, falsy, out }); out } #[must_use] pub fn csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelG, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelG { truthy, falsy, out }); out } #[must_use] pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelGE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelGE { truthy, falsy, out }); out } #[must_use] pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelL, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelL { truthy, falsy, out }); out } #[must_use] pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelLE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelLE { truthy, falsy, out }); out } #[must_use] pub fn 
csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelNE, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelNE { truthy, falsy, out }); out } #[must_use] pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelNZ, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelNZ { truthy, falsy, out }); out } #[must_use] pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); - self.push_insn(Insn { op: Op::CSelZ, opnds: vec![truthy, falsy], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::CSelZ { truthy, falsy, out }); out } pub fn frame_setup(&mut self) { - self.push_insn(Insn { op: Op::FrameSetup, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::FrameSetup); } pub fn frame_teardown(&mut self) { - self.push_insn(Insn { op: Op::FrameTeardown, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::FrameTeardown); } pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) { - self.push_insn(Insn { op: Op::IncrCounter, opnds: vec![mem, value], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::IncrCounter { mem, value }); } pub fn jbe(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jbe, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jbe(target)); } pub fn je(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Je, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Je(target)); } 
pub fn jl(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jl, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jl(target)); } pub fn jmp(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jmp, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jmp(target)); } pub fn jmp_opnd(&mut self, opnd: Opnd) { - self.push_insn(Insn { op: Op::JmpOpnd, opnds: vec![opnd], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::JmpOpnd(opnd)); } pub fn jne(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jne, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jne(target)); } pub fn jnz(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jnz, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jnz(target)); } pub fn jo(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jo, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jo(target)); } pub fn jz(&mut self, target: Target) { - self.push_insn(Insn { op: Op::Jz, opnds: vec![], out: Opnd::None, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::Jz(target)); } #[must_use] pub fn lea(&mut self, opnd: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); - self.push_insn(Insn { op: Op::Lea, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Lea { opnd, out }); out } #[must_use] pub fn lea_label(&mut self, target: Target) -> Opnd { let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); - self.push_insn(Insn { op: Op::LeaLabel, opnds: vec![], out, text: None, target: Some(target), pos_marker: None }); + self.push_insn(Insn::LeaLabel { target, out }); out } #[must_use] pub 
fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); - self.push_insn(Insn { op: Op::LiveReg, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::LiveReg { opnd, out }); out } #[must_use] pub fn load(&mut self, opnd: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); - self.push_insn(Insn { op: Op::Load, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Load { opnd, out }); out } #[must_use] pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); - self.push_insn(Insn { op: Op::LoadSExt, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::LoadSExt { opnd, out }); out } #[must_use] pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); - self.push_insn(Insn { op: Op::LShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::LShift { opnd, shift, out }); out } pub fn mov(&mut self, dest: Opnd, src: Opnd) { - self.push_insn(Insn { op: Op::Mov, opnds: vec![dest, src], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Mov { dest, src }); } #[must_use] pub fn not(&mut self, opnd: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); - self.push_insn(Insn { op: Op::Not, opnds: vec![opnd], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Not { opnd, out }); out } #[must_use] pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); - self.push_insn(Insn { op: Op::Or, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Or { left, right, out }); out } pub fn pad_entry_exit(&mut self) { - 
self.push_insn(Insn { op: Op::PadEntryExit, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::PadEntryExit); } //pub fn pos_marker(&mut self, marker_fn: F) pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { - self.push_insn(Insn { op: Op::PosMarker, opnds: vec![], out: Opnd::None, text: None, target: None, pos_marker: Some(Box::new(marker_fn)) }); + self.push_insn(Insn::PosMarker(Box::new(marker_fn))); } #[must_use] pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); - self.push_insn(Insn { op: Op::RShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::RShift { opnd, shift, out }); out } pub fn store(&mut self, dest: Opnd, src: Opnd) { - self.push_insn(Insn { op: Op::Store, opnds: vec![dest, src], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Store { dest, src }); } #[must_use] pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); - self.push_insn(Insn { op: Op::Sub, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Sub { left, right, out }); out } pub fn test(&mut self, left: Opnd, right: Opnd) { - self.push_insn(Insn { op: Op::Test, opnds: vec![left, right], out: Opnd::None, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Test { left, right }); } #[must_use] pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); - self.push_insn(Insn { op: Op::URShift, opnds: vec![opnd, shift], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::URShift { opnd, shift, out }); out } + /// Add a label at the current position + pub fn write_label(&mut self, target: Target) { + 
assert!(target.unwrap_label_idx() < self.label_names.len()); + self.push_insn(Insn::Label(target)); + } + #[must_use] pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); - self.push_insn(Insn { op: Op::Xor, opnds: vec![left, right], out, text: None, target: None, pos_marker: None }); + self.push_insn(Insn::Xor { left, right, out }); out } } @@ -1444,7 +1498,7 @@ mod tests { #[test] fn test_opnd_iter() { - let insn = Insn { op: Op::Add, opnds: vec![Opnd::None, Opnd::None], out: Opnd::None, text: None, target: None, pos_marker: None }; + let insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; let mut opnd_iter = insn.opnd_iter(); assert!(matches!(opnd_iter.next(), Some(Opnd::None))); @@ -1455,7 +1509,7 @@ mod tests { #[test] fn test_opnd_iter_mut() { - let mut insn = Insn { op: Op::Add, opnds: vec![Opnd::None, Opnd::None], out: Opnd::None, text: None, target: None, pos_marker: None }; + let mut insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; let mut opnd_iter = insn.opnd_iter_mut(); assert!(matches!(opnd_iter.next(), Some(Opnd::None))); diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index b89b7eb6485d71..08e8849b4d5acd 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -315,9 +315,9 @@ fn test_draining_iterator() { while let Some((index, insn)) = iter.next_unmapped() { match index { - 0 => assert_eq!(insn.op, Op::Load), - 1 => assert_eq!(insn.op, Op::Store), - 2 => assert_eq!(insn.op, Op::Add), + 0 => assert!(matches!(insn, Insn::Load { .. })), + 1 => assert!(matches!(insn, Insn::Store { .. })), + 2 => assert!(matches!(insn, Insn::Add { .. 
})), _ => panic!("Unexpected instruction index"), }; } @@ -337,7 +337,7 @@ fn test_lookback_iterator() { if index > 0 { let opnd_iter = iter.get_previous().unwrap().opnd_iter(); assert_eq!(opnd_iter.take(1).next(), Some(&Opnd::None)); - assert_eq!(insn.op, Op::Store); + assert!(matches!(insn, Insn::Store { .. })); } } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 0c994144d067be..bda7dc4c066e96 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -69,6 +69,13 @@ impl From for X86Opnd { } } +/// Also implement going from a reference to an operand for convenience. +impl From<&Opnd> for X86Opnd { + fn from(opnd: &Opnd) -> Self { + X86Opnd::from(*opnd) + } +} + impl Assembler { // A special scratch register for intermediate processing. @@ -96,11 +103,47 @@ impl Assembler /// Split IR instructions for the x86 platform fn x86_split(mut self) -> Assembler { + fn split_arithmetic_opnds(asm: &mut Assembler, live_ranges: &Vec, index: usize, unmapped_opnds: &Vec, left: &Opnd, right: &Opnd) -> (Opnd, Opnd) { + match (unmapped_opnds[0], unmapped_opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + (asm.load(*left), asm.load(*right)) + }, + (Opnd::Mem(_), Opnd::UImm(value)) => { + // 32-bit values will be sign-extended + if imm_num_bits(value as i64) > 32 { + (asm.load(*left), asm.load(*right)) + } else { + (asm.load(*left), *right) + } + }, + (Opnd::Mem(_), Opnd::Imm(value)) => { + if imm_num_bits(value) > 32 { + (asm.load(*left), asm.load(*right)) + } else { + (asm.load(*left), *right) + } + }, + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. 
}, _) => { + if live_ranges[idx] > index { + (asm.load(*left), *right) + } else { + (*left, *right) + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + (asm.load(*left), *right) + }, + _ => (*left, *right) + } + } + let live_ranges: Vec = take(&mut self.live_ranges); let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); let mut iterator = self.into_draining_iter(); - while let Some((index, insn)) = iterator.next_unmapped() { + while let Some((index, mut insn)) = iterator.next_unmapped() { // When we're iterating through the instructions with x86_split, we // need to know the previous live ranges in order to tell if a // register lasts beyond the current instruction. So instead of @@ -122,8 +165,15 @@ impl Assembler // - Most instructions can't be encoded with 64-bit immediates. // - We look for Op::Load specifically when emiting to keep GC'ed // VALUEs alive. This is a sort of canonicalization. - let mapped_opnds: Vec = insn.opnd_iter().map(|opnd| { - if insn.op == Op::Load { + let mut unmapped_opnds: Vec = vec![]; + + let is_load = matches!(insn, Insn::Load { .. }); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { + unmapped_opnds.push(*opnd); + + *opnd = if is_load { iterator.map_opnd(*opnd) } else if let Opnd::Value(value) = opnd { // Since mov(mem64, imm32) sign extends, as_i64() makes sure @@ -136,130 +186,137 @@ impl Assembler } else { iterator.map_opnd(*opnd) } - }).collect(); + } - match insn { - Insn { op: Op::Add | Op::Sub | Op::And | Op::Cmp | Op::Or | Op::Test | Op::Xor, opnds, target, text, pos_marker, .. 
} => { - let (opnd0, opnd1) = match (opnds[0], opnds[1]) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - (asm.load(mapped_opnds[0]), asm.load(mapped_opnds[1])) - }, - (Opnd::Mem(_), Opnd::UImm(value)) => { - // 32-bit values will be sign-extended - if imm_num_bits(value as i64) > 32 { - (asm.load(mapped_opnds[0]), asm.load(mapped_opnds[1])) - } else { - (asm.load(mapped_opnds[0]), mapped_opnds[1]) - } - }, - (Opnd::Mem(_), Opnd::Imm(value)) => { - if imm_num_bits(value) > 32 { - (asm.load(mapped_opnds[0]), asm.load(mapped_opnds[1])) - } else { - (asm.load(mapped_opnds[0]), mapped_opnds[1]) - } - }, - // Instruction output whose live range spans beyond this instruction - (Opnd::InsnOut { idx, .. }, _) => { - if live_ranges[idx] > index { - (asm.load(mapped_opnds[0]), mapped_opnds[1]) - } else { - (mapped_opnds[0], mapped_opnds[1]) - } - }, - // We have to load memory operands to avoid corrupting them - (Opnd::Mem(_) | Opnd::Reg(_), _) => { - (asm.load(mapped_opnds[0]), mapped_opnds[1]) - }, - _ => (mapped_opnds[0], mapped_opnds[1]) - }; + match &mut insn { + Insn::Add { left, right, out } | + Insn::Sub { left, right, out } | + Insn::And { left, right, out } | + Insn::Or { left, right, out } | + Insn::Xor { left, right, out } => { + let (split_left, split_right) = split_arithmetic_opnds(&mut asm, &live_ranges, index, &unmapped_opnds, left, right); + + *left = split_left; + *right = split_right; + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); - asm.push_insn_parts(insn.op, vec![opnd0, opnd1], target, text, pos_marker); + asm.push_insn(insn); + }, + Insn::Cmp { left, right } | + Insn::Test { left, right } => { + let (split_left, split_right) = split_arithmetic_opnds(&mut asm, &live_ranges, index, &unmapped_opnds, left, right); + + *left = split_left; + *right = split_right; + + asm.push_insn(insn); }, // These instructions modify their input operand in-place, so we // may need to load the input value to preserve it - Insn { op: Op::LShift | Op::RShift | 
Op::URShift, opnds, target, text, pos_marker, .. } => { - let (opnd0, opnd1) = match (opnds[0], opnds[1]) { + Insn::LShift { opnd, shift, out } | + Insn::RShift { opnd, shift, out } | + Insn::URShift { opnd, shift, out } => { + match (&unmapped_opnds[0], &unmapped_opnds[1]) { // Instruction output whose live range spans beyond this instruction (Opnd::InsnOut { idx, .. }, _) => { - if live_ranges[idx] > index { - (asm.load(mapped_opnds[0]), mapped_opnds[1]) - } else { - (mapped_opnds[0], mapped_opnds[1]) + if live_ranges[*idx] > index { + *opnd = asm.load(*opnd); } }, // We have to load memory operands to avoid corrupting them (Opnd::Mem(_) | Opnd::Reg(_), _) => { - (asm.load(mapped_opnds[0]), mapped_opnds[1]) + *opnd = asm.load(*opnd); }, - _ => (mapped_opnds[0], mapped_opnds[1]) + _ => {} }; - asm.push_insn_parts(insn.op, vec![opnd0, opnd1], target, text, pos_marker); - }, - Insn { op: Op::CSelZ | Op::CSelNZ | Op::CSelE | Op::CSelNE | Op::CSelL | Op::CSelLE | Op::CSelG | Op::CSelGE, target, text, pos_marker, .. } => { - let new_opnds = mapped_opnds.into_iter().map(|opnd| { - match opnd { - Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - _ => asm.load(opnd) + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*opnd, *shift])); + asm.push_insn(insn); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } | + Insn::CSelL { truthy, falsy, out } | + Insn::CSelLE { truthy, falsy, out } | + Insn::CSelG { truthy, falsy, out } | + Insn::CSelGE { truthy, falsy, out } => { + match truthy { + Opnd::Reg(_) | Opnd::InsnOut { .. } => {}, + _ => { + *truthy = asm.load(*truthy); } - }).collect(); + }; - asm.push_insn_parts(insn.op, new_opnds, target, text, pos_marker); + match falsy { + Opnd::Reg(_) | Opnd::InsnOut { .. 
} => {}, + _ => { + *falsy = asm.load(*falsy); + } + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy])); + asm.push_insn(insn); }, - Insn { op: Op::Mov, .. } => { - match (mapped_opnds[0], mapped_opnds[1]) { + Insn::Mov { dest, src } => { + match (&dest, &src) { (Opnd::Mem(_), Opnd::Mem(_)) => { // We load opnd1 because for mov, opnd0 is the output - let opnd1 = asm.load(mapped_opnds[1]); - asm.mov(mapped_opnds[0], opnd1); + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); }, (Opnd::Mem(_), Opnd::UImm(value)) => { // 32-bit values will be sign-extended - if imm_num_bits(value as i64) > 32 { - let opnd1 = asm.load(mapped_opnds[1]); - asm.mov(mapped_opnds[0], opnd1); + if imm_num_bits(*value as i64) > 32 { + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); } else { - asm.mov(mapped_opnds[0], mapped_opnds[1]); + asm.mov(*dest, *src); } }, (Opnd::Mem(_), Opnd::Imm(value)) => { - if imm_num_bits(value) > 32 { - let opnd1 = asm.load(mapped_opnds[1]); - asm.mov(mapped_opnds[0], opnd1); + if imm_num_bits(*value) > 32 { + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); } else { - asm.mov(mapped_opnds[0], mapped_opnds[1]); + asm.mov(*dest, *src); } }, _ => { - asm.mov(mapped_opnds[0], mapped_opnds[1]); + asm.mov(*dest, *src); } } }, - Insn { op: Op::Not, opnds, .. } => { - let opnd0 = match opnds[0] { + Insn::Not { opnd, .. } => { + let opnd0 = match unmapped_opnds[0] { // If we have an instruction output whose live range // spans beyond this instruction, we have to load it. Opnd::InsnOut { idx, .. } => { if live_ranges[idx] > index { - asm.load(mapped_opnds[0]) + asm.load(*opnd) } else { - mapped_opnds[0] + *opnd } }, // We have to load memory and register operands to avoid // corrupting them. Opnd::Mem(_) | Opnd::Reg(_) => { - asm.load(mapped_opnds[0]) + asm.load(*opnd) }, // Otherwise we can just reuse the existing operand. 
- _ => mapped_opnds[0] + _ => *opnd }; asm.not(opnd0); }, _ => { - asm.push_insn_parts(insn.op, mapped_opnds, insn.target, insn.text, insn.pos_marker); + if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); + } + + asm.push_insn(insn); } }; @@ -281,26 +338,24 @@ impl Assembler let start_write_pos = cb.get_write_pos(); for insn in &self.insns { match insn { - Insn { op: Op::Comment, text, .. } => { + Insn::Comment(text) => { if cfg!(feature = "asm_comments") { - cb.add_comment(text.as_ref().unwrap()); + cb.add_comment(text); } }, // Write the label at the current position - Insn { op: Op::Label, target, .. } => { - cb.write_label(target.unwrap().unwrap_label_idx()); + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); }, // Report back the current position in the generated code - Insn { op: Op::PosMarker, pos_marker, .. } => { - let pos = cb.get_write_ptr(); - let pos_marker_fn = pos_marker.as_ref().unwrap(); - pos_marker_fn(pos); + Insn::PosMarker(pos_marker) => { + pos_marker(cb.get_write_ptr()); }, - Insn { op: Op::BakeString, text, .. } => { - for byte in text.as_ref().unwrap().as_bytes() { + Insn::BakeString(text) => { + for byte in text.as_bytes() { cb.write_byte(*byte); } @@ -309,55 +364,55 @@ impl Assembler cb.write_byte(0); }, - Insn { op: Op::Add, opnds, .. } => { - add(cb, opnds[0].into(), opnds[1].into()) + Insn::Add { left, right, .. } => { + add(cb, left.into(), right.into()) }, - Insn { op: Op::FrameSetup, .. } => {}, - Insn { op: Op::FrameTeardown, .. } => {}, + Insn::FrameSetup => {}, + Insn::FrameTeardown => {}, - Insn { op: Op::Sub, opnds, .. } => { - sub(cb, opnds[0].into(), opnds[1].into()) + Insn::Sub { left, right, .. } => { + sub(cb, left.into(), right.into()) }, - Insn { op: Op::And, opnds, .. } => { - and(cb, opnds[0].into(), opnds[1].into()) + Insn::And { left, right, .. 
} => { + and(cb, left.into(), right.into()) }, - Insn { op: Op::Or, opnds, .. } => { - or(cb, opnds[0].into(), opnds[1].into()); + Insn::Or { left, right, .. } => { + or(cb, left.into(), right.into()); }, - Insn { op: Op::Xor, opnds, .. } => { - xor(cb, opnds[0].into(), opnds[1].into()); + Insn::Xor { left, right, .. } => { + xor(cb, left.into(), right.into()); }, - Insn { op: Op::Not, opnds, .. } => { - not(cb, opnds[0].into()); + Insn::Not { opnd, .. } => { + not(cb, opnd.into()); }, - Insn { op: Op::LShift, opnds, .. } => { - shl(cb, opnds[0].into(), opnds[1].into()) + Insn::LShift { opnd, shift , ..} => { + shl(cb, opnd.into(), shift.into()) }, - Insn { op: Op::RShift, opnds, .. } => { - sar(cb, opnds[0].into(), opnds[1].into()) + Insn::RShift { opnd, shift , ..} => { + sar(cb, opnd.into(), shift.into()) }, - Insn { op: Op::URShift, opnds, .. } => { - shr(cb, opnds[0].into(), opnds[1].into()) + Insn::URShift { opnd, shift, .. } => { + shr(cb, opnd.into(), shift.into()) }, - Insn { op: Op::Store, opnds, .. } => { - mov(cb, opnds[0].into(), opnds[1].into()); + Insn::Store { dest, src } => { + mov(cb, dest.into(), src.into()); }, // This assumes only load instructions can contain references to GC'd Value operands - Insn { op: Op::Load, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); + Insn::Load { opnd, out } => { + mov(cb, out.into(), opnd.into()); // If the value being loaded is a heap object - if let Opnd::Value(val) = opnds[0] { + if let Opnd::Value(val) = opnd { if !val.special_const_p() { // The pointer immediate is encoded as the last part of the mov written out let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); @@ -366,45 +421,45 @@ impl Assembler } }, - Insn { op: Op::LoadSExt, opnds, out, .. } => { - movsx(cb, (*out).into(), opnds[0].into()); + Insn::LoadSExt { opnd, out } => { + movsx(cb, out.into(), opnd.into()); }, - Insn { op: Op::Mov, opnds, .. 
} => { - mov(cb, opnds[0].into(), opnds[1].into()); + Insn::Mov { dest, src } => { + mov(cb, dest.into(), src.into()); }, // Load effective address - Insn { op: Op::Lea, opnds, out, .. } => { - lea(cb, (*out).into(), opnds[0].into()); + Insn::Lea { opnd, out } => { + lea(cb, out.into(), opnd.into()); }, // Load relative address - Insn { op: Op::LeaLabel, out, target, .. } => { - let label_idx = target.unwrap().unwrap_label_idx(); + Insn::LeaLabel { target, out } => { + let label_idx = target.unwrap_label_idx(); cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { let disp = dst_addr - src_addr; lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); }); - mov(cb, (*out).into(), Self::SCRATCH0); + mov(cb, out.into(), Self::SCRATCH0); }, // Push and pop to/from the C stack - Insn { op: Op::CPush, opnds, .. } => { - push(cb, opnds[0].into()); + Insn::CPush(opnd) => { + push(cb, opnd.into()); }, - Insn { op: Op::CPop, out, .. } => { - pop(cb, (*out).into()); + Insn::CPop { out } => { + pop(cb, out.into()); }, - Insn { op: Op::CPopInto, opnds, .. } => { - pop(cb, opnds[0].into()); + Insn::CPopInto(opnd) => { + pop(cb, opnd.into()); }, // Push and pop to the C stack all caller-save registers and the // flags - Insn { op: Op::CPushAll, .. } => { + Insn::CPushAll => { let regs = Assembler::get_caller_save_regs(); for reg in regs { @@ -412,7 +467,7 @@ impl Assembler } pushfq(cb); }, - Insn { op: Op::CPopAll, .. } => { + Insn::CPopAll => { let regs = Assembler::get_caller_save_regs(); popfq(cb); @@ -422,7 +477,7 @@ impl Assembler }, // C function call - Insn { op: Op::CCall, opnds, target, .. } => { + Insn::CCall { opnds, target, .. } => { // Temporary assert!(opnds.len() <= _C_ARG_OPNDS.len()); @@ -431,92 +486,92 @@ impl Assembler mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), opnds[idx].into()); } - let ptr = target.unwrap().unwrap_fun_ptr(); + let ptr = target.unwrap_fun_ptr(); call_ptr(cb, RAX, ptr); }, - Insn { op: Op::CRet, opnds, .. 
} => { + Insn::CRet(opnd) => { // TODO: bias allocation towards return register - if opnds[0] != Opnd::Reg(C_RET_REG) { - mov(cb, RAX, opnds[0].into()); + if *opnd != Opnd::Reg(C_RET_REG) { + mov(cb, RAX, opnd.into()); } ret(cb); }, // Compare - Insn { op: Op::Cmp, opnds, .. } => { - cmp(cb, opnds[0].into(), opnds[1].into()); + Insn::Cmp { left, right } => { + cmp(cb, left.into(), right.into()); } // Test and set flags - Insn { op: Op::Test, opnds, .. } => { - test(cb, opnds[0].into(), opnds[1].into()); + Insn::Test { left, right } => { + test(cb, left.into(), right.into()); } - Insn { op: Op::JmpOpnd, opnds, .. } => { - jmp_rm(cb, opnds[0].into()); + Insn::JmpOpnd(opnd) => { + jmp_rm(cb, opnd.into()); } // Conditional jump to a label - Insn { op: Op::Jmp, target, .. } => { - match target.unwrap() { + Insn::Jmp(target) => { + match *target { Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label_idx) => jmp_label(cb, label_idx), _ => unreachable!() } } - Insn { op: Op::Je, target, .. } => { - match target.unwrap() { + Insn::Je(target) => { + match *target { Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label_idx) => je_label(cb, label_idx), _ => unreachable!() } } - Insn { op: Op::Jne, target, .. } => { - match target.unwrap() { + Insn::Jne(target) => { + match *target { Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label_idx) => jne_label(cb, label_idx), _ => unreachable!() } } - Insn { op: Op::Jl, target, .. } => { - match target.unwrap() { + Insn::Jl(target) => { + match *target { Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label_idx) => jl_label(cb, label_idx), _ => unreachable!() } }, - Insn { op: Op::Jbe, target, .. } => { - match target.unwrap() { + Insn::Jbe(target) => { + match *target { Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label_idx) => jbe_label(cb, label_idx), _ => unreachable!() } }, - Insn { op: Op::Jz, target, .. 
} => { - match target.unwrap() { + Insn::Jz(target) => { + match *target { Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label_idx) => jz_label(cb, label_idx), _ => unreachable!() } } - Insn { op: Op::Jnz, target, .. } => { - match target.unwrap() { + Insn::Jnz(target) => { + match *target { Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label_idx) => jnz_label(cb, label_idx), _ => unreachable!() } } - Insn { op: Op::Jo, target, .. } => { - match target.unwrap() { + Insn::Jo(target) => { + match *target { Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label_idx) => jo_label(cb, label_idx), _ => unreachable!() @@ -524,49 +579,49 @@ impl Assembler } // Atomically increment a counter at a given memory location - Insn { op: Op::IncrCounter, opnds, .. } => { - assert!(matches!(opnds[0], Opnd::Mem(_))); - assert!(matches!(opnds[1], Opnd::UImm(_) | Opnd::Imm(_) ) ); + Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::Mem(_))); + assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_) ) ); write_lock_prefix(cb); - add(cb, opnds[0].into(), opnds[1].into()); + add(cb, mem.into(), value.into()); }, - Insn { op: Op::Breakpoint, .. } => int3(cb), + Insn::Breakpoint => int3(cb), - Insn { op: Op::CSelZ, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmovnz(cb, (*out).into(), opnds[1].into()); + Insn::CSelZ { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovnz(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelNZ, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmovz(cb, (*out).into(), opnds[1].into()); + Insn::CSelNZ { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovz(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelE, opnds, out, .. 
} => { - mov(cb, (*out).into(), opnds[0].into()); - cmovne(cb, (*out).into(), opnds[1].into()); + Insn::CSelE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovne(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelNE, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmove(cb, (*out).into(), opnds[1].into()); + Insn::CSelNE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmove(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelL, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmovge(cb, (*out).into(), opnds[1].into()); + Insn::CSelL { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovge(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelLE, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmovg(cb, (*out).into(), opnds[1].into()); + Insn::CSelLE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovg(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelG, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmovle(cb, (*out).into(), opnds[1].into()); + Insn::CSelG { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovle(cb, out.into(), falsy.into()); }, - Insn { op: Op::CSelGE, opnds, out, .. } => { - mov(cb, (*out).into(), opnds[0].into()); - cmovl(cb, (*out).into(), opnds[1].into()); + Insn::CSelGE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovl(cb, out.into(), falsy.into()); } - Insn { op: Op::LiveReg, .. } => (), // just a reg alloc signal, no code - Insn { op: Op::PadEntryExit, .. } => { + Insn::LiveReg { .. } => (), // just a reg alloc signal, no code + Insn::PadEntryExit => { // We assume that our Op::Jmp usage that gets invalidated is <= 5 let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap(); if code_size < 5 { @@ -578,7 +633,7 @@ impl Assembler // we feed to the backend could get lowered into other // instructions. 
So it's possible that some of our backend // instructions can never make it to the emit stage. - _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn.op) + _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn) }; } From d5fe9e1d9aabacb7bafe97dbbc63b0272be84dee Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 18 Aug 2022 13:00:54 -0700 Subject: [PATCH 499/546] Run test-all with RUBY_YJIT_ENABLE=1 on CI (https://github.com/Shopify/ruby/pull/418) --- .cirrus.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 21f3e386513bf3..a00946e90ce9e6 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -172,4 +172,5 @@ yjit_task: # Run John's YJIT instruction tests, and make sure we can load the test-all runner test_yjit_script: source $HOME/.cargo/env && make -j test-all TESTS='test/ruby/test_method.rb test/ruby/test_yjit.rb' RUN_OPTS="--yjit-call-threshold=1" - # TODO: check that we can we run all of test-all successfully + # Run test-all with the default call threshold + test_yjit_script: source $HOME/.cargo/env && RUBY_YJIT_ENABLE=1 make -j test-all From 932885244ecb62b22904b9d9842fb7f2be5b7d00 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 18 Aug 2022 15:40:15 -0400 Subject: [PATCH 500/546] Better variable name, no must_use on ccall (https://github.com/Shopify/ruby/pull/424) --- yjit/src/backend/arm64/mod.rs | 4 ++-- yjit/src/backend/ir.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 60cdf2b9d1eaf3..3f1bbf99b02308 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -234,7 +234,7 @@ impl Assembler // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integer. - let skip_load = matches!(insn, Insn::Load { .. 
}); + let is_load = matches!(insn, Insn::Load { .. }); let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { @@ -242,7 +242,7 @@ impl Assembler Opnd::Value(value) => { if value.special_const_p() { *opnd = Opnd::UImm(value.as_u64()); - } else if !skip_load { + } else if !is_load { *opnd = asm.load(*opnd); } }, diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index fe525cf31d3d70..4a7bea8dac50eb 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -1234,7 +1234,6 @@ impl Assembler { self.push_insn(Insn::Breakpoint); } - #[must_use] pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); self.push_insn(Insn::CCall { target: Target::FunPtr(fptr), opnds, out }); From 29bda0ff8124ccfe660bfde43b59acd825f2d014 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 18 Aug 2022 15:41:41 -0400 Subject: [PATCH 501/546] Use shorter syntax for the same pattern (https://github.com/Shopify/ruby/pull/425) --- yjit/src/backend/ir.rs | 84 +++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 4a7bea8dac50eb..f01ab398da3243 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -662,27 +662,27 @@ impl<'a> Iterator for InsnOpndIterator<'a> { _ => None } }, - Insn::Add { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::And { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::Cmp { left: opnd0 @ _, right: opnd1 @ _ } | - Insn::CSelE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelG { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelGE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelL { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelLE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelNE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelNZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. 
} | - Insn::CSelZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::IncrCounter { mem: opnd0 @ _, value: opnd1 @ _, .. } | - Insn::LShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | - Insn::Mov { dest: opnd0 @ _, src: opnd1 @ _ } | - Insn::Or { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::RShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | - Insn::Store { dest: opnd0 @ _, src: opnd1 @ _ } | - Insn::Sub { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::Test { left: opnd0 @ _, right: opnd1 @ _ } | - Insn::URShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | - Insn::Xor { left: opnd0 @ _, right: opnd1 @ _, .. } => { + Insn::Add { left: opnd0, right: opnd1, .. } | + Insn::And { left: opnd0, right: opnd1, .. } | + Insn::Cmp { left: opnd0, right: opnd1 } | + Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Mov { dest: opnd0, src: opnd1 } | + Insn::Or { left: opnd0, right: opnd1, .. } | + Insn::RShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Store { dest: opnd0, src: opnd1 } | + Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Test { left: opnd0, right: opnd1 } | + Insn::URShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Xor { left: opnd0, right: opnd1, .. } => { match self.idx { 0 => { self.idx += 1; @@ -758,27 +758,27 @@ impl<'a> InsnOpndMutIterator<'a> { _ => None } }, - Insn::Add { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::And { left: opnd0 @ _, right: opnd1 @ _, .. 
} | - Insn::Cmp { left: opnd0 @ _, right: opnd1 @ _ } | - Insn::CSelE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelG { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelGE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelL { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelLE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelNE { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelNZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::CSelZ { truthy: opnd0 @ _, falsy: opnd1 @ _, .. } | - Insn::IncrCounter { mem: opnd0 @ _, value: opnd1 @ _, .. } | - Insn::LShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | - Insn::Mov { dest: opnd0 @ _, src: opnd1 @ _ } | - Insn::Or { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::RShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | - Insn::Store { dest: opnd0 @ _, src: opnd1 @ _ } | - Insn::Sub { left: opnd0 @ _, right: opnd1 @ _, .. } | - Insn::Test { left: opnd0 @ _, right: opnd1 @ _ } | - Insn::URShift { opnd: opnd0 @ _, shift: opnd1 @ _, .. } | - Insn::Xor { left: opnd0 @ _, right: opnd1 @ _, .. } => { + Insn::Add { left: opnd0, right: opnd1, .. } | + Insn::And { left: opnd0, right: opnd1, .. } | + Insn::Cmp { left: opnd0, right: opnd1 } | + Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Mov { dest: opnd0, src: opnd1 } | + Insn::Or { left: opnd0, right: opnd1, .. } | + Insn::RShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Store { dest: opnd0, src: opnd1 } | + Insn::Sub { left: opnd0, right: opnd1, .. 
} | + Insn::Test { left: opnd0, right: opnd1 } | + Insn::URShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Xor { left: opnd0, right: opnd1, .. } => { match self.idx { 0 => { self.idx += 1; From 93c5a5f02373d9ebad3a158fd783886bc3f7bf7d Mon Sep 17 00:00:00 2001 From: Noah Gibbs Date: Fri, 19 Aug 2022 15:31:14 +0100 Subject: [PATCH 502/546] Fix and re-enable String to_s, << and unary plus (https://github.com/Shopify/ruby/pull/429) --- yjit/src/codegen.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b94ddc32d557da..e23171d2a06b26 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3697,7 +3697,6 @@ fn jit_rb_str_bytesize( true } -/* // Codegen for rb_str_to_s() // When String#to_s is called on a String instance, the method returns self and // most of the overhead comes from setting up the method call. We observed that @@ -3705,7 +3704,7 @@ fn jit_rb_str_bytesize( fn jit_rb_str_to_s( _jit: &mut JITState, _ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3714,14 +3713,13 @@ fn jit_rb_str_to_s( known_recv_class: *const VALUE, ) -> bool { if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } { - add_comment(cb, "to_s on plain string"); + asm.comment("to_s on plain string"); // The method returns the receiver, which is already on the stack. // No stack movement. return true; } false } -*/ // Codegen for rb_str_concat() -- *not* String#concat // Frequently strings are concatenated using "out_str << next_str". @@ -3750,8 +3748,7 @@ fn jit_rb_str_concat( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the argument is of class String at runtime. 
- let insn_opnd = StackOpnd(0); - let arg_type = ctx.get_opnd_type(insn_opnd); + let arg_type = ctx.get_opnd_type(StackOpnd(0)); let concat_arg = ctx.stack_pop(1); let recv = ctx.stack_pop(1); @@ -3765,12 +3762,8 @@ fn jit_rb_str_concat( asm.jnz(side_exit.into()); asm.cmp(arg_opnd, Qnil.into()); asm.jbe(side_exit.into()); - - ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); } guard_object_is_string(asm, arg_opnd, side_exit); - // We know this is a string-or-subclass, but not necessarily that it's a ::String - ctx.upgrade_opnd_type(insn_opnd, Type::TString); } // Test if string encodings differ. If different, use rb_str_append. If the same, @@ -6138,11 +6131,11 @@ impl CodegenGlobals { self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal); // rb_str_to_s() methods in string.c - //self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); - //self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); + self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s); + self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s); self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); - //self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); - //self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); + self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); + self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); // Thread.current self.yjit_reg_method( From d433eb957bf3826e7aea97c12f0cdc9fcb9a1b43 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 19 Aug 2022 14:00:28 -0700 Subject: [PATCH 503/546] Run tests with --yjit-call-threshold=1 on arm64 (https://github.com/Shopify/ruby/pull/426) --- .cirrus.yml | 49 ++++++++----------------------------------------- 1 file changed, 8 insertions(+), 41 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index a00946e90ce9e6..708a329e5751e1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -125,43 +125,6 @@ yjit_task: boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 
test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 - bootstrap_tests_script: | - RUST_BACKTRACE=1 ruby --disable=gems bootstraptest/runner.rb --ruby="./miniruby -I./lib -I. -I.ext/common --disable-gems --yjit-call-threshold=1 --yjit-verify-ctx" \ - bootstraptest/test_attr.rb \ - bootstraptest/test_autoload.rb \ - bootstraptest/test_block.rb \ - bootstraptest/test_class.rb \ - bootstraptest/test_constant_cache.rb \ - bootstraptest/test_env.rb \ - bootstraptest/test_eval.rb \ - bootstraptest/test_exception.rb \ - bootstraptest/test_fiber.rb \ - bootstraptest/test_finalizer.rb \ - bootstraptest/test_flip.rb \ - bootstraptest/test_flow.rb \ - bootstraptest/test_fork.rb \ - bootstraptest/test_gc.rb \ - bootstraptest/test_insns.rb \ - bootstraptest/test_io.rb \ - bootstraptest/test_jump.rb \ - bootstraptest/test_literal.rb \ - bootstraptest/test_literal_suffix.rb \ - bootstraptest/test_load.rb \ - bootstraptest/test_marshal.rb \ - bootstraptest/test_massign.rb \ - bootstraptest/test_method.rb \ - bootstraptest/test_objectspace.rb \ - bootstraptest/test_ractor.rb \ - bootstraptest/test_proc.rb \ - bootstraptest/test_string.rb \ - bootstraptest/test_struct.rb \ - bootstraptest/test_syntax.rb \ - bootstraptest/test_thread.rb \ - bootstraptest/test_yjit_30k_ifelse.rb \ - bootstraptest/test_yjit_30k_methods.rb \ - bootstraptest/test_yjit_new_backend.rb \ - bootstraptest/test_yjit_rust_port.rb \ - bootstraptest/test_yjit.rb # Check that we can do a full ruby build full_build_script: source $HOME/.cargo/env && make -j @@ -169,8 +132,12 @@ yjit_task: # Check that we can build rdoc successfully make_rdoc_script: source $HOME/.cargo/env && make -j rdoc - # Run John's YJIT instruction tests, and make sure we can load the test-all runner - test_yjit_script: source $HOME/.cargo/env && make -j test-all TESTS='test/ruby/test_method.rb 
test/ruby/test_yjit.rb' RUN_OPTS="--yjit-call-threshold=1" + # Check that we can run btest successfully + make_btest_script: source $HOME/.cargo/env && make -j btest RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" - # Run test-all with the default call threshold - test_yjit_script: source $HOME/.cargo/env && RUBY_YJIT_ENABLE=1 make -j test-all + # Check that we can run test-all successfully (running TestGCCompact separately until we fix its performance) + make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS='--name=!/TestGCCompact/' + test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" + + # Check that we can run test-spec successfully + make_test_spec_script: source $HOME/.cargo/env && make -j test-spec RUN_OPTS="--yjit-call-threshold=1" From 54c7bc67a2d54311b77aca9233b23a9e7a1ca581 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 23 Aug 2022 13:41:22 -0400 Subject: [PATCH 504/546] Various AArch64 optimizations (https://github.com/Shopify/ruby/pull/433) * When we're storing an immediate 0 value at a memory address, we can use STUR XZR, Xd instead of loading 0 into a register and then storing that register. * When we're moving 0 into an argument register, we can use MOV Xd, XZR instead of loading the value into a register first. * In the newarray instruction, we can skip looking at the stack at all if the number of values we're using is 0. 
--- yjit/src/asm/arm64/mod.rs | 3 +++ yjit/src/asm/arm64/opnd.rs | 5 ++++- yjit/src/backend/arm64/mod.rs | 14 ++++++++++++-- yjit/src/codegen.rs | 11 ++++++++--- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index e5ba2f81ead4ab..fb07498ce2ad52 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -542,6 +542,9 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() }, + (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => { + LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into() + }, (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() }, diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index a10e28945573e6..c89481fb03db2f 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -111,6 +111,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 }; pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 }; +// zero register +pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 }; + // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); @@ -143,7 +146,7 @@ pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); -pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); +pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG); // 32-bit registers pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 }); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 
3f1bbf99b02308..08eb5efa3fb05c 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -289,7 +289,14 @@ impl Assembler // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { - let value = split_load_operand(asm, opnd); + // If the value that we're sending is 0, then we can use + // the zero register, so in this case we'll just send + // a UImm of 0 along as the argument to the move. + let value = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + _ => split_load_operand(asm, opnd) + }; + asm.mov(C_ARG_OPNDS[idx], value); } @@ -386,7 +393,10 @@ impl Assembler }; }, Insn::Mov { dest, src } => { - let value = match (dest, src) { + let value: Opnd = match (dest, src) { + // If the first operand is zero, then we can just use + // the zero register. + (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG), // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll // need to load this anyway. 
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e23171d2a06b26..1336fe3c57c487 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1168,9 +1168,14 @@ fn gen_newarray( // Save the PC and SP because we are allocating jit_prepare_routine_call(jit, ctx, asm); - let offset_magnitude = SIZEOF_VALUE as u32 * n; - let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); - let values_ptr = asm.lea(values_opnd); + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + let offset_magnitude = SIZEOF_VALUE as u32 * n; + let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + asm.lea(values_opnd) + }; // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); let new_ary = asm.ccall( From def3ade8a809a230648cdffbf4ab066b07fe7bf1 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 23 Aug 2022 13:46:43 -0700 Subject: [PATCH 505/546] Add --yjit-dump-disasm to dump every compiled code (https://github.com/Shopify/ruby/pull/430) * Add --yjit-dump-disasm to dump every compiled code * Just use get_option * Carve out disasm_from_addr * Avoid push_str with format! 
* Share the logic through asm.compile * This seems to negatively impact the compilation speed --- yjit/src/asm/mod.rs | 10 ++++-- yjit/src/backend/ir.rs | 19 ++++++++-- yjit/src/codegen.rs | 15 ++++++-- yjit/src/core.rs | 1 + yjit/src/disasm.rs | 82 ++++++++++++++++++++++++------------------ yjit/src/options.rs | 5 +++ 6 files changed, 91 insertions(+), 41 deletions(-) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index fef4518816f177..4029e2ca6745b5 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -57,6 +57,10 @@ pub struct CodeBlock { #[cfg(feature = "asm_comments")] asm_comments: BTreeMap>, + // True for OutlinedCb + #[cfg(feature = "disasm")] + pub outlined: bool, + // Set if the CodeBlock is unable to output some instructions, // for example, when there is not enough space or when a jump // target is too far away. @@ -65,7 +69,7 @@ pub struct CodeBlock { impl CodeBlock { /// Make a new CodeBlock - pub fn new(mem_block: VirtualMem) -> Self { + pub fn new(mem_block: VirtualMem, outlined: bool) -> Self { Self { mem_size: mem_block.virtual_region_size(), mem_block, @@ -75,6 +79,8 @@ impl CodeBlock { label_refs: Vec::new(), #[cfg(feature = "asm_comments")] asm_comments: BTreeMap::new(), + #[cfg(feature = "disasm")] + outlined, dropped_bytes: false, } } @@ -282,7 +288,7 @@ impl CodeBlock { let mem_start: *const u8 = alloc.mem_start(); let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size); - Self::new(virt_mem) + Self::new(virt_mem, false) } } diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index f01ab398da3243..33a79a417922aa 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -10,6 +10,7 @@ use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; use crate::core::{Context, Type, TempMapping}; +use crate::options::*; #[cfg(target_arch = "x86_64")] use crate::backend::x86_64::*; @@ -1075,11 +1076,25 @@ impl Assembler /// compiling 
multiple blocks at a time? pub fn compile(self, cb: &mut CodeBlock) -> Vec { + #[cfg(feature = "disasm")] + let start_addr = cb.get_write_ptr().raw_ptr(); + let alloc_regs = Self::get_alloc_regs(); - self.compile_with_regs(cb, alloc_regs) + let gc_offsets = self.compile_with_regs(cb, alloc_regs); + + #[cfg(feature = "disasm")] + if get_option!(dump_disasm) && !cb.outlined { + use crate::disasm::disasm_addr_range; + let last_ptr = cb.get_write_ptr(); + let disasm = disasm_addr_range(cb, start_addr, last_ptr.raw_ptr() as usize - start_addr as usize); + if disasm.len() > 0 { + println!("{disasm}"); + } + } + gc_offsets } - /// Compile with a limited number of registers + /// Compile with a limited number of registers. Used only for unit tests. pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> Vec { let mut alloc_regs = Self::get_alloc_regs(); diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1336fe3c57c487..7c4c974345b024 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -622,9 +622,13 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O cb.align_pos(64); let code_ptr = cb.get_write_ptr(); - add_comment(cb, "yjit entry"); let mut asm = Assembler::new(); + if get_option!(dump_disasm) { + asm.comment(&format!("YJIT entry: {}", iseq_get_location(iseq))); + } else { + asm.comment("YJIT entry"); + } asm.frame_setup(); @@ -748,6 +752,11 @@ pub fn gen_single_block( // Create a backend assembler instance let mut asm = Assembler::new(); + #[cfg(feature = "disasm")] + if get_option!(dump_disasm) { + asm.comment(&format!("Block: {} (ISEQ offset: {})", iseq_get_location(blockid.iseq), blockid.idx)); + } + // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { @@ -6049,8 +6058,8 @@ impl CodegenGlobals { half_size ); - let cb = CodeBlock::new(first_half); - let ocb = OutlinedCb::wrap(CodeBlock::new(second_half)); + let cb = 
CodeBlock::new(first_half, false); + let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true)); (cb, ocb) }; diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 7d07918228ab13..fa82dcc30892b0 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1510,6 +1510,7 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { cb.set_write_ptr(branch.start_addr.unwrap()); let mut asm = Assembler::new(); + asm.comment("regenerate_branch"); (branch.gen_fn)( &mut asm, diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 015c0c25ef5d3c..3d1c5b33fd9093 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -1,6 +1,9 @@ use crate::core::*; use crate::cruby::*; use crate::yjit::yjit_enabled_p; +use crate::asm::CodeBlock; +use crate::codegen::CodePtr; +use std::fmt::Write; /// Primitive called in yjit.rb /// Produce a string representing the disassembly for an ISEQ @@ -36,7 +39,7 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU #[cfg(feature = "disasm")] pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String { - let mut out = String::from(""); + let mut out = String::from(""); // Get a list of block versions generated for this iseq let mut block_list = get_iseq_block_list(iseq); @@ -67,26 +70,6 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St total_code_size += blockref.borrow().code_size(); } - // Initialize capstone - use capstone::prelude::*; - - #[cfg(target_arch = "x86_64")] - let mut cs = Capstone::new() - .x86() - .mode(arch::x86::ArchMode::Mode64) - .syntax(arch::x86::ArchSyntax::Intel) - .build() - .unwrap(); - - #[cfg(target_arch = "aarch64")] - let mut cs = Capstone::new() - .arm64() - .mode(arch::arm64::ArchMode::Arm) - .detail(true) - .build() - .unwrap(); - cs.set_skipdata(true); - out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len())); out.push_str(&format!( "TOTAL INLINE CODE SIZE: {} bytes\n", @@ -115,19 +98,7 @@ pub fn 
disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St out.push_str(&format!("== {:=<60}\n", block_ident)); // Disassemble the instructions - let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; - let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); - - // For each instruction in this block - for insn in insns.as_ref() { - // Comments for this block - if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { - for comment in comment_list { - out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); - } - } - out.push_str(&format!(" {}\n", insn)); - } + out.push_str(&disasm_addr_range(global_cb, start_addr, code_size)); // If this is not the last block if block_idx < block_list.len() - 1 { @@ -147,6 +118,49 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> St return out; } + +#[cfg(feature = "disasm")] +pub fn disasm_addr_range(cb: &CodeBlock, start_addr: *const u8, code_size: usize) -> String { + let mut out = String::from(""); + + // Initialize capstone + use capstone::prelude::*; + + #[cfg(target_arch = "x86_64")] + let mut cs = Capstone::new() + .x86() + .mode(arch::x86::ArchMode::Mode64) + .syntax(arch::x86::ArchSyntax::Intel) + .build() + .unwrap(); + + #[cfg(target_arch = "aarch64")] + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .detail(true) + .build() + .unwrap(); + cs.set_skipdata(true); + + // Disassemble the instructions + let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; + let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + + // For each instruction in this block + for insn in insns.as_ref() { + // Comments for this block + if let Some(comment_list) = cb.comments_at(insn.address() as usize) { + for comment in comment_list { + write!(&mut out, " \x1b[1m# {}\x1b[0m\n", comment).unwrap(); + } + } + write!(&mut out, " {}\n", insn).unwrap(); + } + + return out; +} + /// 
Primitive called in yjit.rb /// Produce a list of instructions compiled for an isew #[no_mangle] diff --git a/yjit/src/options.rs b/yjit/src/options.rs index 7436b3583bb24a..2e141445f13677 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -30,6 +30,9 @@ pub struct Options { /// Dump compiled and executed instructions for debugging pub dump_insns: bool, + /// Dump all compiled instructions in inlined CodeBlock + pub dump_disasm: bool, + /// Print when specific ISEQ items are compiled or invalidated pub dump_iseq_disasm: Option, @@ -53,6 +56,7 @@ pub static mut OPTIONS: Options = Options { gen_stats: false, gen_trace_exits: false, dump_insns: false, + dump_disasm: false, verify_ctx: false, global_constant_state: false, dump_iseq_disasm: None, @@ -128,6 +132,7 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { ("stats", "") => unsafe { OPTIONS.gen_stats = true }, ("trace-exits", "") => unsafe { OPTIONS.gen_trace_exits = true; OPTIONS.gen_stats = true }, ("dump-insns", "") => unsafe { OPTIONS.dump_insns = true }, + ("dump-disasm", "") => unsafe { OPTIONS.dump_disasm = true }, ("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true }, ("global-constant-state", "") => unsafe { OPTIONS.global_constant_state = true }, From 5ef048e5b1c3dd61adf782ace570bb0a1f9bb12f Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Wed, 24 Aug 2022 15:13:08 -0400 Subject: [PATCH 506/546] Update yjit.md Add VMIL paper, update supported CPUs. --- doc/yjit/yjit.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index f879e227adb9d0..f13fa027b99762 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -20,6 +20,7 @@ This project is open source and falls under the same license as CRuby. 
If you wish to learn more about the approach taken, here are some conference talks and publications: - RubyKaigi 2021 talk: [YJIT: Building a New JIT Compiler Inside CRuby](https://www.youtube.com/watch?v=PBVLf3yfMs8) - Blog post: [YJIT: Building a New JIT Compiler Inside CRuby](https://pointersgonewild.com/2021/06/02/yjit-building-a-new-jit-compiler-inside-cruby/) +- VMIL 2021 paper: [YJIT: A Basic Block Versioning JIT Compiler for CRuby](https://dl.acm.org/doi/10.1145/3486606.3486781) - MoreVMs 2021 talk: [YJIT: Building a New JIT Compiler Inside CRuby](https://www.youtube.com/watch?v=vucLAqv7qpc) - ECOOP 2016 talk: [Interprocedural Type Specialization of JavaScript Programs Without Type Analysis](https://www.youtube.com/watch?v=sRNBY7Ss97A) - ECOOP 2016 paper: [Interprocedural Type Specialization of JavaScript Programs Without Type Analysis](https://drops.dagstuhl.de/opus/volltexte/2016/6101/pdf/LIPIcs-ECOOP-2016-7.pdf) @@ -45,7 +46,7 @@ YJIT is a work in progress and as such may not yet be mature enough for mission- - No garbage collection for generated code. - Currently supports only macOS and Linux. -- Currently supports only x86-64 CPUs. +- Supports x86-64 and arm64/aarch64 CPUs only. Because there is no GC for generated code yet, your software could run out of executable memory if it is large enough. You can change how much executable memory is allocated using [YJIT's command-line options](#command-line-options). @@ -308,9 +309,9 @@ You can use the Intel syntax for disassembly in LLDB, keeping it consistent with echo "settings set target.x86-disassembly-flavor intel" >> ~/.lldbinit ``` -## Running YJIT on M1 +## Running x86 YJIT on Apple's Rosetta -It is possible to run YJIT on an Apple M1 via Rosetta. You can find basic +For development purposes, it is possible to run x86 YJIT on an Apple M1 via Rosetta. You can find basic instructions below, but there are a few caveats listed further down. 
First, install Rosetta: @@ -343,7 +344,7 @@ $ rustup default stable-x86_64-apple-darwin While in your i386 shell, install Cargo and Homebrew, then hack away! -### M1 Caveats +### Rosetta Caveats 1. You must install a version of Homebrew for each architecture 2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install From 232e43fd52e53f667c2c290cffb4afa524889f0f Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 09:18:34 -0700 Subject: [PATCH 507/546] Respect RUBY_TESTOPTS on test-all (https://github.com/Shopify/ruby/pull/435) * Respect RUBY_TESTOPTS on test-all * Increase the Cirrus timeout * Increase the CSV test timeout --- .cirrus.yml | 3 ++- test/csv/parse/test_general.rb | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 708a329e5751e1..038df9530c08ec 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -91,6 +91,7 @@ yjit_task: matrix: CC: clang-12 CC: gcc-11 + timeout_in: 90m id_script: id set_env_script: # Set `GNUMAKEFLAGS`, because the flags are GNU make specific. 
Note using @@ -136,7 +137,7 @@ yjit_task: make_btest_script: source $HOME/.cargo/env && make -j btest RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" # Check that we can run test-all successfully (running TestGCCompact separately until we fix its performance) - make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS='--name=!/TestGCCompact/' + make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --name=!/TestGCCompact/' test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" # Check that we can run test-spec successfully diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index d2b74008ebb46e..c740462c01dd1b 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -247,6 +247,9 @@ def test_seeked_string_io def assert_parse_errors_out(data, **options) assert_raise(CSV::MalformedCSVError) do timeout = 0.2 + if defined?(RubyVM::YJIT.enabled?) and RubyVM::YJIT.enabled? + timeout = 1 # for --yjit-call-threshold=1 + end if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled? 
timeout = 5 # for --jit-wait end From 4b7d3884dff2cd44d51d5b51f9254c4e0980a45e Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 14:05:25 -0700 Subject: [PATCH 508/546] Avoid randomizing the order of test-all on arm64 (https://github.com/Shopify/ruby/pull/440) for now --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 038df9530c08ec..df57ebd7f65b39 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -137,7 +137,7 @@ yjit_task: make_btest_script: source $HOME/.cargo/env && make -j btest RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" # Check that we can run test-all successfully (running TestGCCompact separately until we fix its performance) - make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --name=!/TestGCCompact/' + make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" # Check that we can run test-spec successfully From 929a6a75eb3e3404eb8f6246a40b2cd73d0d3fed Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 25 Aug 2022 21:06:22 -0400 Subject: [PATCH 509/546] Remove ir_ssa.rs as we aren't using it and it's now outdated --- yjit/src/backend/ir_ssa.rs | 1261 ------------------------------------ yjit/src/backend/mod.rs | 1 - yjit/src/codegen.rs | 2 +- 3 files changed, 1 insertion(+), 1263 deletions(-) delete mode 100644 yjit/src/backend/ir_ssa.rs diff --git a/yjit/src/backend/ir_ssa.rs b/yjit/src/backend/ir_ssa.rs deleted file mode 100644 index cd7f03c4faf442..00000000000000 --- a/yjit/src/backend/ir_ssa.rs +++ /dev/null @@ -1,1261 +0,0 @@ -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] - -use std::fmt; -use 
std::convert::From; -use crate::cruby::{VALUE}; -use crate::virtualmem::{CodePtr}; -use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; -use crate::core::{Context, Type, TempMapping}; - -/* -#[cfg(target_arch = "x86_64")] -use crate::backend::x86_64::*; - -#[cfg(target_arch = "aarch64")] -use crate::backend::arm64::*; - - -pub const EC: Opnd = _EC; -pub const CFP: Opnd = _CFP; -pub const SP: Opnd = _SP; - -pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; -pub const C_RET_OPND: Opnd = _C_RET_OPND; -*/ - - - -// Dummy reg struct -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub struct Reg -{ - reg_no: u8, - num_bits: u8, -} - - - - - - - -/// Instruction opcodes -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum Op -{ - // Add a comment into the IR at the point that this instruction is added. - // It won't have any impact on that actual compiled code. - Comment, - - // Add a label into the IR at the point that this instruction is added. - Label, - - // Mark a position in the generated code - PosMarker, - - // Bake a string directly into the instruction stream. - BakeString, - - // Add two operands together, and return the result as a new operand. This - // operand can then be used as the operand on another instruction. It - // accepts two operands, which can be of any type - // - // Under the hood when allocating registers, the IR will determine the most - // efficient way to get these values into memory. For example, if both - // operands are immediates, then it will load the first one into a register - // first with a mov instruction and then add them together. If one of them - // is a register, however, it will just perform a single add instruction. - Add, - - // This is the same as the OP_ADD instruction, except for subtraction. - Sub, - - // This is the same as the OP_ADD instruction, except that it performs the - // binary AND operation. - And, - - // Perform the NOT operation on an individual operand, and return the result - // as a new operand. 
This operand can then be used as the operand on another - // instruction. - Not, - - // - // Low-level instructions - // - - // A low-level instruction that loads a value into a register. - Load, - - // A low-level instruction that loads a value into a register and - // sign-extends it to a 64-bit value. - LoadSExt, - - // Low-level instruction to store a value to memory. - Store, - - // Load effective address - Lea, - - // Load effective address relative to the current instruction pointer. It - // accepts a single signed immediate operand. - LeaLabel, - - // A low-level mov instruction. It accepts two operands. - Mov, - - // Bitwise AND test instruction - Test, - - // Compare two operands - Cmp, - - // Unconditional jump to a branch target - Jmp, - - // Unconditional jump which takes a reg/mem address operand - JmpOpnd, - - // Low-level conditional jump instructions - Jbe, - Je, - Jne, - Jz, - Jnz, - Jo, - - // Conditional select instructions - CSelZ, - CSelNZ, - CSelE, - CSelNE, - CSelL, - CSelLE, - CSelG, - CSelGE, - - // Push and pop registers to/from the C stack - CPush, - CPop, - CPopInto, - - // Push and pop all of the caller-save registers and the flags to/from the C - // stack - CPushAll, - CPopAll, - - // C function call with N arguments (variadic) - CCall, - - // C function return - CRet, - - // Atomically increment a counter - // Input: memory operand, increment value - // Produces no output - IncrCounter, - - // Trigger a debugger breakpoint - Breakpoint, - - /// Set up the frame stack as necessary per the architecture. - FrameSetup, - - /// Tear down the frame stack as necessary per the architecture. - FrameTeardown, - - /// Take a specific register. Signal the register allocator to not use it. 
- LiveReg, -} - -/// Instruction idx in an assembler -/// This is used like a pointer -type InsnIdx = u32; - -/// Instruction operand index -type OpndIdx = u32; - -// Memory operand base -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum MemBase -{ - Reg(u8), - InsnOut(InsnIdx), -} - -// Memory location -#[derive(Copy, Clone, PartialEq, Eq)] -pub struct Mem -{ - // Base register number or instruction index - pub(super) base: MemBase, - - // Offset relative to the base pointer - pub(super) disp: i32, - - // Size in bits - pub(super) num_bits: u8, -} - -impl fmt::Debug for Mem { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "Mem{}[{:?}", self.num_bits, self.base)?; - if self.disp != 0 { - let sign = if self.disp > 0 { '+' } else { '-' }; - write!(fmt, " {sign} {}", self.disp)?; - } - - write!(fmt, "]") - } -} - -/// Operand to an IR instruction -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum Opnd -{ - None, // For insns with no output - - // Immediate Ruby value, may be GC'd, movable - Value(VALUE), - - // Output of a preceding instruction in this block - InsnOut{ idx: InsnIdx, num_bits: u8 }, - - // Low-level operands, for lowering - Imm(i64), // Raw signed immediate - UImm(u64), // Raw unsigned immediate - Mem(Mem), // Memory location - Reg(Reg), // Machine register -} - -impl fmt::Debug for Opnd { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - use Opnd::*; - match self { - Self::None => write!(fmt, "None"), - Value(val) => write!(fmt, "Value({val:?})"), - InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"), - Imm(signed) => write!(fmt, "{signed:x}_i64"), - UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"), - // Say Mem and Reg only once - Mem(mem) => write!(fmt, "{mem:?}"), - Reg(reg) => write!(fmt, "{reg:?}"), - } - } -} - -impl Opnd -{ - /// Convenience constructor for memory operands - pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { - match base { - Opnd::Reg(base_reg) => { - 
assert!(base_reg.num_bits == 64); - Opnd::Mem(Mem { - base: MemBase::Reg(base_reg.reg_no), - disp: disp, - num_bits: num_bits, - }) - }, - - Opnd::InsnOut{idx, num_bits } => { - assert!(num_bits == 64); - Opnd::Mem(Mem { - base: MemBase::InsnOut(idx), - disp: disp, - num_bits: num_bits, - }) - }, - - _ => unreachable!("memory operand with non-register base") - } - } - - /// Constructor for constant pointer operand - pub fn const_ptr(ptr: *const u8) -> Self { - Opnd::UImm(ptr as u64) - } - - pub fn is_some(&self) -> bool { - match *self { - Opnd::None => false, - _ => true, - } - } - - /// Unwrap a register operand - pub fn unwrap_reg(&self) -> Reg { - match self { - Opnd::Reg(reg) => *reg, - _ => unreachable!("trying to unwrap {:?} into reg", self) - } - } - - /// Get the size in bits for register/memory operands - pub fn rm_num_bits(&self) -> u8 { - match *self { - Opnd::Reg(reg) => reg.num_bits, - Opnd::Mem(mem) => mem.num_bits, - Opnd::InsnOut{ num_bits, .. } => num_bits, - _ => unreachable!() - } - } -} - -impl From for Opnd { - fn from(value: usize) -> Self { - Opnd::UImm(value.try_into().unwrap()) - } -} - -impl From for Opnd { - fn from(value: u64) -> Self { - Opnd::UImm(value.try_into().unwrap()) - } -} - -impl From for Opnd { - fn from(value: i64) -> Self { - Opnd::Imm(value) - } -} - -impl From for Opnd { - fn from(value: i32) -> Self { - Opnd::Imm(value.try_into().unwrap()) - } -} - -impl From for Opnd { - fn from(value: u32) -> Self { - Opnd::UImm(value as u64) - } -} - -impl From for Opnd { - fn from(value: VALUE) -> Self { - let VALUE(uimm) = value; - Opnd::UImm(uimm as u64) - } -} - -/// Branch target (something that we can jump to) -/// for branch instructions -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum Target -{ - CodePtr(CodePtr), // Pointer to a piece of YJIT-generated code (e.g. 
side-exit) - FunPtr(*const u8), // Pointer to a C function - Label(usize), // A label within the generated code -} - -impl Target -{ - pub fn unwrap_fun_ptr(&self) -> *const u8 { - match self { - Target::FunPtr(ptr) => *ptr, - _ => unreachable!("trying to unwrap {:?} into fun ptr", self) - } - } - - pub fn unwrap_label_idx(&self) -> usize { - match self { - Target::Label(idx) => *idx, - _ => unreachable!() - } - } -} - -impl From for Target { - fn from(code_ptr: CodePtr) -> Self { - Target::CodePtr(code_ptr) - } -} - -type PosMarkerFn = Box; - -/// YJIT IR instruction -pub struct Insn -{ - /// Other instructions using this instruction's output - pub(super) uses: Vec<(InsnIdx, OpndIdx)>, - - // Opcode for the instruction - pub(super) op: Op, - - // Optional string for comments and labels - pub(super) text: Option, - - // List of input operands/values - pub(super) opnds: Vec, - - // Output operand for this instruction - pub(super) out: Opnd, - - // List of branch targets (branch instructions only) - pub(super) target: Option, - - // Callback to mark the position of this instruction - // in the generated code - pub(super) pos_marker: Option, -} - -impl Insn { - fn new(op: Op, out: Opnd) -> Self { - Self { - uses: Vec::new(), - op, - text: None, - opnds: Vec::default(), - out, - target: None, - pos_marker: None, - } - } -} - -/// A container for an instruction within a doubly-linked list. -struct InsnNode { - insn: Insn, - prev_idx: Option, - next_idx: Option -} - -impl InsnNode { - fn new(insn: Insn, prev_idx: Option) -> Self { - Self { insn, prev_idx, next_idx: None } - } -} - -/// A doubly-linked list containing instructions. 
-pub(super) struct InsnList { - insns: Vec, - first_idx: Option, - last_idx: Option -} - -impl InsnList { - fn new() -> Self { - Self { insns: Vec::default(), first_idx: None, last_idx: None } - } - - /// Returns the next instruction index that will be generated - fn next_idx(&self) -> InsnIdx { - self.insns.len() as InsnIdx - } - - /// Return a mutable reference to the instruction for the given index - fn get_ref_mut(&mut self, idx: InsnIdx) -> &mut Insn { - &mut self.insns[idx as usize].insn - } - - /// Push a new instruction onto the end of the list - fn push(&mut self, insn: Insn) -> InsnIdx { - let insn_idx = self.next_idx(); - - // Push the new node onto the list - self.insns.push(InsnNode::new(insn, self.last_idx)); - - // Update the first index if it's not already set - self.first_idx = self.first_idx.or(Some(insn_idx)); - - // Update the last node's next_idx field if necessary - if let Some(last_idx) = self.last_idx { - self.insns[last_idx as usize].next_idx = Some(insn_idx); - } - - // Update the last index - self.last_idx = Some(insn_idx); - - insn_idx - } - - /// Remove an instruction from the list at a given index - fn remove(&mut self, insn_idx: InsnIdx) { - let prev_idx = self.insns[insn_idx as usize].prev_idx; - let next_idx = self.insns[insn_idx as usize].next_idx; - - // Update the previous node's next_idx field if necessary - if let Some(prev_idx) = prev_idx { - self.insns[prev_idx as usize].next_idx = next_idx; - } else { - assert_eq!(self.first_idx, Some(insn_idx)); - self.first_idx = next_idx; - } - - // Update the next node's prev_idx field if necessary - if let Some(next_idx) = next_idx { - self.insns[next_idx as usize].prev_idx = prev_idx; - } else { - assert_eq!(self.last_idx, Some(insn_idx)); - self.last_idx = prev_idx; - } - } -} - -/// An iterator that will walk through the list of instructions in order -/// according to the linked list. 
-pub(super) struct InsnListIterator<'a> { - insn_list: &'a InsnList, - insn_idx: Option -} - -impl<'a> Iterator for InsnListIterator<'a> { - type Item = &'a Insn; - - /// Return an option containing the next instruction in the list. - fn next(&mut self) -> Option { - self.insn_idx.map(|idx| { - let node = &self.insn_list.insns[idx as usize]; - self.insn_idx = node.next_idx; - &node.insn - }) - } -} - -impl<'a> IntoIterator for &'a InsnList { - type Item = &'a Insn; - type IntoIter = InsnListIterator<'a>; - - fn into_iter(self) -> Self::IntoIter { - InsnListIterator { insn_list: self, insn_idx: self.first_idx } - } -} - -/* -impl fmt::Debug for Insn { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "{:?}(", self.op)?; - - // Print list of operands - let mut opnd_iter = self.opnds.iter(); - if let Some(first_opnd) = opnd_iter.next() { - write!(fmt, "{first_opnd:?}")?; - } - for opnd in opnd_iter { - write!(fmt, ", {opnd:?}")?; - } - write!(fmt, ")")?; - - // Print text, target, and pos if they are present - if let Some(text) = &self.text { - write!(fmt, " {text:?}")? - } - if let Some(target) = self.target { - write!(fmt, " target={target:?}")?; - } - - write!(fmt, " -> {:?}", self.out) - } -} -*/ - - - - - - -/// Object into which we assemble instructions to be -/// optimized and lowered -pub struct Assembler -{ - /// The list of instructions created by this assembler - pub(super) insn_list: InsnList, - - /// Names of labels - pub(super) label_names: Vec, - - /* - /// FIXME: only compute the live ranges when doing register allocation? 
- /// - /// Parallel vec with insns - /// Index of the last insn using the output of this insn - //pub(super) live_ranges: Vec, - */ -} - - - - - - - -impl Assembler -{ - pub fn new() -> Self { - Self { insn_list: InsnList::new(), label_names: Vec::default() } - } - - /// Append an instruction to the list - pub(super) fn push_insn( - &mut self, - op: Op, - opnds: Vec, - target: Option, - text: Option, - pos_marker: Option - ) -> Opnd - { - let insn_idx = self.insn_list.next_idx(); - let mut out_num_bits: u8 = 0; - - for (opnd_idx, opnd) in opnds.iter().enumerate() { - match *opnd { - Opnd::InsnOut{ num_bits, .. } | - Opnd::Mem(Mem { num_bits, .. }) | - Opnd::Reg(Reg { num_bits, .. }) => { - if out_num_bits == 0 { - out_num_bits = num_bits - } - else if out_num_bits != num_bits { - panic!("operands of incompatible sizes"); - } - } - _ => {} - } - - // Track which instructions this insn is using as operands - if let Opnd::InsnOut { idx, .. } = *opnd { - self.insn_list.get_ref_mut(idx).uses.push((insn_idx, opnd_idx as OpndIdx)); - } - } - - if out_num_bits == 0 { - out_num_bits = 64; - } - - // Operand for the output of this instruction - let out_opnd = Opnd::InsnOut{ idx: insn_idx, num_bits: out_num_bits }; - - self.insn_list.push(Insn { - uses: Vec::default(), - op, - text, - opnds, - out: out_opnd, - target, - pos_marker, - }); - - // Return an operand for the output of this instruction - out_opnd - } - - /// Replace uses of this instruction by another operand - pub(super) fn replace_uses(&mut self, insn_idx: InsnIdx, replace_with: Opnd) - { - // We're going to clear the vector of uses - let uses = std::mem::take(&mut self.insn_list.get_ref_mut(insn_idx).uses); - - // For each use of this instruction - for (use_idx, opnd_idx) in uses { - - // TODO: assert that this is indeed a use of this insn (sanity check) - - let use_insn = self.insn_list.get_ref_mut(use_idx); - use_insn.opnds[opnd_idx as usize] = replace_with; - - // If replace_with is an insn, update its uses 
- if let Opnd::InsnOut { idx, .. } = replace_with { - let repl_insn = &mut self.insn_list.insns[idx as usize]; - assert!(repl_insn.prev_idx.is_some() || repl_insn.next_idx.is_some()); - repl_insn.insn.uses.push((use_idx, opnd_idx)); - } - } - } - - /// Remove a specific insn from the assembler - pub(super) fn remove_insn(&mut self, insn_idx: InsnIdx) - { - // Note: we don't remove it from the vec because we do that - // only when we're done with the assembler - self.insn_list.remove(insn_idx); - } - - - - // TODO: we need an insert_before() - // To insert an instruction before another instruction - - - - - - - // TODO: can we implement some kind of insn_iter()? - // could be useful for the emit passes - - - - - - - - // TODO: use push_insn for comment? - /* - /// Add a comment at the current position - pub fn comment(&mut self, text: &str) - { - let insn = Insn { - op: Op::Comment, - text: Some(text.to_owned()), - opnds: vec![], - out: Opnd::None, - target: None, - pos_marker: None, - }; - self.insns.push(insn); - self.live_ranges.push(self.insns.len()); - } - - /// Bake a string at the current position - pub fn bake_string(&mut self, text: &str) - { - let insn = Insn { - op: Op::BakeString, - text: Some(text.to_owned()), - opnds: vec![], - out: Opnd::None, - target: None, - pos_marker: None, - }; - self.insns.push(insn); - self.live_ranges.push(self.insns.len()); - } - */ - - - - - - - /// Load an address relative to the given label. - #[must_use] - pub fn lea_label(&mut self, target: Target) -> Opnd { - self.push_insn(Op::LeaLabel, vec![], Some(target), None, None) - } - - /// Create a new label instance that we can jump to - pub fn new_label(&mut self, name: &str) -> Target - { - assert!(!name.contains(" "), "use underscores in label names, not spaces"); - - let label_idx = self.label_names.len(); - self.label_names.push(name.to_string()); - Target::Label(label_idx) - } - - - - - // TODO: use push_insn for this? 
- /* - /// Add a label at the current position - pub fn write_label(&mut self, label: Target) - { - assert!(label.unwrap_label_idx() < self.label_names.len()); - - let insn = Insn { - op: Op::Label, - text: None, - opnds: vec![], - out: Opnd::None, - target: Some(label), - pos_marker: None, - }; - self.insns.push(insn); - self.live_ranges.push(self.insns.len()); - } - */ - - - - - /* - /// Transform input instructions, consumes the input assembler - pub(super) fn forward_pass(mut self, mut map_insn: F) -> Assembler - where F: FnMut(&mut Assembler, usize, Op, Vec, Option, Option, Option) - { - let mut asm = Assembler { - insns: Vec::default(), - live_ranges: Vec::default(), - label_names: self.label_names, - }; - - // Indices maps from the old instruction index to the new instruction - // index. - let mut indices: Vec = Vec::default(); - - // Map an operand to the next set of instructions by correcting previous - // InsnOut indices. - fn map_opnd(opnd: Opnd, indices: &mut Vec) -> Opnd { - match opnd { - Opnd::InsnOut{ idx, num_bits } => { - Opnd::InsnOut{ idx: indices[idx], num_bits } - } - Opnd::Mem(Mem{ base: MemBase::InsnOut(idx), disp, num_bits, }) => { - Opnd::Mem(Mem{ base:MemBase::InsnOut(indices[idx]), disp, num_bits }) - } - _ => opnd - } - } - - for (index, insn) in self.insns.drain(..).enumerate() { - let opnds: Vec = insn.opnds.into_iter().map(|opnd| map_opnd(opnd, &mut indices)).collect(); - - // For each instruction, either handle it here or allow the map_insn - // callback to handle it. - match insn.op { - Op::Comment => { - asm.comment(insn.text.unwrap().as_str()); - }, - _ => { - map_insn(&mut asm, index, insn.op, opnds, insn.target, insn.text, insn.pos_marker); - } - }; - - // Here we're assuming that if we've pushed multiple instructions, - // the output that we're using is still the final instruction that - // was pushed. 
- indices.push(asm.insns.len() - 1); - } - - asm - } - */ - - - /* - /// Sets the out field on the various instructions that require allocated - /// registers because their output is used as the operand on a subsequent - /// instruction. This is our implementation of the linear scan algorithm. - pub(super) fn alloc_regs(mut self, regs: Vec) -> Assembler - { - //dbg!(&self); - - // First, create the pool of registers. - let mut pool: u32 = 0; - - // Mutate the pool bitmap to indicate that the register at that index - // has been allocated and is live. - fn alloc_reg(pool: &mut u32, regs: &Vec) -> Reg { - for (index, reg) in regs.iter().enumerate() { - if (*pool & (1 << index)) == 0 { - *pool |= 1 << index; - return *reg; - } - } - - unreachable!("Register spill not supported"); - } - - // Allocate a specific register - fn take_reg(pool: &mut u32, regs: &Vec, reg: &Reg) -> Reg { - let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); - - if let Some(reg_index) = reg_index { - assert_eq!(*pool & (1 << reg_index), 0); - *pool |= 1 << reg_index; - } - - return *reg; - } - - // Mutate the pool bitmap to indicate that the given register is being - // returned as it is no longer used by the instruction that previously - // held it. - fn dealloc_reg(pool: &mut u32, regs: &Vec, reg: &Reg) { - let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); - - if let Some(reg_index) = reg_index { - *pool &= !(1 << reg_index); - } - } - - let live_ranges: Vec = std::mem::take(&mut self.live_ranges); - - let asm = self.forward_pass(|asm, index, op, opnds, target, text, pos_marker| { - // Check if this is the last instruction that uses an operand that - // spans more than one instruction. In that case, return the - // allocated register to the pool. - for opnd in &opnds { - match opnd { - Opnd::InsnOut{idx, .. } | - Opnd::Mem( Mem { base: MemBase::InsnOut(idx), .. }) => { - // Since we have an InsnOut, we know it spans more that one - // instruction. 
- let start_index = *idx; - assert!(start_index < index); - - // We're going to check if this is the last instruction that - // uses this operand. If it is, we can return the allocated - // register to the pool. - if live_ranges[start_index] == index { - if let Opnd::Reg(reg) = asm.insns[start_index].out { - dealloc_reg(&mut pool, ®s, ®); - } else { - unreachable!("no register allocated for insn {:?}", op); - } - } - } - - _ => {} - } - } - - // C return values need to be mapped to the C return register - if op == Op::CCall { - assert_eq!(pool, 0, "register lives past C function call"); - } - - // If this instruction is used by another instruction, - // we need to allocate a register to it - let mut out_reg = Opnd::None; - if live_ranges[index] != index { - - // C return values need to be mapped to the C return register - if op == Op::CCall { - out_reg = Opnd::Reg(take_reg(&mut pool, ®s, &C_RET_REG)) - } - - // If this instruction's first operand maps to a register and - // this is the last use of the register, reuse the register - // We do this to improve register allocation on x86 - // e.g. 
out = add(reg0, reg1) - // reg0 = add(reg0, reg1) - if opnds.len() > 0 { - if let Opnd::InsnOut{idx, ..} = opnds[0] { - if live_ranges[idx] == index { - if let Opnd::Reg(reg) = asm.insns[idx].out { - out_reg = Opnd::Reg(take_reg(&mut pool, ®s, ®)) - } - } - } - } - - // Allocate a new register for this instruction - if out_reg == Opnd::None { - out_reg = if op == Op::LiveReg { - // Allocate a specific register - let reg = opnds[0].unwrap_reg(); - Opnd::Reg(take_reg(&mut pool, ®s, ®)) - } else { - Opnd::Reg(alloc_reg(&mut pool, ®s)) - } - } - } - - // Replace InsnOut operands by their corresponding register - let reg_opnds: Vec = opnds.into_iter().map(|opnd| - match opnd { - Opnd::InsnOut{idx, ..} => asm.insns[idx].out, - Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { - let out_reg = asm.insns[idx].out.unwrap_reg(); - Opnd::Mem(Mem { - base: MemBase::Reg(out_reg.reg_no), - disp, - num_bits - }) - } - _ => opnd, - } - ).collect(); - - asm.push_insn(op, reg_opnds, target, text, pos_marker); - - // Set the output register for this instruction - let num_insns = asm.insns.len(); - let mut new_insn = &mut asm.insns[num_insns - 1]; - if let Opnd::Reg(reg) = out_reg { - let num_out_bits = new_insn.out.rm_num_bits(); - out_reg = Opnd::Reg(reg.sub_reg(num_out_bits)) - } - new_insn.out = out_reg; - }); - - assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); - asm - } - */ - - - - /* - /// Compile the instructions down to machine code - /// NOTE: should compile return a list of block labels to enable - /// compiling multiple blocks at a time? 
- pub fn compile(self, cb: &mut CodeBlock) -> Vec - { - let alloc_regs = Self::get_alloc_regs(); - self.compile_with_regs(cb, alloc_regs) - } - - /// Compile with a limited number of registers - pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> Vec - { - let mut alloc_regs = Self::get_alloc_regs(); - let alloc_regs = alloc_regs.drain(0..num_regs).collect(); - self.compile_with_regs(cb, alloc_regs) - } - */ -} - - - - - - - - - - - - - -/* -impl fmt::Debug for Assembler { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write!(fmt, "Assembler\n")?; - - for (idx, insn) in self.insns.iter().enumerate() { - write!(fmt, " {idx:03} {insn:?}\n")?; - } - - Ok(()) - } -} -*/ - -impl Assembler -{ - pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd - { - let target = Target::FunPtr(fptr); - self.push_insn(Op::CCall, opnds, Some(target), None, None) - } - - //pub fn pos_marker(&mut self, marker_fn: F) - pub fn pos_marker(&mut self, marker_fn: PosMarkerFn) - { - self.push_insn(Op::PosMarker, vec![], None, None, Some(marker_fn)); - } -} - -macro_rules! def_push_jcc { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self, target: Target) - { - self.push_insn($opcode, vec![], Some(target), None, None); - } - } - }; -} - -macro_rules! def_push_0_opnd { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - #[must_use] - pub fn $op_name(&mut self) -> Opnd - { - self.push_insn($opcode, vec![], None, None, None) - } - } - }; -} - -macro_rules! def_push_0_opnd_no_out { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self) - { - self.push_insn($opcode, vec![], None, None, None); - } - } - }; -} - -macro_rules! def_push_1_opnd { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - #[must_use] - pub fn $op_name(&mut self, opnd0: Opnd) -> Opnd - { - self.push_insn($opcode, vec![opnd0], None, None, None) - } - } - }; -} - -macro_rules! 
def_push_1_opnd_no_out { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self, opnd0: Opnd) - { - self.push_insn($opcode, vec![opnd0], None, None, None); - } - } - }; -} - -macro_rules! def_push_2_opnd { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - #[must_use] - pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd - { - self.push_insn($opcode, vec![opnd0, opnd1], None, None, None) - } - } - }; -} - -macro_rules! def_push_2_opnd_no_out { - ($op_name:ident, $opcode:expr) => { - impl Assembler - { - pub fn $op_name(&mut self, opnd0: Opnd, opnd1: Opnd) - { - self.push_insn($opcode, vec![opnd0, opnd1], None, None, None); - } - } - }; -} - -def_push_1_opnd_no_out!(jmp_opnd, Op::JmpOpnd); -def_push_jcc!(jmp, Op::Jmp); -def_push_jcc!(je, Op::Je); -def_push_jcc!(jne, Op::Jne); -def_push_jcc!(jbe, Op::Jbe); -def_push_jcc!(jz, Op::Jz); -def_push_jcc!(jnz, Op::Jnz); -def_push_jcc!(jo, Op::Jo); -def_push_2_opnd!(add, Op::Add); -def_push_2_opnd!(sub, Op::Sub); -def_push_2_opnd!(and, Op::And); -def_push_1_opnd!(not, Op::Not); -def_push_1_opnd_no_out!(cpush, Op::CPush); -def_push_0_opnd!(cpop, Op::CPop); -def_push_1_opnd_no_out!(cpop_into, Op::CPopInto); -def_push_0_opnd_no_out!(cpush_all, Op::CPushAll); -def_push_0_opnd_no_out!(cpop_all, Op::CPopAll); -def_push_1_opnd_no_out!(cret, Op::CRet); -def_push_1_opnd!(load, Op::Load); -def_push_1_opnd!(load_sext, Op::LoadSExt); -def_push_1_opnd!(lea, Op::Lea); -def_push_1_opnd!(live_reg_opnd, Op::LiveReg); -def_push_2_opnd_no_out!(store, Op::Store); -def_push_2_opnd_no_out!(mov, Op::Mov); -def_push_2_opnd_no_out!(cmp, Op::Cmp); -def_push_2_opnd_no_out!(test, Op::Test); -def_push_0_opnd_no_out!(breakpoint, Op::Breakpoint); -def_push_2_opnd_no_out!(incr_counter, Op::IncrCounter); -def_push_2_opnd!(csel_z, Op::CSelZ); -def_push_2_opnd!(csel_nz, Op::CSelNZ); -def_push_2_opnd!(csel_e, Op::CSelE); -def_push_2_opnd!(csel_ne, Op::CSelNE); -def_push_2_opnd!(csel_l, Op::CSelL); 
-def_push_2_opnd!(csel_le, Op::CSelLE); -def_push_2_opnd!(csel_g, Op::CSelG); -def_push_2_opnd!(csel_ge, Op::CSelGE); -def_push_0_opnd_no_out!(frame_setup, Op::FrameSetup); -def_push_0_opnd_no_out!(frame_teardown, Op::FrameTeardown); - -#[cfg(test)] -mod tests -{ - use super::*; - - #[test] - fn test_push_insn() - { - let mut asm = Assembler::new(); - let v0 = asm.add(1.into(), 2.into()); - let v1 = asm.add(v0, 3.into()); - } - - #[test] - fn test_replace_insn() - { - let mut asm = Assembler::new(); - let v0 = asm.add(1_u64.into(), 2_u64.into()); - let v1 = asm.add(v0, 3_u64.into()); - - if let Opnd::InsnOut{ idx, ..} = v0 { - asm.replace_uses(idx, 3_u64.into()); - asm.remove_insn(idx); - } - else - { - panic!(); - } - - // Nobody is using v1, but we should still be able to "replace" and remove it - if let Opnd::InsnOut{ idx, ..} = v1 { - asm.replace_uses(idx, 6_u64.into()); - asm.remove_insn(idx); - } - else - { - panic!(); - } - - assert!(asm.insn_list.first_idx.is_none()); - assert!(asm.insn_list.last_idx.is_none()); - } - - #[test] - fn test_replace_insn_with_insn() - { - let mut asm = Assembler::new(); - let v0 = asm.add(1.into(), 2.into()); - let v1 = asm.add(v0, 3.into()); - let v2 = asm.add(v0, 4.into()); - - if let Opnd::InsnOut{ idx, ..} = v0 { - let v3 = asm.load(4.into()); - asm.replace_uses(idx, v3); - asm.remove_insn(idx); - } - else - { - panic!(); - } - } - - #[test] - fn test_insn_list_push_and_remove() { - let mut insn_list = InsnList::new(); - - let insn_idx = insn_list.push(Insn::new(Op::Load, Opnd::None)); - insn_list.remove(insn_idx); - - assert_eq!(insn_list.first_idx, None); - assert_eq!(insn_list.last_idx, None); - } - - #[test] - fn test_insn_list_iterator() { - let mut insn_list = InsnList::new(); - - let first_insn_idx = insn_list.push(Insn::new(Op::Add, Opnd::None)); - let second_insn_idx = insn_list.push(Insn::new(Op::Sub, Opnd::None)); - let third_insn_idx = insn_list.push(Insn::new(Op::Load, Opnd::None)); - - for (insn_idx, insn) in 
insn_list.into_iter().enumerate() { - match insn_idx { - 0 => assert_eq!(insn.op, Op::Add), - 1 => assert_eq!(insn.op, Op::Sub), - 2 => assert_eq!(insn.op, Op::Load), - _ => panic!("Unexpected instruction index") - }; - } - } -} diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs index 790df0d0326b0e..47946950946438 100644 --- a/yjit/src/backend/mod.rs +++ b/yjit/src/backend/mod.rs @@ -5,5 +5,4 @@ pub mod x86_64; pub mod arm64; pub mod ir; -pub mod ir_ssa; mod tests; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 7c4c974345b024..07e8500f620b46 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1146,7 +1146,7 @@ fn gen_opt_plus( // Check that both operands are fixnums guard_two_fixnums(ctx, asm, side_exit); - // Get the operands and destination from the stack + // Get the operands from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); From 44c6bcff1d068a2a5d191f602efc99a28e94dbc1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 25 Aug 2022 21:19:26 -0400 Subject: [PATCH 510/546] LDRH and STRH for AArch64 (https://github.com/Shopify/ruby/pull/438) --- yjit/src/asm/arm64/inst/halfword_imm.rs | 176 ++++++++++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/mod.rs | 120 ++++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/halfword_imm.rs diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs new file mode 100644 index 00000000000000..675e33d4a8b33e --- /dev/null +++ b/yjit/src/asm/arm64/inst/halfword_imm.rs @@ -0,0 +1,176 @@ +/// Whether this is a load or a store. +enum Op { + Load = 1, + Store = 0 +} + +/// The type of indexing to perform for this instruction. +enum Index { + /// No indexing. + None = 0b00, + + /// Mutate the register after the read. + PostIndex = 0b01, + + /// Mutate the register before the read. 
+ PreIndex = 0b11 +} + +/// The struct that represents an A64 halfword instruction that can be encoded. +/// +/// LDRH/STRH +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 1 1 0 0 1 0 | +/// | op imm12.................................... rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +/// LDRH (pre-index/post-index) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 1 1 0 0 0 0 0 | +/// | op imm9.......................... index rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct HalfwordImm { + /// The number of the 32-bit register to be loaded. + rt: u8, + + /// The number of the 64-bit base register to calculate the memory address. + rn: u8, + + /// The type of indexing to perform for this instruction. + index: Index, + + /// The immediate offset from the base register. + imm: i16, + + /// The operation to perform. 
+ op: Op +} + +impl HalfwordImm { + /// LDRH + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh(rt: u8, rn: u8, imm12: i16) -> Self { + Self { rt, rn, index: Index::None, imm: imm12, op: Op::Load } + } + + /// LDRH (pre-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh_pre(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Load } + } + + /// LDRH (post-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh_post(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Load } + } + + /// STRH + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh(rt: u8, rn: u8, imm12: i16) -> Self { + Self { rt, rn, index: Index::None, imm: imm12, op: Op::Store } + } + + /// STRH (pre-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh_pre(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Store } + } + + /// STRH (post-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh_post(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Store } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b111100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: HalfwordImm) -> Self { + let (mut opc, imm) = match inst.index { + Index::None => { + let mut imm12 = ((inst.imm / 2) as u32) & ((1 << 12) - 1); + (0b100, imm12) + }, + Index::PreIndex | Index::PostIndex => { + let mut imm9 = (inst.imm as u32) & ((1 << 9) - 1); + (0b000, (imm9 << 2) | (inst.index as u32)) + } + }; + + 0 + | (FAMILY << 25) + | ((opc | (inst.op as u32)) << 22) + | (imm << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: HalfwordImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldrh() { + let inst = HalfwordImm::ldrh(0, 1, 8); + let result: u32 = inst.into(); + assert_eq!(0x79401020, result); + } + + #[test] + fn test_ldrh_pre() { + let inst = HalfwordImm::ldrh_pre(0, 1, 16); + let result: u32 = inst.into(); + assert_eq!(0x78410c20, result); + } + + #[test] + fn test_ldrh_post() { + let inst = HalfwordImm::ldrh_post(0, 1, 24); + let result: u32 = inst.into(); + assert_eq!(0x78418420, result); + } + + #[test] + fn test_ldrh_post_negative() { + let inst = HalfwordImm::ldrh_post(0, 1, -24); + let result: u32 = inst.into(); + assert_eq!(0x785e8420, result); + } + + #[test] + fn test_strh() { + let inst = HalfwordImm::strh(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x79000020, result); + } + + #[test] + fn test_strh_pre() { + let inst = HalfwordImm::strh_pre(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78000c20, result); + } + + #[test] + fn test_strh_post() { + let inst = HalfwordImm::strh_post(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78000420, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index ab41464013dc50..f4c27a51021cae 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -9,6 +9,7 @@ mod call; mod conditional; mod 
data_imm; mod data_reg; +mod halfword_imm; mod load_literal; mod load_register; mod load_store; @@ -30,6 +31,7 @@ pub use call::Call; pub use conditional::Conditional; pub use data_imm::DataImm; pub use data_reg::DataReg; +pub use halfword_imm::HalfwordImm; pub use load_literal::LoadLiteral; pub use load_register::LoadRegister; pub use load_store::LoadStore; diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index fb07498ce2ad52..cf898d2b5a9706 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -423,6 +423,51 @@ pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { cb.write_bytes(&bytes); } +/// LDRH - load a halfword from memory +pub fn ldrh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less."); + + HalfwordImm::ldrh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDRH (pre-index) - load a halfword from memory, update the base pointer before loading it +pub fn ldrh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::ldrh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDRH (post-index) - load a halfword from memory, update the base pointer after loading it +pub fn ldrh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => 
{ + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::ldrh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + /// Whether or not a memory address displacement fits into the maximum number of /// bits such that it can be used without loading it into a register first. pub fn mem_disp_fits_bits(disp: i32) -> bool { @@ -741,6 +786,51 @@ pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// STRH - store a halfword into memory +pub fn strh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less."); + + HalfwordImm::strh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STRH (pre-index) - store a halfword into memory, update the base pointer before loading it +pub fn strh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::strh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STRH (post-index) - store a halfword into memory, update the base pointer after loading it +pub fn strh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), 
A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::strh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + /// STUR - store a value in a register at a memory address pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { @@ -1098,6 +1188,21 @@ mod tests { check_bytes("6a0d41f8", |cb| ldr_pre(cb, X10, A64Opnd::new_mem(64, X11, 16))); } + #[test] + fn test_ldrh() { + check_bytes("6a194079", |cb| ldrh(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldrh_pre() { + check_bytes("6acd4078", |cb| ldrh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldrh_post() { + check_bytes("6ac54078", |cb| ldrh_post(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + #[test] fn test_ldur_memory() { check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); @@ -1223,6 +1328,21 @@ mod tests { check_bytes("6a0d1ff8", |cb| str_pre(cb, X10, A64Opnd::new_mem(64, X11, -16))); } + #[test] + fn test_strh() { + check_bytes("6a190079", |cb| strh(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_strh_pre() { + check_bytes("6acd0078", |cb| strh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_strh_post() { + check_bytes("6ac50078", |cb| strh_post(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + #[test] fn test_stur() { check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); From c2e9253893461f931ea1a59b5996db06394c009f Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 25 Aug 2022 18:19:40 -0700 Subject: [PATCH 511/546] Stop saying it's temp checks (https://github.com/Shopify/ruby/pull/441) --- .cirrus.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff 
--git a/.cirrus.yml b/.cirrus.yml index df57ebd7f65b39..db8cd0b9344196 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -63,10 +63,9 @@ task: make_test-all_script: make test-all make_test-spec_script: make test-spec - # The following is to test YJIT on ARM64 CPUs available on Cirrus CI yjit_task: - name: Arm64 Graviton2 / $CC YJIT New Backend Temp Checks + name: Arm64 Graviton2 / $CC YJIT auto_cancellation: $CIRRUS_BRANCH != 'master' skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}')" arm_container: @@ -126,19 +125,9 @@ yjit_task: boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 - - # Check that we can do a full ruby build full_build_script: source $HOME/.cargo/env && make -j - - # Check that we can build rdoc successfully make_rdoc_script: source $HOME/.cargo/env && make -j rdoc - - # Check that we can run btest successfully make_btest_script: source $HOME/.cargo/env && make -j btest RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" - - # Check that we can run test-all successfully (running TestGCCompact separately until we fix its performance) make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" - - # Check that we can run test-spec successfully make_test_spec_script: source $HOME/.cargo/env && make -j test-spec RUN_OPTS="--yjit-call-threshold=1" From 29e0713a1272cb63f1e3cebfab85dec2424ead0f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 25 Aug 2022 21:19:56 -0400 Subject: [PATCH 512/546] TBZ and TBNZ for AArch64 (https://github.com/Shopify/ruby/pull/434) --- 
yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/inst/test_bit.rs | 135 ++++++++++++++++++++++++++++ yjit/src/asm/arm64/mod.rs | 34 +++++++ 3 files changed, 171 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/test_bit.rs diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index f4c27a51021cae..b3a77e73c98eb7 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -22,6 +22,7 @@ mod reg_pair; mod sbfm; mod shift_imm; mod sys_reg; +mod test_bit; pub use atomic::Atomic; pub use branch::Branch; @@ -44,3 +45,4 @@ pub use reg_pair::RegisterPair; pub use sbfm::SBFM; pub use shift_imm::ShiftImm; pub use sys_reg::SysReg; +pub use test_bit::TestBit; diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs new file mode 100644 index 00000000000000..1961e659490b94 --- /dev/null +++ b/yjit/src/asm/arm64/inst/test_bit.rs @@ -0,0 +1,135 @@ +/// The upper bit of the bit number to test. +#[derive(Debug)] +enum B5 { + /// When the bit number is below 32. + B532 = 0, + + /// When the bit number is equal to or above 32. + B564 = 1 +} + +/// A convenience function so that we can convert the bit number directly into a +/// B5 variant. +impl From for B5 { + fn from(bit_num: u8) -> Self { + match bit_num { + 0..=31 => B5::B532, + 32..=63 => B5::B564, + _ => panic!("Invalid bit number: {}", bit_num) + } + } +} + +/// The operation to perform for this instruction. +enum Op { + /// The test bit zero operation. + TBZ = 0, + + /// The test bit not zero operation. + TBNZ = 1 +} + +/// The struct that represents an A64 test bit instruction that can be encoded. +/// +/// TBNZ/TBZ +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 0 1 1 | +/// | b5 op b40............. 
imm14.......................................... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct TestBit { + /// The number of the register to test. + rt: u8, + + /// The PC-relative offset to the target instruction in term of number of + /// instructions. + imm14: i16, + + /// The lower 5 bits of the bit number to be tested. + b40: u8, + + /// The operation to perform for this instruction. + op: Op, + + /// The upper bit of the bit number to test. + b5: B5 +} + +impl TestBit { + /// TBNZ + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en + pub fn tbnz(rt: u8, bit_num: u8, offset: i16) -> Self { + Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBNZ, b5: bit_num.into() } + } + + /// TBZ + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en + pub fn tbz(rt: u8, bit_num: u8, offset: i16) -> Self { + Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBZ, b5: bit_num.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b11011; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: TestBit) -> Self { + let b40 = (inst.b40 & 0b11111) as u32; + let mut imm14 = (inst.imm14 & ((1 << 13) - 1)) as u32; + + if inst.imm14 < 0 { + imm14 |= (1 << 13); + } + + 0 + | ((inst.b5 as u32) << 31) + | (FAMILY << 25) + | ((inst.op as u32) << 24) + | (b40 << 19) + | (imm14 << 5) + | inst.rt as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: TestBit) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tbnz() { + let inst = TestBit::tbnz(0, 0, 0); + let result: u32 = inst.into(); + assert_eq!(0x37000000, result); + } + + #[test] + fn test_tbnz_negative() { + let inst = TestBit::tbnz(0, 0, -1); + let result: u32 = inst.into(); + assert_eq!(0x3707ffe0, result); + } + + #[test] + fn test_tbz() { + let inst = TestBit::tbz(0, 0, 0); + let result: u32 = inst.into(); + assert_eq!(0x36000000, result); + } + + #[test] + fn test_tbz_negative() { + let inst = TestBit::tbz(0, 0, -1); + let result: u32 = inst.into(); + assert_eq!(0x3607ffe0, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index cf898d2b5a9706..a6aa8ffcbb46a4 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -934,6 +934,30 @@ pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// TBNZ - test bit and branch if not zero +pub fn tbnz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) { + let bytes: [u8; 4] = match (rt, bit_num, offset) { + (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => { + TestBit::tbnz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into() + }, + _ => panic!("Invalid operand combination to tbnz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TBZ - test bit and branch if zero +pub fn tbz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) { + let bytes: [u8; 4] = match (rt, bit_num, offset) { + (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => { + TestBit::tbz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into() + }, + _ => panic!("Invalid operand combination to tbz instruction.") + }; + + cb.write_bytes(&bytes); +} + /// TST - test the bits of a register against a mask, then update flags pub fn tst(cb: &mut CodeBlock, rn: 
A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rn, rm) { @@ -1393,6 +1417,16 @@ mod tests { check_bytes("6a7d4093", |cb| sxtw(cb, X10, W11)); } + #[test] + fn test_tbnz() { + check_bytes("4a005037", |cb| tbnz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2))); + } + + #[test] + fn test_tbz() { + check_bytes("4a005036", |cb| tbz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2))); + } + #[test] fn test_tst_register() { check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); From 46007b88af82d6ff22fc01edb7c74922dfa5c68a Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 26 Aug 2022 14:02:51 -0400 Subject: [PATCH 513/546] A64: Only clear icache when writing out new code (https://github.com/Shopify/ruby/pull/442) Previously we cleared the cache for all the code in the system when we flip memory protection, which was prohibitively expensive since the operation is not constant time. Instead, only clear the cache for the memory region of newly written code when we write out new code. This brings the runtime for the 30k_if_else test down to about 6 seconds from the previous 45 seconds on my laptop. --- yjit.c | 10 ++++++++-- yjit/bindgen/src/main.rs | 1 + yjit/src/backend/arm64/mod.rs | 10 ++++++++++ yjit/src/cruby_bindings.inc.rs | 6 ++++++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/yjit.c b/yjit.c index 70b98d4844a42d..a8341910706e9d 100644 --- a/yjit.c +++ b/yjit.c @@ -78,11 +78,17 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size) rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s\n", mem_block, (unsigned long)mem_size, strerror(errno)); } +} +// `start` is inclusive and `end` is exclusive. +void +rb_yjit_icache_invalidate(void *start, void *end) +{ // Clear/invalidate the instruction cache. Compiles to nothing on x86_64 - // but required on ARM. On Darwin it's the same as calling sys_icache_invalidate(). + // but required on ARM before running freshly written code. + // On Darwin it's the same as calling sys_icache_invalidate(). 
#ifdef __GNUC__ - __builtin___clear_cache(mem_block, (char *)mem_block + mem_size); + __builtin___clear_cache(start, end); #endif } diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index a4c0b9850444b9..df4083638d40bd 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -273,6 +273,7 @@ fn main() { .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") .allowlist_function("rb_yjit_exit_locations_dict") + .allowlist_function("rb_yjit_icache_invalidate") // from vm_sync.h .allowlist_function("rb_vm_barrier") diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 08eb5efa3fb05c..e63b0113918092 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -961,6 +961,16 @@ impl Assembler }; } + // Invalidate icache for newly written out region so we don't run + // stale code. + { + let start = cb.get_ptr(start_write_pos).raw_ptr(); + let write_ptr = cb.get_write_ptr().raw_ptr(); + let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr(); + let end = std::cmp::min(write_ptr, codeblock_end); + unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + } + gc_offsets } diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 83e9f580bfa9ad..591408e1da644e 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1020,6 +1020,12 @@ extern "C" { extern "C" { pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); } +extern "C" { + pub fn rb_yjit_icache_invalidate( + start: *mut ::std::os::raw::c_void, + end: *mut ::std::os::raw::c_void, + ); +} extern "C" { pub fn rb_yjit_exit_locations_dict( yjit_raw_samples: *mut VALUE, From d694f320e40e77ab432f4d21575251ac0ab4ab76 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 26 Aug 2022 19:21:45 -0400 Subject: [PATCH 514/546] Fixed width immediates (https://github.com/Shopify/ruby/pull/437) There are a lot of times when encoding AArch64 
instructions that we need to represent an integer value with a custom fixed width. For example, the offset for a B instruction is 26 bits, so we store an i32 on the instruction struct and then mask it when we encode. We've been doing this masking everywhere, which has worked, but it's getting a bit copy-pasty all over the place. This commit centralizes that logic to make sure we stay consistent. --- yjit/src/asm/arm64/arg/mod.rs | 2 + yjit/src/asm/arm64/arg/truncate.rs | 66 +++++++++++++++++++++++++ yjit/src/asm/arm64/inst/branch_cond.rs | 18 ++++--- yjit/src/asm/arm64/inst/call.rs | 14 +++--- yjit/src/asm/arm64/inst/data_reg.rs | 6 +-- yjit/src/asm/arm64/inst/halfword_imm.rs | 7 ++- yjit/src/asm/arm64/inst/load_literal.rs | 6 +-- yjit/src/asm/arm64/inst/load_store.rs | 6 +-- yjit/src/asm/arm64/inst/logical_reg.rs | 6 +-- yjit/src/asm/arm64/inst/reg_pair.rs | 10 ++-- yjit/src/asm/arm64/inst/sbfm.rs | 9 ++-- yjit/src/asm/arm64/inst/test_bit.rs | 8 ++- 12 files changed, 110 insertions(+), 48 deletions(-) create mode 100644 yjit/src/asm/arm64/arg/truncate.rs diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs index 30f3cc3dfedb6b..9bf4a8ea1322b3 100644 --- a/yjit/src/asm/arm64/arg/mod.rs +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -6,9 +6,11 @@ mod condition; mod sf; mod shifted_imm; mod sys_reg; +mod truncate; pub use bitmask_imm::BitmaskImmediate; pub use condition::Condition; pub use sf::Sf; pub use shifted_imm::ShiftedImmediate; pub use sys_reg::SystemRegister; +pub use truncate::{truncate_imm, truncate_uimm}; diff --git a/yjit/src/asm/arm64/arg/truncate.rs b/yjit/src/asm/arm64/arg/truncate.rs new file mode 100644 index 00000000000000..52f2c012cb5b72 --- /dev/null +++ b/yjit/src/asm/arm64/arg/truncate.rs @@ -0,0 +1,66 @@ +// There are many instances in AArch64 instruction encoding where you represent +// an integer value with a particular bit width that isn't a power of 2. 
These +// functions represent truncating those integer values down to the appropriate +// number of bits. + +/// Truncate a signed immediate to fit into a compile-time known width. It is +/// assumed before calling this function that the value fits into the correct +/// size. If it doesn't, then this function will panic. +/// +/// When the value is positive, this should effectively be a no-op since we're +/// just dropping leading zeroes. When the value is negative we should only be +/// dropping leading ones. +pub fn truncate_imm, const WIDTH: usize>(imm: T) -> u32 { + let value: i32 = imm.into(); + let masked = (value as u32) & ((1 << WIDTH) - 1); + + // Assert that we didn't drop any bits by truncating. + if value >= 0 { + assert_eq!(value as u32, masked); + } else { + assert_eq!(value as u32, masked | (u32::MAX << WIDTH)); + } + + masked +} + +/// Truncate an unsigned immediate to fit into a compile-time known width. It is +/// assumed before calling this function that the value fits into the correct +/// size. If it doesn't, then this function will panic. +/// +/// This should effectively be a no-op since we're just dropping leading zeroes. +pub fn truncate_uimm, const WIDTH: usize>(uimm: T) -> u32 { + let value: u32 = uimm.into(); + let masked = (value & ((1 << WIDTH) - 1)); + + // Assert that we didn't drop any bits by truncating. 
+ assert_eq!(value, masked); + + masked +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_truncate_imm_positive() { + let inst = truncate_imm::(5); + let result: u32 = inst.into(); + assert_eq!(0b0101, result); + } + + #[test] + fn test_truncate_imm_negative() { + let inst = truncate_imm::(-5); + let result: u32 = inst.into(); + assert_eq!(0b1011, result); + } + + #[test] + fn test_truncate_uimm() { + let inst = truncate_uimm::(5); + let result: u32 = inst.into(); + assert_eq!(0b0101, result); + } +} diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs index 33cc9c364918e7..a6bc79dffede0a 100644 --- a/yjit/src/asm/arm64/inst/branch_cond.rs +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -1,4 +1,4 @@ -use super::super::arg::Condition; +use super::super::arg::{Condition, truncate_imm}; /// The struct that represents an A64 conditional branch instruction that can be /// encoded. @@ -31,12 +31,10 @@ const FAMILY: u32 = 0b101; impl From for u32 { /// Convert an instruction into a 32-bit value. 
fn from(inst: BranchCond) -> Self { - let imm19 = (inst.imm19 as u32) & ((1 << 19) - 1); - 0 | (1 << 30) | (FAMILY << 26) - | (imm19 << 5) + | (truncate_imm::<_, 19>(inst.imm19) << 5) | (inst.cond as u32) } } @@ -66,8 +64,14 @@ mod tests { } #[test] - fn test_b_ne_neg() { - let result: u32 = BranchCond::bcond(Condition::NE, -128).into(); - assert_eq!(0x54fffc01, result); + fn test_b_eq_max() { + let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 20) - 4).into(); + assert_eq!(0x547fffe0, result); + } + + #[test] + fn test_b_eq_min() { + let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 20)).into(); + assert_eq!(0x54800000, result); } } diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs index 8d65359f771261..32d924f799186f 100644 --- a/yjit/src/asm/arm64/inst/call.rs +++ b/yjit/src/asm/arm64/inst/call.rs @@ -1,3 +1,5 @@ +use super::super::arg::truncate_imm; + /// The operation to perform for this instruction. enum Op { /// Branch directly, with a hint that this is not a subroutine call or @@ -45,12 +47,10 @@ const FAMILY: u32 = 0b101; impl From for u32 { /// Convert an instruction into a 32-bit value. 
fn from(inst: Call) -> Self { - let imm26 = (inst.imm26 as u32) & ((1 << 26) - 1); - 0 | ((inst.op as u32) << 31) | (FAMILY << 26) - | imm26 + | truncate_imm::<_, 26>(inst.imm26) } } @@ -92,13 +92,13 @@ mod tests { #[test] fn test_b_positive() { - let result: u32 = Call::b(256).into(); - assert_eq!(0x14000100, result); + let result: u32 = Call::b((1 << 25) - 1).into(); + assert_eq!(0x15ffffff, result); } #[test] fn test_b_negative() { - let result: u32 = Call::b(-256).into(); - assert_eq!(0x17ffff00, result); + let result: u32 = Call::b(-(1 << 25)).into(); + assert_eq!(0x16000000, result); } } diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs index e2c2723fcffe1d..a742121f1fd172 100644 --- a/yjit/src/asm/arm64/inst/data_reg.rs +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -1,4 +1,4 @@ -use super::super::arg::Sf; +use super::super::arg::{Sf, truncate_uimm}; /// The operation being performed by this instruction. enum Op { @@ -129,8 +129,6 @@ const FAMILY: u32 = 0b0101; impl From for u32 { /// Convert an instruction into a 32-bit value. fn from(inst: DataReg) -> Self { - let imm6 = (inst.imm6 as u32) & ((1 << 6) - 1); - 0 | ((inst.sf as u32) << 31) | ((inst.op as u32) << 30) @@ -139,7 +137,7 @@ impl From for u32 { | (1 << 24) | ((inst.shift as u32) << 22) | ((inst.rm as u32) << 16) - | (imm6 << 10) + | (truncate_uimm::<_, 6>(inst.imm6) << 10) | ((inst.rn as u32) << 5) | inst.rd as u32 } diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs index 675e33d4a8b33e..c31d1f8945c120 100644 --- a/yjit/src/asm/arm64/inst/halfword_imm.rs +++ b/yjit/src/asm/arm64/inst/halfword_imm.rs @@ -1,3 +1,5 @@ +use super::super::arg::truncate_imm; + /// Whether this is a load or a store. 
enum Op { Load = 1, @@ -95,11 +97,12 @@ impl From for u32 { fn from(inst: HalfwordImm) -> Self { let (mut opc, imm) = match inst.index { Index::None => { - let mut imm12 = ((inst.imm / 2) as u32) & ((1 << 12) - 1); + assert_eq!(inst.imm & 1, 0, "immediate offset must be even"); + let imm12 = truncate_imm::<_, 12>(inst.imm / 2); (0b100, imm12) }, Index::PreIndex | Index::PostIndex => { - let mut imm9 = (inst.imm as u32) & ((1 << 9) - 1); + let imm9 = truncate_imm::<_, 9>(inst.imm); (0b000, (imm9 << 2) | (inst.index as u32)) } }; diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs index d2a5d57eea81e2..c5ab09713c0ea1 100644 --- a/yjit/src/asm/arm64/inst/load_literal.rs +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -1,3 +1,5 @@ +use super::super::arg::truncate_imm; + /// The size of the operands being operated on. enum Opc { Size32 = 0b00, @@ -50,13 +52,11 @@ const FAMILY: u32 = 0b0100; impl From for u32 { /// Convert an instruction into a 32-bit value. fn from(inst: LoadLiteral) -> Self { - let imm19 = (inst.imm19 as u32) & ((1 << 19) - 1); - 0 | ((inst.opc as u32) << 30) | (1 << 28) | (FAMILY << 25) - | (imm19 << 5) + | (truncate_imm::<_, 19>(inst.imm19) << 5) | (inst.rt as u32) } } diff --git a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs index 80a67c837e025b..ea42f2d17f2d87 100644 --- a/yjit/src/asm/arm64/inst/load_store.rs +++ b/yjit/src/asm/arm64/inst/load_store.rs @@ -1,3 +1,5 @@ +use super::super::arg::truncate_imm; + /// The size of the operands being operated on. enum Size { Size32 = 0b10, @@ -110,14 +112,12 @@ const FAMILY: u32 = 0b0100; impl From for u32 { /// Convert an instruction into a 32-bit value. 
fn from(inst: LoadStore) -> Self { - let imm9 = (inst.imm9 as u32) & ((1 << 9) - 1); - 0 | ((inst.size as u32) << 30) | (0b11 << 28) | (FAMILY << 25) | ((inst.opc as u32) << 22) - | (imm9 << 12) + | (truncate_imm::<_, 9>(inst.imm9) << 12) | ((inst.idx as u32) << 10) | ((inst.rn as u32) << 5) | (inst.rt as u32) diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs index 83230ac5b2ba62..a96805c9f961c0 100644 --- a/yjit/src/asm/arm64/inst/logical_reg.rs +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -1,4 +1,4 @@ -use super::super::arg::Sf; +use super::super::arg::{Sf, truncate_uimm}; /// Whether or not this is a NOT instruction. enum N { @@ -124,8 +124,6 @@ const FAMILY: u32 = 0b0101; impl From for u32 { /// Convert an instruction into a 32-bit value. fn from(inst: LogicalReg) -> Self { - let imm6 = (inst.imm6 as u32) & ((1 << 6) - 1); - 0 | ((inst.sf as u32) << 31) | ((inst.opc as u32) << 29) @@ -133,7 +131,7 @@ impl From for u32 { | ((inst.shift as u32) << 22) | ((inst.n as u32) << 21) | ((inst.rm as u32) << 16) - | (imm6 << 10) + | (truncate_uimm::<_, 6>(inst.imm6) << 10) | ((inst.rn as u32) << 5) | inst.rd as u32 } diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs index d8fece2ed64863..87690e3b4ab0e1 100644 --- a/yjit/src/asm/arm64/inst/reg_pair.rs +++ b/yjit/src/asm/arm64/inst/reg_pair.rs @@ -1,3 +1,5 @@ +use super::super::arg::truncate_imm; + /// The operation to perform for this instruction. enum Opc { /// When the registers are 32-bits wide. @@ -114,18 +116,12 @@ const FAMILY: u32 = 0b0100; impl From for u32 { /// Convert an instruction into a 32-bit value. 
fn from(inst: RegisterPair) -> Self { - let mut imm7 = (inst.imm7 as u32) & ((1 << 7) - 1); - - if inst.imm7 < 0 { - imm7 |= 1 << 6; - } - 0 | ((inst.opc as u32) << 30) | (1 << 29) | (FAMILY << 25) | ((inst.index as u32) << 22) - | (imm7 << 15) + | (truncate_imm::<_, 7>(inst.imm7) << 15) | ((inst.rt2 as u32) << 10) | ((inst.rn as u32) << 5) | (inst.rt1 as u32) diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs index 6f69e58043c2a1..860299898040fc 100644 --- a/yjit/src/asm/arm64/inst/sbfm.rs +++ b/yjit/src/asm/arm64/inst/sbfm.rs @@ -1,4 +1,4 @@ -use super::super::arg::Sf; +use super::super::arg::{Sf, truncate_uimm}; /// The struct that represents an A64 signed bitfield move instruction that can /// be encoded. @@ -56,16 +56,13 @@ const FAMILY: u32 = 0b1001; impl From for u32 { /// Convert an instruction into a 32-bit value. fn from(inst: SBFM) -> Self { - let immr = (inst.immr as u32) & ((1 << 6) - 1); - let imms = (inst.imms as u32) & ((1 << 6) - 1); - 0 | ((inst.sf as u32) << 31) | (FAMILY << 25) | (1 << 24) | ((inst.n as u32) << 22) - | (immr << 16) - | (imms << 10) + | (truncate_uimm::<_, 6>(inst.immr) << 16) + | (truncate_uimm::<_, 6>(inst.imms) << 10) | ((inst.rn as u32) << 5) | inst.rd as u32 } diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs index 1961e659490b94..c57a05ad2b83c6 100644 --- a/yjit/src/asm/arm64/inst/test_bit.rs +++ b/yjit/src/asm/arm64/inst/test_bit.rs @@ -1,3 +1,5 @@ +use super::super::arg::truncate_imm; + /// The upper bit of the bit number to test. #[derive(Debug)] enum B5 { @@ -77,11 +79,7 @@ impl From for u32 { /// Convert an instruction into a 32-bit value. 
fn from(inst: TestBit) -> Self { let b40 = (inst.b40 & 0b11111) as u32; - let mut imm14 = (inst.imm14 & ((1 << 13) - 1)) as u32; - - if inst.imm14 < 0 { - imm14 |= (1 << 13); - } + let imm14 = truncate_imm::<_, 14>(inst.imm14); 0 | ((inst.b5 as u32) << 31) From b5358a98e632200fdca6ac6f8bd36e448d3795aa Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 29 Aug 2022 18:18:20 +0200 Subject: [PATCH 515/546] Update to ruby/mspec@37151a0 --- spec/mspec/lib/mspec/runner/actions/leakchecker.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/mspec/lib/mspec/runner/actions/leakchecker.rb b/spec/mspec/lib/mspec/runner/actions/leakchecker.rb index 596b120d9f18ce..69181b71d3a4ad 100644 --- a/spec/mspec/lib/mspec/runner/actions/leakchecker.rb +++ b/spec/mspec/lib/mspec/runner/actions/leakchecker.rb @@ -173,7 +173,8 @@ def check_tempfile_leak def find_threads Thread.list.find_all {|t| - t != Thread.current && t.alive? + t != Thread.current && t.alive? && + !(t.thread_variable?(:"\0__detached_thread__") && t.thread_variable_get(:"\0__detached_thread__")) } end From 1315c5aad9d31a91e494657d98f61c9d6f65d8b1 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 29 Aug 2022 18:18:23 +0200 Subject: [PATCH 516/546] Update to ruby/spec@b8a8240 --- spec/ruby/core/dir/shared/chroot.rb | 2 +- spec/ruby/core/env/shared/update.rb | 7 +++---- spec/ruby/library/datetime/to_time_spec.rb | 3 ++- spec/ruby/library/time/to_datetime_spec.rb | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/spec/ruby/core/dir/shared/chroot.rb b/spec/ruby/core/dir/shared/chroot.rb index b14a4336707601..7c668c0fbbd42f 100644 --- a/spec/ruby/core/dir/shared/chroot.rb +++ b/spec/ruby/core/dir/shared/chroot.rb @@ -3,7 +3,7 @@ DirSpecs.create_mock_dirs @real_root = "../" * (File.dirname(__FILE__).count('/') - 1) - @ref_dir = File.join("/", Dir.new('/').entries.first) + @ref_dir = File.join("/", File.basename(Dir["/*"].first)) end after :all do diff --git 
a/spec/ruby/core/env/shared/update.rb b/spec/ruby/core/env/shared/update.rb index 3101f9c561d8bf..7d4799955bdd52 100644 --- a/spec/ruby/core/env/shared/update.rb +++ b/spec/ruby/core/env/shared/update.rb @@ -17,10 +17,9 @@ ruby_version_is "3.2" do it "adds the multiple parameter hashes to ENV, returning ENV" do - ENV.send(@method, {"foo" => "0", "bar" => "1"}, {"baz" => "2"}).should equal(ENV) - ENV["foo"].should == "0" - ENV["bar"].should == "1" - ENV["baz"].should == "2" + ENV.send(@method, {"foo" => "multi1"}, {"bar" => "multi2"}).should equal(ENV) + ENV["foo"].should == "multi1" + ENV["bar"].should == "multi2" end end diff --git a/spec/ruby/library/datetime/to_time_spec.rb b/spec/ruby/library/datetime/to_time_spec.rb index 88a7aaa7390afe..95eca864dadc6e 100644 --- a/spec/ruby/library/datetime/to_time_spec.rb +++ b/spec/ruby/library/datetime/to_time_spec.rb @@ -18,7 +18,8 @@ time.sec.should == 59 end - version_is(Date::VERSION, '3.2.3') do + date_version = defined?(Date::VERSION) ? Date::VERSION : '0.0.0' + version_is(date_version, '3.2.3') do it "returns a Time representing the same instant before Gregorian" do datetime = DateTime.civil(1582, 10, 4, 23, 58, 59) time = datetime.to_time.utc diff --git a/spec/ruby/library/time/to_datetime_spec.rb b/spec/ruby/library/time/to_datetime_spec.rb index c5561535b288b8..6025950b5941b3 100644 --- a/spec/ruby/library/time/to_datetime_spec.rb +++ b/spec/ruby/library/time/to_datetime_spec.rb @@ -13,7 +13,8 @@ datetime.sec.should == 59 end - version_is(Date::VERSION, '3.2.3') do + date_version = defined?(Date::VERSION) ? 
Date::VERSION : '0.0.0' + version_is(date_version, '3.2.3') do it "returns a DateTime representing the same instant before Gregorian" do time = Time.utc(1582, 10, 14, 23, 58, 59) datetime = time.to_datetime From 737402e9383332d028c1c2f26a3f1a2cd78fab0d Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 29 Aug 2022 09:55:53 -0700 Subject: [PATCH 517/546] Skip a couple of chroot spec faillures I don't come up with a way to fix it right away. We'd need some experiments on a pull request. --- spec/ruby/core/dir/shared/chroot.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spec/ruby/core/dir/shared/chroot.rb b/spec/ruby/core/dir/shared/chroot.rb index 7c668c0fbbd42f..8c0599fe3f175d 100644 --- a/spec/ruby/core/dir/shared/chroot.rb +++ b/spec/ruby/core/dir/shared/chroot.rb @@ -14,10 +14,13 @@ DirSpecs.delete_mock_dirs end + # Pending until https://github.com/ruby/ruby/runs/8075149420 is fixed + compilations_ci = ENV["GITHUB_WORKFLOW"] == "Compilations" + it "can be used to change the process' root directory" do -> { Dir.send(@method, File.dirname(__FILE__)) }.should_not raise_error File.should.exist?("/#{File.basename(__FILE__)}") - end + end unless compilations_ci it "returns 0 if successful" do Dir.send(@method, '/').should == 0 @@ -31,7 +34,7 @@ Dir.send(@method, @real_root) File.should.exist?(@ref_dir) File.should_not.exist?("/#{File.basename(__FILE__)}") - end + end unless compilations_ci it "calls #to_path on non-String argument" do p = mock('path') From ddca3482ef53911ce732c91e715d0439d3b47514 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 29 Aug 2022 18:05:06 -0700 Subject: [PATCH 518/546] Check only symbol flag bits (#6301) * Check only symbol flag bits * Check all 4 bits --- yjit/src/codegen.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 07e8500f620b46..744495eb298eac 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -3454,7 +3454,8 
@@ fn jit_guard_known_klass( asm.comment("guard object is static symbol"); assert!(RUBY_SPECIAL_SHIFT == 8); - asm.cmp(obj_opnd, Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + let flag_bits = asm.and(obj_opnd, Opnd::UImm(0xf)); + asm.cmp(flag_bits, Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); } From d6acaa4d423200a1dcd8ea62fc65fc60944f2e84 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 30 Aug 2022 11:21:20 +0900 Subject: [PATCH 519/546] Undefine `ruby_debug_log` macro before the function definition Fix up 27173e3735ff. --- debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/debug.c b/debug.c index 3af7f26275033a..3dd0f7190621f5 100644 --- a/debug.c +++ b/debug.c @@ -499,6 +499,7 @@ pretty_filename(const char *path) return path; } +#undef ruby_debug_log void ruby_debug_log(const char *file, int line, const char *func_name, const char *fmt, ...) { From b98653f766613dbbaed02522af26f80078361502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Tue, 30 Aug 2022 10:05:18 +0200 Subject: [PATCH 520/546] [rubygems/rubygems] Let `Dir.tmpdir` use the standard path We're not fully in control of this folder, even when running our own tests, because MJIT creates some temp folders there when invoking GC. This bite tests running in ruby-core when making the behavior of `FileUtils.rm_rf` more strict, because these extra files could not be removed. Since this was originally added due to some failures on systems with non standard permissions on tmp folders, but I can no longer reproduce those, I'll remove it. 
https://github.com/rubygems/rubygems/commit/d2f21596ee --- test/rubygems/helper.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 37e113d3fcacde..e569986ef21c36 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -334,8 +334,6 @@ def setup # capture output Gem::DefaultUserInteraction.ui = Gem::MockGemUi.new - ENV["TMPDIR"] = @tempdir - @orig_SYSTEM_WIDE_CONFIG_FILE = Gem::ConfigFile::SYSTEM_WIDE_CONFIG_FILE Gem::ConfigFile.send :remove_const, :SYSTEM_WIDE_CONFIG_FILE Gem::ConfigFile.send :const_set, :SYSTEM_WIDE_CONFIG_FILE, From 497b5ee1803e67efe641671276a05831ad788898 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 30 Aug 2022 12:44:08 -0700 Subject: [PATCH 521/546] Normalize the YJIT Cirrus workflow a little --- .cirrus.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index db8cd0b9344196..ee5bafdee78b65 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -126,8 +126,7 @@ yjit_task: test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 full_build_script: source $HOME/.cargo/env && make -j - make_rdoc_script: source $HOME/.cargo/env && make -j rdoc - make_btest_script: source $HOME/.cargo/env && make -j btest RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" + make_btest_script: source $HOME/.cargo/env && make -j test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" make_test_spec_script: source $HOME/.cargo/env && make -j test-spec RUN_OPTS="--yjit-call-threshold=1" From 
918a658556d96aeb7bc0c8425e9095f0a22b04a1 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 30 Aug 2022 12:44:55 -0700 Subject: [PATCH 522/546] Run cargo test on Cirrus --- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index ee5bafdee78b65..be4d453fb89075 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -126,6 +126,7 @@ yjit_task: test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 full_build_script: source $HOME/.cargo/env && make -j + cargo_test_script: source $HOME/.cargo/env && cd yjit && cargo test make_btest_script: source $HOME/.cargo/env && make -j test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" From 5dbc725f4d6b07d9d1214ae896fd3feeae8aa23b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 30 Aug 2022 13:02:26 -0700 Subject: [PATCH 523/546] Skip linking rb_yjit_icache_invalidate on cargo test Co-authored-by: Kevin Newton --- yjit/src/backend/arm64/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index e63b0113918092..69524be6113280 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -963,6 +963,7 @@ impl Assembler // Invalidate icache for newly written out region so we don't run // stale code. 
+ #[cfg(not(test))] { let start = cb.get_ptr(start_write_pos).raw_ptr(); let write_ptr = cb.get_write_ptr().raw_ptr(); From 4a4daf00afbd5f6041d2b832df10c5c389781f1a Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 30 Aug 2022 16:04:02 -0400 Subject: [PATCH 524/546] Update .cirrus.yml --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index be4d453fb89075..4949e6cb3ea24d 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -127,7 +127,7 @@ yjit_task: output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 full_build_script: source $HOME/.cargo/env && make -j cargo_test_script: source $HOME/.cargo/env && cd yjit && cargo test - make_btest_script: source $HOME/.cargo/env && make -j test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" + make_test_script: source $HOME/.cargo/env && make -j test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" make_test_spec_script: source $HOME/.cargo/env && make -j test-spec RUN_OPTS="--yjit-call-threshold=1" From f4cdfff0843db6ee5798f57992b2a84f5ab2c578 Mon Sep 17 00:00:00 2001 From: git Date: Wed, 31 Aug 2022 06:22:01 +0900 Subject: [PATCH 525/546] * 2022-08-31 [ci skip] --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 2f2b02a57fe9ab..b531db40965cf4 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 30 +#define RUBY_RELEASE_DAY 31 #include "ruby/version.h" #include "ruby/internal/abi.h" From 221a52368362a16570f5025a9a055534a49bfbe9 Mon Sep 17 00:00:00 2001 From: Takashi 
Kokubun Date: Tue, 30 Aug 2022 17:07:27 -0700 Subject: [PATCH 526/546] Put a documentation about MJIT [ci skip] --- doc/mjit/mjit.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 doc/mjit/mjit.md diff --git a/doc/mjit/mjit.md b/doc/mjit/mjit.md new file mode 100644 index 00000000000000..4d345a95ae61f1 --- /dev/null +++ b/doc/mjit/mjit.md @@ -0,0 +1,16 @@ +# MJIT + +Here are some tips that might be useful when you work on MJIT: + +## Always run make install + +Always run `make install` before running MJIT. It could easily cause a SEGV if you don't. +MJIT looks for the installed header for security reasons. + +## --mjit-debug vs --mjit-debug=-ggdb3 + +`--mjit-debug=[flags]` allows you to specify arbitrary flags while keeping other compiler flags like `-O3`, +which is useful for profiling benchmarks. + +`--mjit-debug` alone, on the other hand, disables `-O3` and adds debug flags. +If you're debugging MJIT, what you need to use is not `--mjit-debug=-ggdb3` but `--mjit-debug`. From 739380c97d2c6440da0d5ae07f9291b1e8cde1b2 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 20 Aug 2022 11:25:54 +0200 Subject: [PATCH 527/546] [ruby/net-protocol] Improve BufferedIO performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `BufferedIO` is a bit inefficient for reading large responses because it use the classic `buffer.slice!` technique which cause a lot of unnecessary string copying. This is particularly visible on line based protocol when reading line by line. Instead of repeatedly shifting the string, we can keep track of which offset we're at, to know how many bytes are left in the buffer. This change also open the door to further optimization by increasing the buffer size, as previously `slice!` would get slower the larger the buffer is. 
Benchmark results: ``` === 1k === Warming up -------------------------------------- 1k 1.234k i/100ms 1k opt 1.283k i/100ms Calculating ------------------------------------- 1k 12.615k (± 0.9%) i/s - 64.168k in 5.086995s 1k opt 12.856k (± 0.9%) i/s - 65.433k in 5.090051s Comparison: 1k: 12615.2 i/s 1k opt: 12856.0 i/s - 1.02x (± 0.00) faster === 10k === Warming up -------------------------------------- 10k 1.165k i/100ms 10k opt 1.269k i/100ms Calculating ------------------------------------- 10k 11.550k (± 2.4%) i/s - 58.250k in 5.046378s 10k opt 12.736k (± 1.0%) i/s - 64.719k in 5.081969s Comparison: 10k: 11550.3 i/s 10k opt: 12736.3 i/s - 1.10x (± 0.00) faster === 100k === Warming up -------------------------------------- 100k 809.000 i/100ms 100k opt 926.000 i/100ms Calculating ------------------------------------- 100k 8.054k (± 3.0%) i/s - 40.450k in 5.028299s 100k opt 9.286k (± 2.2%) i/s - 47.226k in 5.088841s Comparison: 100k: 8053.6 i/s 100k opt: 9285.5 i/s - 1.15x (± 0.00) faster === 1M === Warming up -------------------------------------- 1M 249.000 i/100ms 1M opt 315.000 i/100ms Calculating ------------------------------------- 1M 2.448k (± 2.5%) i/s - 12.450k in 5.089744s 1M opt 3.119k (± 2.6%) i/s - 15.750k in 5.053772s Comparison: 1M: 2447.8 i/s 1M opt: 3118.8 i/s - 1.27x (± 0.00) faster ``` Profiling before (1MB responses): ``` ================================== Mode: wall(1000) Samples: 5276 (0.00% miss rate) GC: 394 (7.47%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 1622 (30.7%) 1622 (30.7%) IO#wait_readable 777 (14.7%) 777 (14.7%) IO#read_nonblock 365 (6.9%) 365 (6.9%) (sweeping) 2705 (51.3%) 364 (6.9%) Net::BufferedIO#rbuf_fill 264 (5.0%) 264 (5.0%) String#index 223 (4.2%) 223 (4.2%) String#sub 221 (4.2%) 221 (4.2%) String#slice! 
185 (3.5%) 185 (3.5%) String#split 108 (2.0%) 108 (2.0%) IO#write_nonblock 101 (1.9%) 101 (1.9%) String#downcase 66 (1.3%) 66 (1.3%) Net::BufferedIO#LOG 57 (1.1%) 57 (1.1%) String#count 51 (1.0%) 51 (1.0%) String#to_s 391 (7.4%) 50 (0.9%) Net::HTTPGenericRequest#write_header 50 (0.9%) 50 (0.9%) String#capitalize 49 (0.9%) 49 (0.9%) Array#join 47 (0.9%) 47 (0.9%) String#b 106 (2.0%) 36 (0.7%) Net::HTTPHeader#set_field 34 (0.6%) 34 (0.6%) Module#=== 33 (0.6%) 33 (0.6%) String#[] 140 (2.7%) 29 (0.5%) Net::BufferedIO#write0 29 (0.5%) 29 (0.5%) (marking) 281 (5.3%) 27 (0.5%) Net::BufferedIO#rbuf_consume 1195 (22.6%) 25 (0.5%) Net::HTTPResponse#read_body 1024 (19.4%) 25 (0.5%) Net::HTTPResponse.each_response_header 86 (1.6%) 24 (0.5%) Net::HTTPHeader#set_field 23 (0.4%) 23 (0.4%) Net::HTTP#proxy_uri 51 (1.0%) 23 (0.4%) Net::HTTPHeader#initialize_http_header 2225 (42.2%) 22 (0.4%) Net::BufferedIO#readuntil 20 (0.4%) 20 (0.4%) Regexp#=== ``` Profiling after (1MB responses): ``` ================================== Mode: wall(1000) Samples: 15180 (0.00% miss rate) GC: 1688 (11.12%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 4534 (29.9%) 4534 (29.9%) IO#read_nonblock 10650 (70.2%) 3944 (26.0%) Net::HTTPOpt::BufferedIOOpt#rbuf_fill 2101 (13.8%) 2101 (13.8%) IO#wait_readable 1442 (9.5%) 1442 (9.5%) (sweeping) 360 (2.4%) 360 (2.4%) String#sub 312 (2.1%) 312 (2.1%) String#split 265 (1.7%) 265 (1.7%) String#bytesize 246 (1.6%) 246 (1.6%) (marking) 151 (1.0%) 151 (1.0%) IO#write_nonblock 125 (0.8%) 125 (0.8%) String#downcase 116 (0.8%) 116 (0.8%) String#index 113 (0.7%) 113 (0.7%) Module#=== 162 (1.1%) 89 (0.6%) Net::HTTPOpt::BufferedIOOpt#rbuf_consume_all_shareable! 158 (1.0%) 65 (0.4%) Net::HTTPHeader#set_field 63 (0.4%) 63 (0.4%) String#capitalize 63 (0.4%) 63 (0.4%) BasicObject#equal? 
58 (0.4%) 58 (0.4%) Regexp#match 58 (0.4%) 58 (0.4%) String#[] 449 (3.0%) 56 (0.4%) Net::HTTPGenericRequest#write_header 53 (0.3%) 53 (0.3%) String#to_s 52 (0.3%) 52 (0.3%) Net::HTTPOpt::BufferedIOOpt#LOG 52 (0.3%) 52 (0.3%) String#count 44 (0.3%) 44 (0.3%) String#byteslice 44 (0.3%) 44 (0.3%) Array#join 1096 (7.2%) 42 (0.3%) Net::HTTPResponse.each_response_header 2617 (17.2%) 40 (0.3%) Net::HTTPOpt::BufferedIOOpt#readuntil 132 (0.9%) 30 (0.2%) Net::HTTPOpt::BufferedIOOpt#rbuf_consume 28 (0.2%) 28 (0.2%) Regexp#=== 27 (0.2%) 27 (0.2%) Net::HTTP#proxy_uri 8862 (58.4%) 27 (0.2%) Net::HTTPResponse#read_body ```` Benchmark code: ```ruby require "fileutils" DIR = "/tmp/www" FileUtils.mkdir_p(DIR) HOST = "127.0.0.1" PORT = 8080 CONF = <<~EOS daemon off; worker_processes 2; events { worker_connections 128; } http { server_tokens off; charset utf-8; server { server_name localhost; listen #{HOST}:#{PORT}; keepalive_requests 10000000; keepalive_timeout 3600s; error_page 500 502 503 504 /50x.html; location / { root #{DIR}; } } } EOS File.write(File.join(DIR, "1k.txt"), 'a' * 1024) File.write(File.join(DIR, "10k.txt"), 'a' * 1024 * 10) File.write(File.join(DIR, "100k.txt"), 'a' * 1024 * 100) File.write(File.join(DIR, "1M.txt"), 'a' * 1024 * 1024) File.write(File.join(DIR, "nginx.conf"), CONF) require "benchmark/ips" require "net/http" nginx_pid = Process.spawn('nginx', '-c', File.join(DIR, "nginx.conf")) module Net class HTTPOpt < HTTP class BufferedIOOpt < ::Net::BufferedIO #:nodoc: internal use only def initialize(io, read_timeout: 60, write_timeout: 60, continue_timeout: nil, debug_output: nil) @io = io @read_timeout = read_timeout @write_timeout = write_timeout @continue_timeout = continue_timeout @debug_output = debug_output @rbuf = ''.b @rbuf_offset = 0 end attr_reader :io attr_accessor :read_timeout attr_accessor :write_timeout attr_accessor :continue_timeout attr_accessor :debug_output def inspect "#<#{self.class} io=#{@io}>" end def eof? @io.eof? end def closed? 
@io.closed? end def close @io.close end # # Read # public def read(len, dest = ''.b, ignore_eof = false) LOG "reading #{len} bytes..." read_bytes = 0 begin while read_bytes + rbuf_size < len if s = rbuf_consume_all_shareable! read_bytes += s.bytesize dest << s end rbuf_fill end s = rbuf_consume(len - read_bytes) read_bytes += s.bytesize dest << s rescue EOFError raise unless ignore_eof end LOG "read #{read_bytes} bytes" dest end def read_all(dest = ''.b) LOG 'reading all...' read_bytes = 0 begin while true if s = rbuf_consume_all_shareable! read_bytes += s.bytesize dest << s end rbuf_fill end rescue EOFError ; end LOG "read #{read_bytes} bytes" dest end def readuntil(terminator, ignore_eof = false) offset = @rbuf_offset begin until idx = @rbuf.index(terminator, offset) offset = @rbuf.bytesize rbuf_fill end return rbuf_consume(idx + terminator.bytesize - @rbuf_offset) rescue EOFError raise unless ignore_eof return rbuf_consume end end def readline readuntil("\n").chop end private BUFSIZE = 1024 * 16 def rbuf_fill tmp = @rbuf_empty ? @rbuf : nil case rv = @io.read_nonblock(BUFSIZE, tmp, exception: false) when String @rbuf_empty = false if rv.equal?(tmp) @rbuf_offset = 0 else @rbuf << rv rv.clear end return when :wait_readable (io = @io.to_io).wait_readable(@read_timeout) or raise Net::ReadTimeout.new(io) # continue looping when :wait_writable # OpenSSL::Buffering#read_nonblock may fail with IO::WaitWritable. # http://www.openssl.org/support/faq.html#PROG10 (io = @io.to_io).wait_writable(@read_timeout) or raise Net::ReadTimeout.new(io) # continue looping when nil raise EOFError, 'end of file reached' end while true end def rbuf_flush if @rbuf_empty @rbuf.clear @rbuf_offset = 0 end nil end def rbuf_size @rbuf.bytesize - @rbuf_offset end # Warning: this method may share the buffer to avoid # copying. The caller must no longer use the returned # string once rbuf_fill has been called again def rbuf_consume_all_shareable! 
@rbuf_empty = true buf = if @rbuf_offset == 0 @rbuf else @rbuf.byteslice(@rbuf_offset..-1) end @rbuf_offset = @rbuf.bytesize buf end def rbuf_consume(len = nil) if @rbuf_offset == 0 && (len.nil? || len == @rbuf.bytesize) s = @rbuf @rbuf = ''.b @rbuf_offset = 0 @rbuf_empty = true elsif len.nil? s = @rbuf.byteslice(@rbuf_offset..-1) @rbuf = ''.b @rbuf_offset = 0 @rbuf_empty = true else s = @rbuf.byteslice(@rbuf_offset, len) @rbuf_offset += len @rbuf_empty = @rbuf_offset == @rbuf.bytesize rbuf_flush end @debug_output << %Q[-> #{s.dump}\n] if @debug_output s end # # Write # public def write(*strs) writing { write0(*strs) } end alias << write def writeline(str) writing { write0 str + "\r\n" } end private def writing @written_bytes = 0 @debug_output << '<- ' if @debug_output yield @debug_output << "\n" if @debug_output bytes = @written_bytes @written_bytes = nil bytes end def write0(*strs) @debug_output << strs.map(&:dump).join if @debug_output orig_written_bytes = @written_bytes strs.each_with_index do |str, i| need_retry = true case len = @io.write_nonblock(str, exception: false) when Integer @written_bytes += len len -= str.bytesize if len == 0 if strs.size == i+1 return @written_bytes - orig_written_bytes else need_retry = false # next string end elsif len < 0 str = str.byteslice(len, -len) else # len > 0 need_retry = false # next string end # continue looping when :wait_writable (io = @io.to_io).wait_writable(@write_timeout) or raise Net::WriteTimeout.new(io) # continue looping end while need_retry end end # # Logging # private def LOG_off @save_debug_out = @debug_output @debug_output = nil end def LOG_on @debug_output = @save_debug_out end def LOG(msg) return unless @debug_output @debug_output << msg + "\n" end end BufferedIO = BufferedIOOpt # Unchanged from ruby 3.1.1, only allow to lookup the modified BufferedIO def connect if use_ssl? # reference early to load OpenSSL before connecting, # as OpenSSL may take time to load.
@ssl_context = OpenSSL::SSL::SSLContext.new end if proxy? then conn_addr = proxy_address conn_port = proxy_port else conn_addr = conn_address conn_port = port end D "opening connection to #{conn_addr}:#{conn_port}..." begin s = Socket.tcp conn_addr, conn_port, @local_host, @local_port, connect_timeout: @open_timeout rescue => e e = Net::OpenTimeout.new(e) if e.is_a?(Errno::ETIMEDOUT) #for compatibility with previous versions raise e, "Failed to open TCP connection to " + "#{conn_addr}:#{conn_port} (#{e.message})" end s.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1) D "opened" if use_ssl? if proxy? plain_sock = BufferedIO.new(s, read_timeout: @read_timeout, write_timeout: @write_timeout, continue_timeout: @continue_timeout, debug_output: @debug_output) buf = "CONNECT #{conn_address}:#{@port} HTTP/#{HTTPVersion}\r\n" buf << "Host: #{@address}:#{@port}\r\n" if proxy_user credential = ["#{proxy_user}:#{proxy_pass}"].pack('m0') buf << "Proxy-Authorization: Basic #{credential}\r\n" end buf << "\r\n" plain_sock.write(buf) HTTPResponse.read_new(plain_sock).value # assuming nothing left in buffers after successful CONNECT response end ssl_parameters = Hash.new iv_list = instance_variables SSL_IVNAMES.each_with_index do |ivname, i| if iv_list.include?(ivname) value = instance_variable_get(ivname) unless value.nil? ssl_parameters[SSL_ATTRIBUTES[i]] = value end end end @ssl_context.set_params(ssl_parameters) @ssl_context.session_cache_mode = OpenSSL::SSL::SSLContext::SESSION_CACHE_CLIENT | OpenSSL::SSL::SSLContext::SESSION_CACHE_NO_INTERNAL_STORE @ssl_context.session_new_cb = proc {|sock, sess| @ssl_session = sess } D "starting SSL for #{conn_addr}:#{conn_port}..." s = OpenSSL::SSL::SSLSocket.new(s, @ssl_context) s.sync_close = true # Server Name Indication (SNI) RFC 3546 s.hostname = @address if s.respond_to? 
:hostname= if @ssl_session and Process.clock_gettime(Process::CLOCK_REALTIME) < @ssl_session.time.to_f + @ssl_session.timeout s.session = @ssl_session end ssl_socket_connect(s, @open_timeout) if (@ssl_context.verify_mode != OpenSSL::SSL::VERIFY_NONE) && @ssl_context.verify_hostname s.post_connection_check(@address) end D "SSL established, protocol: #{s.ssl_version}, cipher: #{s.cipher[0]}" end @socket = BufferedIO.new(s, read_timeout: @read_timeout, write_timeout: @write_timeout, continue_timeout: @continue_timeout, debug_output: @debug_output) @last_communicated = nil on_connect rescue => exception if s D "Conn close because of connect error #{exception}" s.close end raise end private :connect end end begin sleep 0.2 connection = Net::HTTP.start(HOST, PORT) connection.keep_alive_timeout = 3600 connection_opt = Net::HTTPOpt.start(HOST, PORT) connection_opt.keep_alive_timeout = 3600 unless connection.request_get("/100k.txt").body == connection_opt.request_get("/100k.txt").body abort("bug?") end if ARGV.first == "profile" require 'stackprof' require 'json' StackProf.run(mode: :wall, out: "/tmp/stackprof-net-http.dump", raw: true) do 40_000.times do connection.request_get("/1M.txt").body end end File.write("/tmp/stackprof-net-http.json", JSON.dump(Marshal.load(File.binread("/tmp/stackprof-net-http.dump")))) system("stackprof", "/tmp/stackprof-net-http.dump") StackProf.run(mode: :wall, out: "/tmp/stackprof-net-http-opt.dump", raw: true) do 40_000.times do connection_opt.request_get("/1M.txt").body end end File.write("/tmp/stackprof-net-http-opt.json", JSON.dump(Marshal.load(File.binread("/tmp/stackprof-net-http-opt.dump")))) system("stackprof", "/tmp/stackprof-net-http-opt.dump") else %w(1k 10k 100k 1M).each do |size| puts "=== #{size} ===" Benchmark.ips do |x| path = "/#{size}.txt" x.report("#{size}") { connection.request_get(path).body } x.report("#{size} opt") { connection_opt.request_get(path).body } x.compare!(order: :baseline) end puts end end ensure
Process.kill('TERM', nginx_pid) Process.wait(nginx_pid) end ``` https://github.com/ruby/net-protocol/commit/781e400389 --- lib/net/protocol.rb | 82 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/lib/net/protocol.rb b/lib/net/protocol.rb index 822bc00574500a..c676854b672112 100644 --- a/lib/net/protocol.rb +++ b/lib/net/protocol.rb @@ -120,6 +120,7 @@ def initialize(io, read_timeout: 60, write_timeout: 60, continue_timeout: nil, d @continue_timeout = continue_timeout @debug_output = debug_output @rbuf = ''.b + @rbuf_offset = 0 end attr_reader :io @@ -154,14 +155,15 @@ def read(len, dest = ''.b, ignore_eof = false) LOG "reading #{len} bytes..." read_bytes = 0 begin - while read_bytes + @rbuf.size < len - s = rbuf_consume(@rbuf.size) - read_bytes += s.size - dest << s + while read_bytes + rbuf_size < len + if s = rbuf_consume_all_shareable! + read_bytes += s.bytesize + dest << s + end rbuf_fill end s = rbuf_consume(len - read_bytes) - read_bytes += s.size + read_bytes += s.bytesize dest << s rescue EOFError raise unless ignore_eof @@ -175,9 +177,10 @@ def read_all(dest = ''.b) read_bytes = 0 begin while true - s = rbuf_consume(@rbuf.size) - read_bytes += s.size - dest << s + if s = rbuf_consume_all_shareable! + read_bytes += s.bytesize + dest << s + end rbuf_fill end rescue EOFError @@ -188,14 +191,16 @@ def read_all(dest = ''.b) end def readuntil(terminator, ignore_eof = false) + offset = @rbuf_offset begin - until idx = @rbuf.index(terminator) + until idx = @rbuf.index(terminator, offset) + offset = @rbuf.bytesize rbuf_fill end - return rbuf_consume(idx + terminator.size) + return rbuf_consume(idx + terminator.bytesize - @rbuf_offset) rescue EOFError raise unless ignore_eof - return rbuf_consume(@rbuf.size) + return rbuf_consume end end @@ -208,12 +213,16 @@ def readline BUFSIZE = 1024 * 16 def rbuf_fill - tmp = @rbuf.empty? ? @rbuf : nil + tmp = @rbuf_empty ? 
@rbuf : nil case rv = @io.read_nonblock(BUFSIZE, tmp, exception: false) when String - return if rv.equal?(tmp) - @rbuf << rv - rv.clear + @rbuf_empty = false + if rv.equal?(tmp) + @rbuf_offset = 0 + else + @rbuf << rv + rv.clear + end return when :wait_readable (io = @io.to_io).wait_readable(@read_timeout) or raise Net::ReadTimeout.new(io) @@ -228,13 +237,50 @@ def rbuf_fill end while true end - def rbuf_consume(len) - if len == @rbuf.size + def rbuf_flush + if @rbuf_empty + @rbuf.clear + @rbuf_offset = 0 + end + nil + end + + def rbuf_size + @rbuf.bytesize - @rbuf_offset + end + + # Warning: this method may share the buffer to avoid + # copying. The caller must no longer use the returned + # string once rbuf_fill has been called again + def rbuf_consume_all_shareable! + @rbuf_empty = true + buf = if @rbuf_offset == 0 + @rbuf + else + @rbuf.byteslice(@rbuf_offset..-1) + end + @rbuf_offset = @rbuf.bytesize + buf + end + + def rbuf_consume(len = nil) + if @rbuf_offset == 0 && (len.nil? || len == @rbuf.bytesize) s = @rbuf @rbuf = ''.b + @rbuf_offset = 0 + @rbuf_empty = true + elsif len.nil? 
+ s = @rbuf.byteslice(@rbuf_offset..-1) + @rbuf = ''.b + @rbuf_offset = 0 + @rbuf_empty = true else - s = @rbuf.slice!(0, len) + s = @rbuf.byteslice(@rbuf_offset, len) + @rbuf_offset += len + @rbuf_empty = @rbuf_offset == @rbuf.bytesize + rbuf_flush end + @debug_output << %Q[-> #{s.dump}\n] if @debug_output s end From 08c3d08a18b29c46156ca3370c682c03696448a7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 31 Aug 2022 15:23:50 +0900 Subject: [PATCH 528/546] `ruby_init_setproctitle` declaration has moved to internal/missing.h --- rubystub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/rubystub.c b/rubystub.c index e7f46e78a561fb..75aeca18699ed3 100644 --- a/rubystub.c +++ b/rubystub.c @@ -1,4 +1,5 @@ #include "internal.h" +#include "internal/missing.h" #if defined HAVE_DLADDR #include #endif From dc66f7246abd06e2e5d6e9b8c02403b05885283e Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 31 Aug 2022 15:24:37 +0900 Subject: [PATCH 529/546] Scripts under libexec may not be considered to be placed in bin --- defs/gmake.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index af4d27a5a2c42e..c95e309b5ee0ed 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -137,7 +137,7 @@ config.status: $(wildcard config.cache) STUBPROGRAM = rubystub$(EXEEXT) IGNOREDPATTERNS = %~ .% %.orig %.rej \#%\# SCRIPTBINDIR := $(if $(EXEEXT),,exec/) -SCRIPTPROGRAMS = $(addprefix $(SCRIPTBINDIR),$(addsuffix $(EXEEXT),$(filter-out $(IGNOREDPATTERNS),$(notdir $(wildcard $(srcdir)/libexec/*))))) +SCRIPTPROGRAMS = $(addprefix $(SCRIPTBINDIR),$(addsuffix $(EXEEXT),$(filter-out $(IGNOREDPATTERNS),$(notdir $(wildcard $(srcdir)/bin/*))))) stub: $(STUBPROGRAM) scriptbin: $(SCRIPTPROGRAMS) From 22542a6fb0c8a9f3ac6128938580f7625d30f644 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 31 Aug 2022 15:28:31 +0900 Subject: [PATCH 530/546] SCRIPTBINDIR is ok if it just exists [ci skip] --- defs/gmake.mk | 5 ++--- 1 file changed, 2 
insertions(+), 3 deletions(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index c95e309b5ee0ed..944b9b41ed9912 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -163,9 +163,8 @@ $(SCRIPTBINDIR)%$(EXEEXT): bin/% $(STUBPROGRAM) \ $(Q) chmod +x $@ $(Q) $(POSTLINK) -$(TIMESTAMPDIR)/.exec.time: - $(Q) mkdir exec - $(Q) exit > $@ +$(SCRIPTBINDIR): + $(Q) mkdir $@ .PHONY: commit commit: $(if $(filter commit,$(MAKECMDGOALS)),$(filter-out commit,$(MAKECMDGOALS))) up From 9dc60653db186b1ae9400ed75b413a07728ce6ff Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 30 Aug 2022 14:16:02 +0900 Subject: [PATCH 531/546] Extract `update_coderange` macro Which restarts scanning the code range in unscanned part. --- sprintf.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/sprintf.c b/sprintf.c index 32a72439af0b6a..b2bdd4a0726acc 100644 --- a/sprintf.c +++ b/sprintf.c @@ -221,7 +221,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) VALUE result; long scanned = 0; - int coderange = ENC_CODERANGE_7BIT; + enum ruby_coderange_type coderange = ENC_CODERANGE_7BIT; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; @@ -246,6 +246,16 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) rb_raise(rb_eArgError, "flag after precision"); \ } +#define update_coderange(partial) do { \ + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen \ + && rb_enc_to_index(enc) /* != ENCINDEX_ASCII_8BIT */) { \ + int cr = coderange; \ + scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); \ + ENC_CODERANGE_SET(result, \ + (partial && cr == ENC_CODERANGE_UNKNOWN ? 
\ + ENC_CODERANGE_BROKEN : (coderange = cr))); \ + } \ + } while (0) ++argc; --argv; StringValue(fmt); @@ -273,10 +283,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) rb_raise(rb_eArgError, "incomplete format specifier; use %%%% (double %%) instead"); } PUSH(p, t - p); - if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { - scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); - ENC_CODERANGE_SET(result, coderange); - } + update_coderange(FALSE); if (t >= end) { /* end of fmt string */ goto sprint_exit; @@ -492,13 +499,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) format_s1: len = RSTRING_LEN(str); rb_str_set_len(result, blen); - if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { - int cr = coderange; - scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); - ENC_CODERANGE_SET(result, - (cr == ENC_CODERANGE_UNKNOWN ? - ENC_CODERANGE_BROKEN : (coderange = cr))); - } + update_coderange(TRUE); enc = rb_enc_check(result, str); if (flags&(FPREC|FWIDTH)) { slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); @@ -930,10 +931,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) flags = FNONE; } - if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { - scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); - ENC_CODERANGE_SET(result, coderange); - } + update_coderange(FALSE); sprint_exit: rb_str_tmp_frozen_release(orig, fmt); /* XXX - We cannot validate the number of arguments if (digit)$ style used. 
From 576bdec03f0d58847690a0607c788ada433ce60f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 30 Aug 2022 18:12:08 +0900 Subject: [PATCH 532/546] [Bug #18973] Promote US-ASCII to ASCII-8BIT when adding 8-bit char --- internal/string.h | 1 + sprintf.c | 13 +++++++++---- string.c | 32 ++++++++++++++++++++++++-------- test/ruby/test_sprintf.rb | 3 +++ 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/internal/string.h b/internal/string.h index 8fb9553d033954..46862d77f5cbc2 100644 --- a/internal/string.h +++ b/internal/string.h @@ -43,6 +43,7 @@ char *rb_str_to_cstr(VALUE str); const char *ruby_escaped_char(int c); void rb_str_make_independent(VALUE str); int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc); +int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code); static inline bool STR_EMBED_P(VALUE str); static inline bool STR_SHARED_P(VALUE str); diff --git a/sprintf.c b/sprintf.c index b2bdd4a0726acc..bfe25e1d3c8b05 100644 --- a/sprintf.c +++ b/sprintf.c @@ -454,13 +454,18 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) str = tmp; goto format_s1; } - else { - n = NUM2INT(val); - if (n >= 0) n = rb_enc_codelen((c = n), enc); - } + n = NUM2INT(val); + if (n >= 0) n = rb_enc_codelen((c = n), enc); if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } + int encidx = rb_ascii8bit_appendable_encoding_index(enc, c); + if (encidx >= 0 && encidx != rb_enc_to_index(enc)) { + /* special case */ + rb_enc_associate_index(result, encidx); + enc = rb_enc_from_index(encidx); + coderange = ENC_CODERANGE_VALID; + } if (!(flags & FWIDTH)) { CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); diff --git a/string.c b/string.c index 564812ae51fdb6..951aeca6dd90ca 100644 --- a/string.c +++ b/string.c @@ -3481,17 +3481,13 @@ rb_str_concat(VALUE str1, VALUE str2) return rb_str_append(str1, str2); } - encidx = rb_enc_to_index(enc); - if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) { - /* US-ASCII 
automatically extended to ASCII-8BIT */ + encidx = rb_ascii8bit_appendable_encoding_index(enc, code); + if (encidx >= 0) { char buf[1]; buf[0] = (char)code; - if (code > 0xFF) { - rb_raise(rb_eRangeError, "%u out of char range", code); - } rb_str_cat(str1, buf, 1); - if (encidx == ENCINDEX_US_ASCII && code > 127) { - rb_enc_associate_index(str1, ENCINDEX_ASCII_8BIT); + if (encidx != rb_enc_to_index(enc)) { + rb_enc_associate_index(str1, encidx); ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID); } } @@ -3524,6 +3520,26 @@ rb_str_concat(VALUE str1, VALUE str2) return str1; } +int +rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code) +{ + int encidx = rb_enc_to_index(enc); + + if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) { + /* US-ASCII automatically extended to ASCII-8BIT */ + if (code > 0xFF) { + rb_raise(rb_eRangeError, "%u out of char range", code); + } + if (encidx == ENCINDEX_US_ASCII && code > 127) { + return ENCINDEX_ASCII_8BIT; + } + return encidx; + } + else { + return -1; + } +} + /* * call-seq: * prepend(*other_strings) -> string diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb index 803399fdb3c940..c453ecd350b9b4 100644 --- a/test/ruby/test_sprintf.rb +++ b/test/ruby/test_sprintf.rb @@ -369,6 +369,9 @@ def test_char assert_equal(" " * BSIZ + "a", sprintf("%#{ BSIZ + 1 }c", ?a)) assert_equal("a" + " " * BSIZ, sprintf("%-#{ BSIZ + 1 }c", ?a)) assert_raise(ArgumentError) { sprintf("%c", -1) } + s = sprintf("%c".encode(Encoding::US_ASCII), 0x80) + assert_equal("\x80".b, s) + assert_predicate(s, :valid_encoding?) 
end def test_string From 2b967cd4b73425681877c247e5731353e171a3ef Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 30 Aug 2017 22:49:53 +0900 Subject: [PATCH 533/546] Let fake.rb allow newlines in expanded strings --- template/fake.rb.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/template/fake.rb.in b/template/fake.rb.in index aead377ed87387..9177fb10c5168a 100644 --- a/template/fake.rb.in +++ b/template/fake.rb.in @@ -10,7 +10,9 @@ end if inc = arg['i'] src = inc == '-' ? STDIN.read : File.read(inc) def src.value(name) - eval(self[/\bruby_#{name}(?:\[\])?\s*=\s*((?:"(?:\\.|[^\"\\])*"\s*)*(?=;)|[^{};]+)/m, 1].gsub(/#/, '\\#')) + eval(self[/\bruby_#{name}(?:\[\])?\s*=\s*((?:"(?:\\.|[^\"\\])*"\s*)*(?=;)|[^{};]+)/m, 1]. + gsub(/#/, '\\#'). + gsub(/((?:\G|[^\\])(?:\\\\)*)\n/, '\1')) end arg['versions'] = version = {} File.read(File.join(arg['srcdir'], 'version.c')). From 32a059151507876de804adbfbf4926937333e091 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 30 Aug 2017 23:23:38 +0900 Subject: [PATCH 534/546] Move macros from version.h to version.c Moved the contents of `ruby_description` and `ruby_copyright` which are never used in the other places. 
--- version.c | 27 ++++++++++++++++++++++++++- version.h | 30 ------------------------------ 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/version.c b/version.c index a628952907d93f..5ccc4cc508c047 100644 --- a/version.c +++ b/version.c @@ -20,6 +20,29 @@ #define EXIT_SUCCESS 0 #endif +#ifdef RUBY_REVISION +# if RUBY_PATCHLEVEL == -1 +# ifndef RUBY_BRANCH_NAME +# define RUBY_BRANCH_NAME "master" +# endif +# define RUBY_REVISION_STR " "RUBY_BRANCH_NAME" "RUBY_REVISION +# else +# define RUBY_REVISION_STR " revision "RUBY_REVISION +# endif +#else +# define RUBY_REVISION "HEAD" +# define RUBY_REVISION_STR "" +#endif +#if !defined RUBY_RELEASE_DATETIME || RUBY_PATCHLEVEL != -1 +# undef RUBY_RELEASE_DATETIME +# define RUBY_RELEASE_DATETIME RUBY_RELEASE_DATE +#endif + +# define RUBY_DESCRIPTION_WITH(opt) \ + "ruby " RUBY_VERSION RUBY_PATCHLEVEL_STR " " \ + "(" RUBY_RELEASE_DATETIME RUBY_REVISION_STR ")" opt " " \ + "[" RUBY_PLATFORM "]" + #define PRINT(type) puts(ruby_##type) #define MKSTR(type) rb_obj_freeze(rb_usascii_str_new_static(ruby_##type, sizeof(ruby_##type)-1)) #define MKINT(name) INT2FIX(ruby_##name) @@ -44,7 +67,9 @@ const int ruby_patchlevel = RUBY_PATCHLEVEL; const char ruby_description[] = RUBY_DESCRIPTION_WITH(""); static const char ruby_description_with_mjit[] = RUBY_DESCRIPTION_WITH(" +MJIT"); static const char ruby_description_with_yjit[] = RUBY_DESCRIPTION_WITH(" +YJIT"); -const char ruby_copyright[] = RUBY_COPYRIGHT; +const char ruby_copyright[] = "ruby - Copyright (C) " + RUBY_BIRTH_YEAR_STR "-" RUBY_RELEASE_YEAR_STR " " + RUBY_AUTHOR; const char ruby_engine[] = "ruby"; // Might change after initialization diff --git a/version.h b/version.h index b531db40965cf4..abc553b3a70fb9 100644 --- a/version.h +++ b/version.h @@ -65,34 +65,4 @@ # include "revision.h" #endif -#ifdef RUBY_REVISION -# if RUBY_PATCHLEVEL == -1 -# ifndef RUBY_BRANCH_NAME -# define RUBY_BRANCH_NAME "master" -# endif -# define RUBY_REVISION_STR " "RUBY_BRANCH_NAME" 
"RUBY_REVISION -# else -# define RUBY_REVISION_STR " revision "RUBY_REVISION -# endif -#else -# define RUBY_REVISION "HEAD" -# define RUBY_REVISION_STR "" -#endif -#if !defined RUBY_RELEASE_DATETIME || RUBY_PATCHLEVEL != -1 -# undef RUBY_RELEASE_DATETIME -# define RUBY_RELEASE_DATETIME RUBY_RELEASE_DATE -#endif - -# define RUBY_DESCRIPTION_WITH(opt) \ - "ruby "RUBY_VERSION \ - RUBY_PATCHLEVEL_STR \ - " ("RUBY_RELEASE_DATETIME \ - RUBY_REVISION_STR")"opt" " \ - "["RUBY_PLATFORM"]" -# define RUBY_COPYRIGHT \ - "ruby - Copyright (C) " \ - RUBY_BIRTH_YEAR_STR"-" \ - RUBY_RELEASE_YEAR_STR" " \ - RUBY_AUTHOR - #endif /* RUBY_TOPLEVEL_VERSION_H */ From be55b77cc75fe36b484a3feb6ad4178630d73242 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 31 Aug 2022 15:44:26 -0400 Subject: [PATCH 535/546] Better b.cond usage on AArch64 (#6305) * Better b.cond usage on AArch64 When we're lowering a conditional jump, we previously had a bit of a complicated setup where we could emit a conditional jump to skip over a jump that was the next instruction, and then write out the destination and use a branch register. Now instead we use the b.cond instruction if our offset fits (not common, but not unused either) and if it doesn't we write out an inverse condition to jump past loading the destination and branching directly. * Added an inverse fn for Condition (#443) Prevents the need to pass two params and potentially reduces errors. 
Co-authored-by: Jimmy Miller Co-authored-by: Maxime Chevalier-Boisvert Co-authored-by: Jimmy Miller --- yjit/src/asm/arm64/arg/condition.rs | 32 ++++++- yjit/src/asm/arm64/inst/branch_cond.rs | 12 +-- yjit/src/asm/arm64/mod.rs | 9 +- yjit/src/backend/arm64/mod.rs | 122 ++++++++++++------------- 4 files changed, 100 insertions(+), 75 deletions(-) diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs index e791e4b0783c44..bb9ce570c30695 100644 --- a/yjit/src/asm/arm64/arg/condition.rs +++ b/yjit/src/asm/arm64/arg/condition.rs @@ -19,4 +19,34 @@ impl Condition { pub const GT: u8 = 0b1100; // greater than (signed) pub const LE: u8 = 0b1101; // less than or equal to (signed) pub const AL: u8 = 0b1110; // always -} + + pub const fn inverse(condition: u8) -> u8 { + match condition { + Condition::EQ => Condition::NE, + Condition::NE => Condition::EQ, + + Condition::CS => Condition::CC, + Condition::CC => Condition::CS, + + Condition::MI => Condition::PL, + Condition::PL => Condition::MI, + + Condition::VS => Condition::VC, + Condition::VC => Condition::VS, + + Condition::HI => Condition::LS, + Condition::LS => Condition::HI, + + Condition::LT => Condition::GE, + Condition::GE => Condition::LT, + + Condition::GT => Condition::LE, + Condition::LE => Condition::GT, + + Condition::AL => Condition::AL, + + _ => panic!("Unknown condition") + + } + } +} \ No newline at end of file diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs index a6bc79dffede0a..c489bacef05ac0 100644 --- a/yjit/src/asm/arm64/inst/branch_cond.rs +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -20,8 +20,8 @@ pub struct BranchCond { impl BranchCond { /// B.cond /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- - pub fn bcond(cond: u8, byte_offset: i32) -> Self { - Self { cond, imm19: byte_offset >> 2 } + pub fn bcond(cond: u8, imm19: i32) -> Self { + Self { cond, imm19 } } } @@ 
-53,25 +53,25 @@ mod tests { #[test] fn test_b_eq() { - let result: u32 = BranchCond::bcond(Condition::EQ, 128).into(); + let result: u32 = BranchCond::bcond(Condition::EQ, 32).into(); assert_eq!(0x54000400, result); } #[test] fn test_b_vs() { - let result: u32 = BranchCond::bcond(Condition::VS, 128).into(); + let result: u32 = BranchCond::bcond(Condition::VS, 32).into(); assert_eq!(0x54000406, result); } #[test] fn test_b_eq_max() { - let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 20) - 4).into(); + let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 18) - 1).into(); assert_eq!(0x547fffe0, result); } #[test] fn test_b_eq_min() { - let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 20)).into(); + let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 18)).into(); assert_eq!(0x54800000, result); } } diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index a6aa8ffcbb46a4..b73b3125e27646 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -203,9 +203,10 @@ pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { cb.write_bytes(&bytes); } -/// Whether or not the offset between two instructions fits into the b.cond -/// instruction. If it doesn't, then we have to load the value into a register -/// first, then use the b.cond instruction to skip past a direct jump. +/// Whether or not the offset in number of instructions between two instructions +/// fits into the b.cond instruction. If it doesn't, then we have to load the +/// value into a register first, then use the b.cond instruction to skip past a +/// direct jump. 
pub const fn bcond_offset_fits_bits(offset: i64) -> bool { imm_fits_bits(offset, 21) && (offset & 0b11 == 0) } @@ -216,7 +217,7 @@ pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) { A64Opnd::Imm(imm) => { assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); - BranchCond::bcond(cond, imm as i32).into() + BranchCond::bcond(cond, (imm / 4) as i32).into() }, _ => panic!("Invalid operand combination to bcond instruction."), }; diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 69524be6113280..4f07bf80626d18 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -565,64 +565,42 @@ impl Assembler fn emit_conditional_jump(cb: &mut CodeBlock, target: Target) { match target { Target::CodePtr(dst_ptr) => { - let dst_addr = dst_ptr.into_u64(); - //let src_addr = cb.get_write_ptr().into_i64() + 4; - //let offset = dst_addr - src_addr; - - // If the condition is met, then we'll skip past the - // next instruction, put the address in a register, and - // jump to it. - bcond(cb, CONDITION, A64Opnd::new_imm(8)); - - // If we get to this instruction, then the condition - // wasn't met, in which case we'll jump past the - // next instruction that perform the direct jump. - - b(cb, A64Opnd::new_imm(2i64 + emit_load_size(dst_addr) as i64)); - let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr); - br(cb, Assembler::SCRATCH0); - for _ in num_insns..4 { - nop(cb); - } + let dst_addr = dst_ptr.into_i64(); + let src_addr = cb.get_write_ptr().into_i64(); + let offset = dst_addr - src_addr; - /* - // If the jump offset fits into the conditional jump as an - // immediate value and it's properly aligned, then we can - // use the b.cond instruction directly. Otherwise, we need - // to load the address into a register and use the branch - // register instruction. 
- if bcond_offset_fits_bits(offset) { - bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - src_addr)); + let num_insns = if bcond_offset_fits_bits(offset) { + // If the jump offset fits into the conditional jump as + // an immediate value and it's properly aligned, then we + // can use the b.cond instruction directly. + bcond(cb, CONDITION, A64Opnd::new_imm(offset)); + + // Here we're going to return 1 because we've only + // written out 1 instruction. + 1 } else { - // If the condition is met, then we'll skip past the - // next instruction, put the address in a register, and - // jump to it. - bcond(cb, CONDITION, A64Opnd::new_imm(8)); - - // If the offset fits into a direct jump, then we'll use - // that and the number of instructions will be shorter. - // Otherwise we'll use the branch register instruction. - if b_offset_fits_bits(offset) { - // If we get to this instruction, then the condition - // wasn't met, in which case we'll jump past the - // next instruction that performs the direct jump. - b(cb, A64Opnd::new_imm(1)); - - // Here we'll perform the direct jump to the target. - let offset = dst_addr - cb.get_write_ptr().into_i64() + 4; - b(cb, A64Opnd::new_imm(offset / 4)); - } else { - // If we get to this instruction, then the condition - // wasn't met, in which case we'll jump past the - // next instruction that perform the direct jump. - let value = dst_addr as u64; - - b(cb, A64Opnd::new_imm(emit_load_size(value).into())); - emit_load_value(cb, Assembler::SCRATCH0, value); - br(cb, Assembler::SCRATCH0); - } - } - */ + // Otherwise, we need to load the address into a + // register and use the branch register instruction. + let dst_addr = dst_ptr.into_u64(); + let load_insns: i64 = emit_load_size(dst_addr).into(); + + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instructions used for branching. 
+ bcond(cb, Condition::inverse(CONDITION), A64Opnd::new_imm((load_insns + 2) * 4)); + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + // Here we'll return the number of instructions that it + // took to write out the destination address + 1 for the + // b.cond and 1 for the br. + load_insns + 2 + }; + + // We need to make sure we have at least 6 instructions for + // every kind of jump for invalidation purposes, so we're + // going to write out padding nop instructions here. + for _ in num_insns..6 { nop(cb); } }, Target::Label(label_idx) => { // Here we're going to save enough space for ourselves and @@ -904,10 +882,10 @@ impl Assembler _ => unreachable!() }; }, - Insn::Je(target) => { + Insn::Je(target) | Insn::Jz(target) => { emit_conditional_jump::<{Condition::EQ}>(cb, *target); }, - Insn::Jne(target) => { + Insn::Jne(target) | Insn::Jnz(target) => { emit_conditional_jump::<{Condition::NE}>(cb, *target); }, Insn::Jl(target) => { @@ -916,12 +894,6 @@ impl Assembler Insn::Jbe(target) => { emit_conditional_jump::<{Condition::LS}>(cb, *target); }, - Insn::Jz(target) => { - emit_conditional_jump::<{Condition::EQ}>(cb, *target); - }, - Insn::Jnz(target) => { - emit_conditional_jump::<{Condition::NE}>(cb, *target); - }, Insn::Jo(target) => { emit_conditional_jump::<{Condition::VS}>(cb, *target); }, @@ -1053,6 +1025,28 @@ mod tests { asm.compile_with_num_regs(&mut cb, 0); } + #[test] + fn test_emit_je_fits_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let offset = 80; + let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into(); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_does_not_fit_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let offset = 1 << 21; + let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into(); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } 
+ #[test] fn test_emit_lea_label() { let (mut asm, mut cb) = setup_asm(); From 036bb55980e70e41d9017b177de77c26cf57a3f3 Mon Sep 17 00:00:00 2001 From: git Date: Thu, 1 Sep 2022 04:44:44 +0900 Subject: [PATCH 536/546] * 2022-09-01 [ci skip] --- version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version.h b/version.h index abc553b3a70fb9..687deae2cf030d 100644 --- a/version.h +++ b/version.h @@ -14,8 +14,8 @@ #define RUBY_PATCHLEVEL -1 #define RUBY_RELEASE_YEAR 2022 -#define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 31 +#define RUBY_RELEASE_MONTH 9 +#define RUBY_RELEASE_DAY 1 #include "ruby/version.h" #include "ruby/internal/abi.h" From d41be1ac37a91f901a0030bfd10979240621db4e Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Wed, 31 Aug 2022 16:36:22 -0500 Subject: [PATCH 537/546] [DOC] Enhanced RDoc for Time (#6308) More on timezones. --- doc/timezone_specifiers.rdoc | 46 --------------- doc/timezones.rdoc | 108 +++++++++++++++++++++++++++++++++++ time.c | 4 +- timev.rb | 40 ++----------- 4 files changed, 115 insertions(+), 83 deletions(-) delete mode 100644 doc/timezone_specifiers.rdoc create mode 100644 doc/timezones.rdoc diff --git a/doc/timezone_specifiers.rdoc b/doc/timezone_specifiers.rdoc deleted file mode 100644 index a6d57a1b21ea22..00000000000000 --- a/doc/timezone_specifiers.rdoc +++ /dev/null @@ -1,46 +0,0 @@ -=== Timezone Specifiers - -Certain methods in class Time accept arguments that specify timezones: - -- Time.at: keyword argument +in:+. -- Time.new: positional argument +zone+ or keyword argument +in:+. -- Time.now: keyword argument +in:+. -- Time#getlocal: positional argument +zone+. -- Time#localtime: positional argument +zone+. - -The value given with any of these must be one of the following: - -- A string offset from UTC in the form '+HH:MM' or -HH:MM, - where: - - - +HH+ is the 2-digit hour in the range 0..23. - - +MM+ is the 2-digit minute in the range 0..59. 
- - Examples: - - t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC - Time.at(t, in: '-23:59') # => 1999-12-31 20:16:01 -2359 - Time.at(t, in: '+23:59') # => 2000-01-02 20:14:01 +2359 - -- A letter in the range 'A'..'I' or 'K'..'Z'; - see {List of military time zones}[https://en.wikipedia.org/wiki/List_of_military_time_zones]: - - t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC - Time.at(t, in: 'A') # => 2000-01-01 21:15:01 +0100 - Time.at(t, in: 'I') # => 2000-01-02 05:15:01 +0900 - Time.at(t, in: 'K') # => 2000-01-02 06:15:01 +1000 - Time.at(t, in: 'Y') # => 2000-01-01 08:15:01 -1200 - Time.at(t, in: 'Z') # => 2000-01-01 20:15:01 UTC - -- An integer number of seconds in the range -86399..86399: - - t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC - Time.at(t, in: -86399) # => 1999-12-31 20:15:02 -235959 - Time.at(t, in: 86399) # => 2000-01-02 20:15:00 +235959 - --- -TODO: Pull in and revise the text at the link, -then add the example class TZ from the tests. -++ -- A timezone object; - see {Timezone Argument}[rdoc-ref:Time@Timezone+Argument] for details. diff --git a/doc/timezones.rdoc b/doc/timezones.rdoc new file mode 100644 index 00000000000000..c3aae88fdec809 --- /dev/null +++ b/doc/timezones.rdoc @@ -0,0 +1,108 @@ +== Timezones + +=== Timezone Specifiers + +Certain \Time methods accept arguments that specify timezones: + +- Time.at: keyword argument +in:+. +- Time.new: positional argument +zone+ or keyword argument +in:+. +- Time.now: keyword argument +in:+. +- Time#getlocal: positional argument +zone+. +- Time#localtime: positional argument +zone+. + +The value given with any of these must be one of the following +(each detailed below): + +- {Hours/minutes offset}[rdoc-ref:timezones.rdoc@Hours-2FMinutes+Offsets]. +- {Single-letter offset}[rdoc-ref:timezones.rdoc@Single-Letter+Offsets]. +- {Integer offset}[rdoc-ref:timezones.rdoc@Integer+Offsets]. +- {Timezone object}[rdoc-ref:timezones.rdoc@Timezone+Objects]. 
+ +==== Hours/Minutes Offsets + +The zone value may be a string offset from UTC +in the form '+HH:MM' or '-HH:MM', +where: + +- +HH+ is the 2-digit hour in the range 0..23. +- +MM+ is the 2-digit minute in the range 0..59. + +Examples: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: '-23:59') # => 1999-12-31 20:16:01 -2359 + Time.at(t, in: '+23:59') # => 2000-01-02 20:14:01 +2359 + +==== Single-Letter Offsets + +The zone value may be a letter in the range 'A'..'I' +or 'K'..'Z'; +see {List of military time zones}[https://en.wikipedia.org/wiki/List_of_military_time_zones]: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: 'A') # => 2000-01-01 21:15:01 +0100 + Time.at(t, in: 'I') # => 2000-01-02 05:15:01 +0900 + Time.at(t, in: 'K') # => 2000-01-02 06:15:01 +1000 + Time.at(t, in: 'Y') # => 2000-01-01 08:15:01 -1200 + Time.at(t, in: 'Z') # => 2000-01-01 20:15:01 UTC + +==== \Integer Offsets + +The zone value may be an integer number of seconds +in the range -86399..86399: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: -86399) # => 1999-12-31 20:15:02 -235959 + Time.at(t, in: 86399) # => 2000-01-02 20:15:00 +235959 + +==== Timezone Objects + +In most cases, the zone value may be an object +responding to certain timezone methods. + +\Exceptions (timezone object not allowed): + +- Time.new with positional argument +zone+. +- Time.now with keyword argument +in:+. + +The timezone methods are: + +- +local_to_utc+: + + - Called when Time.new is invoked with +tz+ + as the value of positional argument +zone+ or keyword argument +in:+. + - Argument: a Time::tm object. + - Returns: a \Time-like object in the UTC timezone. + +- +utc_to_local+: + + - Called when Time.at or Time.now is invoked with +tz+ + as the value for keyword argument +in:+, + and when Time#getlocal or Time#localtime is called with +tz+ + as the value for positional argument +zone+. 
+ - Argument: a Time::tm object. + - Returns: a \Time-like object in the local timezone. + +A custom timezone class may have these instance methods, +which will be called if defined: + +- +abbr+: + + - Called when Time#strftime is invoked with a format involving %Z. + - Argument: a Time::tm object. + - Returns: a string abbreviation for the timezone name. + +- +dst?+: + + - Called when Time.at or Time.now is invoked with +tz+ + as the value for keyword argument +in:+, + and when Time#getlocal or Time#localtime is called with +tz+ + as the value for positional argument +zone+. + - Argument: a Time::tm object. + - Returns: whether the time is daylight saving time. + +- +name+: + + - Called when Marshal.dump(t) is invoked + - Argument: none. + - Returns: the string name of the timezone. diff --git a/time.c b/time.c index 56ff35b34e2dbc..2b4323a5363c79 100644 --- a/time.c +++ b/time.c @@ -3852,7 +3852,7 @@ time_zonelocal(VALUE time, VALUE off) * t.localtime("-09:00") # => 2000-01-01 11:15:01 -0900 * * For forms of argument +zone+, see - * {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. + * {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. * */ @@ -3954,7 +3954,7 @@ time_fixoff(VALUE time) * t.getlocal('+12:00') # => 2000-01-01 12:00:00 +1200 * * For forms of argument +zone+, see - * {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. + * {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. * */ diff --git a/timev.rb b/timev.rb index 891142e0efc8f3..a2a8bb3cb1577d 100644 --- a/timev.rb +++ b/timev.rb @@ -208,38 +208,8 @@ # - #ceil: Returns a new time with subseconds raised to a ceiling. # - #floor: Returns a new time with subseconds lowered to a floor. # -# == Timezone Argument -# -# A timezone argument must have +local_to_utc+ and +utc_to_local+ -# methods, and may have +name+, +abbr+, and +dst?+ methods. -# -# The +local_to_utc+ method should convert a Time-like object from -# the timezone to UTC, and +utc_to_local+ is the opposite. 
The -# result also should be a Time or Time-like object (not necessary to -# be the same class). The #zone of the result is just ignored. -# Time-like argument to these methods is similar to a Time object in -# UTC without subsecond; it has attribute readers for the parts, -# e.g. #year, #month, and so on, and epoch time readers, #to_i. The -# subsecond attributes are fixed as 0, and #utc_offset, #zone, -# #isdst, and their aliases are same as a Time object in UTC. -# Also #to_time, #+, and #- methods are defined. -# -# The +name+ method is used for marshaling. If this method is not -# defined on a timezone object, Time objects using that timezone -# object can not be dumped by Marshal. -# -# The +abbr+ method is used by '%Z' in #strftime. -# -# The +dst?+ method is called with a +Time+ value and should return whether -# the +Time+ value is in daylight savings time in the zone. -# -# === Auto Conversion to Timezone -# -# At loading marshaled data, a timezone name will be converted to a timezone -# object by +find_timezone+ class method, if the method is defined. -# -# Similarly, that class method will be called when a timezone argument does -# not have the necessary methods mentioned above. +# For the forms of argument +zone+, see +# {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. class Time # Creates a new \Time object from the current system time. # This is the same as Time.new without arguments. @@ -248,7 +218,7 @@ class Time # Time.now(in: '+04:00') # => 2009-06-24 07:39:54 +0400 # # For forms of argument +zone+, see - # {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. + # {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. def self.now(in: nil) Primitive.time_s_now(Primitive.arg!(:in)) end @@ -306,7 +276,7 @@ def self.now(in: nil) # Time.at(secs, in: '-12:00') # => 2000-12-31 17:59:59 -1200 # # For the forms of argument +zone+, see - # {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]. + # {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. 
# def self.at(time, subsec = false, unit = :microsecond, in: nil) if Primitive.mandatory_only? @@ -383,7 +353,7 @@ def self.at(time, subsec = false, unit = :microsecond, in: nil) # When positional argument +zone+ or keyword argument +in:+ is given, # the new \Time object is in the specified timezone. # For the forms of argument +zone+, see - # {Timezone Specifiers}[rdoc-ref:timezone_specifiers.rdoc]: + # {Timezone Specifiers}[rdoc-ref:timezones.rdoc]: # # Time.new(2000, 1, 1, 0, 0, 0, '+12:00') # # => 2000-01-01 00:00:00 +1200 From 811ca75f3bdea4fb6d8271d848f3b79f20432880 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 31 Aug 2022 18:37:07 +0900 Subject: [PATCH 538/546] Remove -j option Close https://github.com/ruby/ruby/pull/6307 Co-authored-by: Takashi Kokubun --- .cirrus.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 4949e6cb3ea24d..ba4470321164a5 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -115,19 +115,19 @@ yjit_task: --with-ext=-test-/cxxanyargs,+ --prefix="$RUBY_PREFIX" --enable-yjit=dev - make_miniruby_script: source $HOME/.cargo/env && make -j miniruby + make_miniruby_script: source $HOME/.cargo/env && make miniruby make_bindgen_script: | if [[ "$CC" = "clang-12" ]]; then - source $HOME/.cargo/env && make -j yjit-bindgen + source $HOME/.cargo/env && make yjit-bindgen else echo "only running bindgen on clang image" fi boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 - full_build_script: source $HOME/.cargo/env && make -j + full_build_script: source $HOME/.cargo/env && make cargo_test_script: source $HOME/.cargo/env && cd yjit && cargo test - make_test_script: source $HOME/.cargo/env && make -j test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" - 
make_test_all_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' - test_gc_compact_script: source $HOME/.cargo/env && make -j test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" - make_test_spec_script: source $HOME/.cargo/env && make -j test-spec RUN_OPTS="--yjit-call-threshold=1" + make_test_script: source $HOME/.cargo/env && make test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" + make_test_all_script: source $HOME/.cargo/env && make test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' + test_gc_compact_script: source $HOME/.cargo/env && make test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" + make_test_spec_script: source $HOME/.cargo/env && make test-spec RUN_OPTS="--yjit-call-threshold=1" From a0d2320f30109bbfcb36d3ece1c67cad2f08541c Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 31 Aug 2022 17:16:29 -0700 Subject: [PATCH 539/546] Add a document about YJIT's Rust version --- doc/yjit/yjit.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index f13fa027b99762..0bd222cacd369f 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -58,6 +58,7 @@ You will need to install: - A C compiler such as GCC or Clang - GNU Make and Autoconf - The Rust compiler `rustc` and Cargo (if you want to build in dev/debug mode) + - The Rust version must be [>= 1.58.1](yjit/Cargo.toml). To install the Rust build toolchain, we suggest following the [recommended installation method][rust-install]. Rust also provides first class [support][editor-tools] for many source code editors. 
From ce70cb646972a4208b8132a77233e1b2741e7a25 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 31 Aug 2022 17:18:18 -0700 Subject: [PATCH 540/546] It was a relative path [ci skip] --- doc/yjit/yjit.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index 0bd222cacd369f..1eeb75824a950e 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -58,7 +58,7 @@ You will need to install: - A C compiler such as GCC or Clang - GNU Make and Autoconf - The Rust compiler `rustc` and Cargo (if you want to build in dev/debug mode) - - The Rust version must be [>= 1.58.1](yjit/Cargo.toml). + - The Rust version must be [>= 1.58.1](../../yjit/Cargo.toml). To install the Rust build toolchain, we suggest following the [recommended installation method][rust-install]. Rust also provides first class [support][editor-tools] for many source code editors. From 941e9be0d9c44db5932386152b42f0fd44079420 Mon Sep 17 00:00:00 2001 From: Mau Magnaguagno Date: Tue, 22 Feb 2022 08:47:20 -0300 Subject: [PATCH 541/546] [ruby/reline] Remove loose operation in Dialog#render_each_dialog https://github.com/ruby/reline/commit/a6d1c917ce --- lib/reline/line_editor.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/reline/line_editor.rb b/lib/reline/line_editor.rb index 8d0719ef7ce9e9..af5240547ab810 100644 --- a/lib/reline/line_editor.rb +++ b/lib/reline/line_editor.rb @@ -758,7 +758,6 @@ def add_dialog_proc(name, p, context = nil) @output.write @full_block elsif dialog.scrollbar_pos <= (i * 2) and (i * 2) < (dialog.scrollbar_pos + bar_height) @output.write @upper_half_block - str += '' elsif dialog.scrollbar_pos <= (i * 2 + 1) and (i * 2) < (dialog.scrollbar_pos + bar_height) @output.write @lower_half_block else From aded6971ad37a75e4eb0493159c19c94971e80e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AA=E3=81=A4=E3=81=8D?= Date: Tue, 30 Aug 2022 22:08:45 -0700 Subject: [PATCH 542/546] [rubygems/rubygems] Support non gnu libc 
arm-linux-eabi platforms https://github.com/rubygems/rubygems/commit/394d7a6fc9 --- lib/rubygems/platform.rb | 2 +- test/rubygems/test_gem_platform.rb | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 8c5e7993caf2d2..06de5ded8da6c9 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -180,7 +180,7 @@ def ===(other) # version ( (@os != "linux" && (@version.nil? || other.version.nil?)) || - (@os == "linux" && ((@version.nil? && ["gnu", "musl"].include?(other.version)) || (@version == "gnu" && other.version.nil?))) || + (@os == "linux" && (other.version == "gnu#{@version}" || other.version == "musl#{@version}" || @version == "gnu#{other.version}")) || @version == other.version ) end diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index e0e635e533adf9..576f150219cd3a 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -138,6 +138,10 @@ def test_initialize "x86_64-linux-gnu" => ["x86_64", "linux", "gnu"], "x86_64-linux-musl" => ["x86_64", "linux", "musl"], "x86_64-linux-uclibc" => ["x86_64", "linux", "uclibc"], + "arm-linux-eabi" => ["arm", "linux", "eabi"], + "arm-linux-gnueabi" => ["arm", "linux", "gnueabi"], + "arm-linux-musleabi" => ["arm", "linux", "musleabi"], + "arm-linux-uclibceabi" => ["arm", "linux", "uclibceabi"], "x86_64-openbsd3.9" => ["x86_64", "openbsd", "3.9"], "x86_64-openbsd4.0" => ["x86_64", "openbsd", "4.0"], "x86_64-openbsd" => ["x86_64", "openbsd", nil], @@ -301,6 +305,34 @@ def test_nil_version_is_stricter_for_linux_os refute(x86_linux_uclibc === x86_linux, "linux-uclibc =~ linux") end + def test_eabi_version_is_stricter_for_linux_os + arm_linux_eabi = Gem::Platform.new "arm-linux-eabi" + arm_linux_gnueabi = Gem::Platform.new "arm-linux-gnueabi" + arm_linux_musleabi = Gem::Platform.new "arm-linux-musleabi" + arm_linux_uclibceabi = Gem::Platform.new 
"arm-linux-uclibceabi" + + # a naked linux runtime is implicit gnu, as it represents the common glibc-linked runtime + assert(arm_linux_eabi === arm_linux_gnueabi, "linux-eabi =~ linux-gnueabi") + assert(arm_linux_gnueabi === arm_linux_eabi, "linux-gnueabi =~ linux-eabi") + + # musl and explicit gnu should differ + refute(arm_linux_gnueabi === arm_linux_musleabi, "linux-gnueabi =~ linux-musleabi") + refute(arm_linux_musleabi === arm_linux_gnueabi, "linux-musleabi =~ linux-gnueabi") + + # explicit libc differ + refute(arm_linux_uclibceabi === arm_linux_musleabi, "linux-uclibceabi =~ linux-musleabi") + refute(arm_linux_musleabi === arm_linux_uclibceabi, "linux-musleabi =~ linux-uclibceabi") + + # musl host runtime accepts libc-generic or statically linked gems... + assert(arm_linux_eabi === arm_linux_musleabi, "linux-eabi =~ linux-musleabi") + # ...but implicit gnu runtime generally does not accept musl-specific gems + refute(arm_linux_musleabi === arm_linux_eabi, "linux-musleabi =~ linux-eabi") + + # other libc are not glibc compatible + refute(arm_linux_eabi === arm_linux_uclibceabi, "linux-eabi =~ linux-uclibceabi") + refute(arm_linux_uclibceabi === arm_linux_eabi, "linux-uclibceabi =~ linux-eabi") + end + def test_equals3_cpu_arm arm = Gem::Platform.new "arm-linux" armv5 = Gem::Platform.new "armv5-linux" From aa5c1a048385f31307165bd88e2eced89c8298a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AA=E3=81=A4=E3=81=8D?= Date: Wed, 31 Aug 2022 10:33:04 -0700 Subject: [PATCH 543/546] [rubygems/rubygems] Support non gnu libc arm-linux-eabi platforms https://github.com/rubygems/rubygems/commit/fcf62799f2 --- lib/bundler/rubygems_ext.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index 056053a7832546..9b8455d0fca748 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -259,7 +259,7 @@ def ===(other) # version ( (@os != "linux" && (@version.nil? 
|| other.version.nil?)) || - (@os == "linux" && ((@version.nil? && ["gnu", "musl"].include?(other.version)) || (@version == "gnu" && other.version.nil?))) || + (@os == "linux" && (other.version == "gnu#{@version}" || other.version == "musl#{@version}" || @version == "gnu#{other.version}")) || @version == other.version ) end From f229b36087f1b387d77af8f3fa50f9bffd2fd44e Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 1 Sep 2022 16:15:51 +0900 Subject: [PATCH 544/546] Fix test fail with assert_ractor outside of ruby/ruby repo Revert 806583c093ecc2d67830f0a8f0d94decf0ed71e5 --- tool/lib/core_assertions.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/lib/core_assertions.rb b/tool/lib/core_assertions.rb index 321ca59f56acdc..7cd598b1abd6f2 100644 --- a/tool/lib/core_assertions.rb +++ b/tool/lib/core_assertions.rb @@ -268,7 +268,7 @@ def assert_separately(args, file = nil, line = nil, src, ignore_stderr: nil, **o src = < Date: Thu, 1 Sep 2022 14:14:46 +0900 Subject: [PATCH 545/546] [ruby/reline] Support dumb terminal The "dumb" terminal is considered only on MSys tty now. However, the `TERM` feature has been used on many Unix-like systems for decades, not MSys specific. https://github.com/ruby/reline/commit/53fd51ab62 --- lib/reline.rb | 29 +++++++++++++---------------- test/reline/test_reline.rb | 6 ++++++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/lib/reline.rb b/lib/reline.rb index 0487232a0d6246..f22b573e6d5539 100644 --- a/lib/reline.rb +++ b/lib/reline.rb @@ -601,24 +601,21 @@ def self.line_editor end require 'reline/general_io' -if RbConfig::CONFIG['host_os'] =~ /mswin|msys|mingw|cygwin|bccwin|wince|emc/ - require 'reline/windows' - if Reline::Windows.msys_tty? 
- Reline::IOGate = if ENV['TERM'] == 'dumb' - Reline::GeneralIO - else - require 'reline/ansi' - Reline::ANSI - end +io = Reline::GeneralIO +unless ENV['TERM'] == 'dumb' + case RbConfig::CONFIG['host_os'] + when /mswin|msys|mingw|cygwin|bccwin|wince|emc/ + require 'reline/windows' + tty = (io = Reline::Windows).msys_tty? else - Reline::IOGate = Reline::Windows + tty = $stdout.tty? end +end +Reline::IOGate = if tty + require 'reline/ansi' + Reline::ANSI else - Reline::IOGate = if $stdout.isatty - require 'reline/ansi' - Reline::ANSI - else - Reline::GeneralIO - end + io end + Reline::HISTORY = Reline::History.new(Reline.core.config) diff --git a/test/reline/test_reline.rb b/test/reline/test_reline.rb index 8828e419852577..82447fd16cb4df 100644 --- a/test/reline/test_reline.rb +++ b/test/reline/test_reline.rb @@ -397,6 +397,12 @@ def test_read_io # TODO in Reline::Core end + def test_dumb_terminal + lib = File.expand_path("../../lib", __dir__) + out = IO.popen([{"TERM"=>"dumb"}, "ruby", "-I#{lib}", "-rreline", "-e", "p Reline::IOGate"], &:read) + assert_equal("Reline::GeneralIO", out.chomp) + end + def get_reline_encoding if encoding = Reline::IOGate.encoding encoding From f3becd73e30b4049abfe29cba1bfe57ec8e78f65 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 1 Sep 2022 17:02:52 +0900 Subject: [PATCH 546/546] Ignore test libraries like assert_ractor from did_you_mean --- tool/sync_default_gems.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index 451e325e5ce80b..5415e0f2b4346d 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -302,6 +302,7 @@ def sync_default_gems(gem) cp_r(Dir.glob("#{upstream}/lib/did_you_mean*"), "lib") cp_r("#{upstream}/did_you_mean.gemspec", "lib/did_you_mean") cp_r("#{upstream}/test", "test/did_you_mean") + rm_rf("test/did_you_mean/lib") rm_rf(%w[test/did_you_mean/tree_spell/test_explore.rb]) when "erb" rm_rf(%w[lib/erb* test/erb libexec/erb])