From 8a6740c70edf39cdf6230659d191240c43dc6d22 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 23 Feb 2024 11:08:09 -0800 Subject: [PATCH] YJIT: Lazily push a frame for specialized C funcs (#10080) * YJIT: Lazily push a frame for specialized C funcs Co-authored-by: Maxime Chevalier-Boisvert * Fix a comment on pc_to_cfunc * Rename rb_yjit_check_pc to rb_yjit_lazy_push_frame * Rename it to jit_prepare_lazy_frame_call * Fix a typo * Optimize String#getbyte as well * Optimize String#byteslice as well --------- Co-authored-by: Maxime Chevalier-Boisvert --- bootstraptest/test_yjit.rb | 51 +++++++++++++++++ common.mk | 2 + error.c | 3 + object.c | 2 + string.c | 2 +- vm_insnhelper.c | 18 ++++++ yjit.h | 2 + yjit.rb | 4 ++ yjit/src/codegen.rs | 109 ++++++++++++++++++++++++++++++++++--- yjit/src/cruby.rs | 2 + yjit/src/stats.rs | 5 ++ yjit/src/yjit.rs | 16 ++++++ 12 files changed, 206 insertions(+), 10 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index 2e475cc186bca4..a2c98c01c939aa 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -2046,6 +2046,57 @@ def getbyte(s, i) [getbyte("a", 0), getbyte("a", 1), getbyte("a", -1), getbyte("a", -2), getbyte("a", "a")] } unless rjit_enabled? # Not yet working on RJIT +# Basic test for String#setbyte +assert_equal 'AoZ', %q{ + s = "foo" + s.setbyte(0, 65) + s.setbyte(-1, 90) + s +} + +# String#setbyte IndexError +assert_equal 'String#setbyte', %q{ + def ccall = "".setbyte(1, 0) + begin + ccall + rescue => e + e.backtrace.first.split("'").last + end +} + +# String#setbyte TypeError +assert_equal 'String#setbyte', %q{ + def ccall = "".setbyte(nil, 0) + begin + ccall + rescue => e + e.backtrace.first.split("'").last + end +} + +# String#setbyte FrozenError +assert_equal 'String#setbyte', %q{ + def ccall = "a".freeze.setbyte(0, 0) + begin + ccall + rescue => e + e.backtrace.first.split("'").last + end +} + +# non-leaf String#setbyte +assert_equal 'String#setbyte', %q{ + def to_int + @caller = caller + 0 + end + + def ccall = "a".setbyte(self, 98) + ccall + + @caller.first.split("'").last +} + # non-leaf String#byteslice assert_equal 'TypeError', %q{ def ccall = "".byteslice(nil, nil) diff --git a/common.mk b/common.mk index 610ff015a7f1fd..f9d16b4ea2de6c 100644 --- a/common.mk +++ b/common.mk @@ -6711,6 +6711,7 @@ error.$(OBJEXT): {$(VPATH)}util.h error.$(OBJEXT): {$(VPATH)}vm_core.h error.$(OBJEXT): {$(VPATH)}vm_opts.h error.$(OBJEXT): {$(VPATH)}warning.rbinc +error.$(OBJEXT): {$(VPATH)}yjit.h eval.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h eval.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h eval.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -11425,6 +11426,7 @@ object.$(OBJEXT): {$(VPATH)}vm_core.h object.$(OBJEXT): {$(VPATH)}vm_debug.h object.$(OBJEXT): {$(VPATH)}vm_opts.h object.$(OBJEXT): {$(VPATH)}vm_sync.h +object.$(OBJEXT): {$(VPATH)}yjit.h pack.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h pack.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h pack.$(OBJEXT): $(CCAN_DIR)/list/list.h diff --git a/error.c b/error.c index 385da82f3865f0..ac579777bebc98 100644 --- a/error.c +++ b/error.c @@ -48,6 +48,7 @@ #include "ruby/util.h" #include "ruby_assert.h" #include "vm_core.h" +#include "yjit.h" #include "builtin.h" @@ -1409,6 +1410,7 @@ rb_exc_new_cstr(VALUE etype, const char *s) VALUE rb_exc_new_str(VALUE etype, VALUE str) { + rb_yjit_lazy_push_frame(GET_EC()->cfp->pc); StringValue(str); return rb_class_new_instance(1, &str, etype); } @@ -3827,6 +3829,7 @@ inspect_frozen_obj(VALUE obj, VALUE mesg, int recur) void rb_error_frozen_object(VALUE frozen_obj) { + rb_yjit_lazy_push_frame(GET_EC()->cfp->pc); VALUE debug_info; const ID created_info = id_debug_created_info; VALUE mesg = rb_sprintf("can't modify frozen %"PRIsVALUE": ", diff --git a/object.c b/object.c index 69602ac8d09035..7ec944cb7a13c3 100644 --- a/object.c +++ b/object.c @@ -42,6 +42,7 @@ #include "ruby/assert.h" #include "builtin.h" #include "shape.h" +#include "yjit.h" /* Flags of RObject * @@ -3238,6 +3239,7 @@ rb_to_integer_with_id_exception(VALUE val, const char *method, ID mid, int raise VALUE v; if (RB_INTEGER_TYPE_P(val)) return val; + rb_yjit_lazy_push_frame(GET_EC()->cfp->pc); v = try_to_int(val, mid, raise); if (!raise && NIL_P(v)) return Qnil; if (!RB_INTEGER_TYPE_P(v)) { diff --git a/string.c b/string.c index 8ed058eead8954..d8b145db06a5fa 100644 --- a/string.c +++ b/string.c @@ -6173,7 +6173,7 @@ rb_str_getbyte(VALUE str, VALUE index) * * Related: String#getbyte. */ -static VALUE +VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value) { long pos = NUM2LONG(index); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index bee3d0a8c68490..6136cb146596e9 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3535,6 +3535,24 @@ vm_call_cfunc_with_frame_(rb_execution_context_t *ec, rb_control_frame_t *reg_cf return val; } +// Push a C method frame for a given cme. This is called when JIT code skipped +// pushing a frame but the C method reached a point where a frame is needed. +void +rb_vm_push_cfunc_frame(const rb_callable_method_entry_t *cme, int recv_idx) +{ + VM_ASSERT(cme->def->type == VM_METHOD_TYPE_CFUNC); + rb_execution_context_t *ec = GET_EC(); + VALUE *sp = ec->cfp->sp; + VALUE recv = *(sp - recv_idx - 1); + VALUE frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL; + VALUE block_handler = VM_BLOCK_HANDLER_NONE; +#if VM_CHECK_MODE > 0 + // Clean up the stack canary since we're about to satisfy the "leaf or lazy push" assumption + *(GET_EC()->cfp->sp) = Qfalse; +#endif + vm_push_frame(ec, NULL, frame_type, recv, block_handler, (VALUE)cme, 0, ec->cfp->sp, 0, 0); +} + // If true, cc->call needs to include `CALLER_SETUP_ARG` (i.e. can't be skipped in fastpath) bool rb_splat_or_kwargs_p(const struct rb_callinfo *restrict ci) diff --git a/yjit.h b/yjit.h index 5e71b2f276f3b5..46218a47d72d44 100644 --- a/yjit.h +++ b/yjit.h @@ -45,6 +45,7 @@ void rb_yjit_before_ractor_spawn(void); void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic, unsigned insn_idx); void rb_yjit_tracing_invalidate_all(void); void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns); +void rb_yjit_lazy_push_frame(const VALUE *pc); #else // !USE_YJIT @@ -66,6 +67,7 @@ static inline void rb_yjit_iseq_free(void *payload) {} static inline void rb_yjit_before_ractor_spawn(void) {} static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic, unsigned insn_idx) {} static inline void rb_yjit_tracing_invalidate_all(void) {} +static inline void rb_yjit_lazy_push_frame(const VALUE *pc) {} #endif // #if USE_YJIT diff --git a/yjit.rb b/yjit.rb index f2cd369faf0f64..50cb2483980fec 100644 --- a/yjit.rb +++ b/yjit.rb @@ -332,6 +332,10 @@ def _print_stats(out: $stderr) # :nodoc: out.puts "num_throw_break: " + format_number_pct(13, stats[:num_throw_break], stats[:num_throw]) out.puts "num_throw_retry: " + format_number_pct(13, stats[:num_throw_retry], stats[:num_throw]) out.puts "num_throw_return: " + format_number_pct(13, stats[:num_throw_return], stats[:num_throw]) + out.puts "num_lazy_frame_check: " + format_number(13, stats[:num_lazy_frame_check]) + out.puts "num_lazy_frame_push: " + format_number_pct(13, stats[:num_lazy_frame_push], stats[:num_lazy_frame_check]) + out.puts "lazy_frame_count: " + format_number(13, stats[:lazy_frame_count]) + out.puts "lazy_frame_failure: " + format_number(13, stats[:lazy_frame_failure]) out.puts "iseq_stack_too_large: " + format_number(13, stats[:iseq_stack_too_large]) out.puts "iseq_too_long: " + format_number(13, stats[:iseq_too_long]) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 333e8500743794..7148f4a4e60abb 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -411,6 +411,54 @@ fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) { } } +/// Basically jit_prepare_non_leaf_call(), but this registers the current PC +/// to lazily push a C method frame when it's necessary. +fn jit_prepare_lazy_frame_call( + jit: &mut JITState, + asm: &mut Assembler, + cme: *const rb_callable_method_entry_t, + recv_opnd: YARVOpnd, +) -> bool { + // We can use this only when the receiver is on stack. + let recv_idx = match recv_opnd { + StackOpnd(recv_idx) => recv_idx, + _ => unreachable!("recv_opnd must be on stack, but got: {:?}", recv_opnd), + }; + + // Get the next PC. jit_save_pc() saves that PC. + let pc: *mut VALUE = unsafe { + let cur_insn_len = insn_len(jit.get_opcode()) as isize; + jit.get_pc().offset(cur_insn_len) + }; + + let pc_to_cfunc = CodegenGlobals::get_pc_to_cfunc(); + match pc_to_cfunc.get(&pc) { + Some(&(other_cme, _)) if other_cme != cme => { + // Bail out if it's not the only cme on this callsite. + incr_counter!(lazy_frame_failure); + return false; + } + _ => { + // Let rb_yjit_lazy_push_frame() lazily push a C frame on this PC. + incr_counter!(lazy_frame_count); + pc_to_cfunc.insert(pc, (cme, recv_idx)); + } + } + + // Save the PC to trigger a lazy frame push, and save the SP to get the receiver. + // The C func may call a method that doesn't raise, so prepare for invalidation too. + jit_prepare_non_leaf_call(jit, asm); + + // Make sure we're ready for calling rb_vm_push_cfunc_frame(). + let cfunc_argc = unsafe { get_mct_argc(get_cme_def_body_cfunc(cme)) }; + if cfunc_argc != -1 { + assert_eq!(recv_idx as i32, cfunc_argc); // verify the receiver index if possible + } + assert!(asm.get_leaf_ccall()); // It checks the stack canary we set for known_cfunc_codegen. + + true +} + /// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could: /// - Perform GC allocation /// - Take the VM lock through RB_VM_LOCK_ENTER() @@ -5395,7 +5443,7 @@ fn jit_rb_str_byteslice( asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, - _cme: *const rb_callable_method_entry_t, + cme: *const rb_callable_method_entry_t, _block: Option, argc: i32, _known_recv_class: Option, @@ -5409,7 +5457,9 @@ fn jit_rb_str_byteslice( (Type::Fixnum, Type::Fixnum) => {}, // Raises when non-integers are passed in, which requires the method frame // to be pushed for the backtrace - _ => return false, + _ => if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } } asm_comment!(asm, "String#byteslice"); @@ -5431,11 +5481,11 @@ fn jit_rb_str_byteslice( } fn jit_rb_str_getbyte( - _jit: &mut JITState, + jit: &mut JITState, asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, - _cme: *const rb_callable_method_entry_t, + cme: *const rb_callable_method_entry_t, _block: Option, _argc: i32, _known_recv_class: Option, @@ -5444,17 +5494,19 @@ fn jit_rb_str_getbyte( fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE; } - let index = asm.stack_opnd(0); - let recv = asm.stack_opnd(1); - // rb_str_getbyte should be leaf if the index is a fixnum - if asm.ctx.get_opnd_type(index.into()) != Type::Fixnum { + if asm.ctx.get_opnd_type(StackOpnd(0)) != Type::Fixnum { // Raises when non-integers are passed in, which requires the method frame // to be pushed for the backtrace - return false; + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(1)) { + return false; + } } asm_comment!(asm, "String#getbyte"); + let index = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + let ret_opnd = asm.ccall(rb_str_getbyte as *const u8, vec![recv, index]); asm.stack_pop(2); // Keep them on stack during ccall for GC @@ -5465,6 +5517,35 @@ fn jit_rb_str_getbyte( true } +fn jit_rb_str_setbyte( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option, + _argc: i32, + _known_recv_class: Option, +) -> bool { + // Raises when index is out of range. Lazily push a frame in that case. + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + asm_comment!(asm, "String#setbyte"); + + let value = asm.stack_opnd(0); + let index = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_setbyte as *const u8, vec![recv, index, value]); + asm.stack_pop(3); // Keep them on stack during ccall for GC + + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(out_opnd, ret_opnd); + + true +} + // Codegen for rb_str_to_s() // When String#to_s is called on a String instance, the method returns self and // most of the overhead comes from setting up the method call. We observed that @@ -9693,6 +9774,7 @@ pub fn yjit_reg_method_codegen_fns() { yjit_reg_method(rb_cString, "size", jit_rb_str_length); yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize); yjit_reg_method(rb_cString, "getbyte", jit_rb_str_getbyte); + yjit_reg_method(rb_cString, "setbyte", jit_rb_str_setbyte); yjit_reg_method(rb_cString, "byteslice", jit_rb_str_byteslice); yjit_reg_method(rb_cString, "<<", jit_rb_str_concat); yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus); @@ -9769,6 +9851,10 @@ pub struct CodegenGlobals { /// Page indexes for outlined code that are not associated to any ISEQ. ocb_pages: Vec, + + /// Map of cfunc YARV PCs to CMEs and receiver indexes, used to lazily push + /// a frame when rb_yjit_lazy_push_frame() is called with a PC in this HashMap. + pc_to_cfunc: HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)>, } /// For implementing global code invalidation. A position in the inline @@ -9860,6 +9946,7 @@ impl CodegenGlobals { entry_stub_hit_trampoline, global_inval_patches: Vec::new(), ocb_pages, + pc_to_cfunc: HashMap::new(), }; // Initialize the codegen globals instance @@ -9938,6 +10025,10 @@ impl CodegenGlobals { pub fn get_ocb_pages() -> &'static Vec { &CodegenGlobals::get_instance().ocb_pages } + + pub fn get_pc_to_cfunc() -> &'static mut HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)> { + &mut CodegenGlobals::get_instance().pc_to_cfunc + } } #[cfg(test)] diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 488d4798a2a33f..e23f755388df47 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -117,6 +117,7 @@ extern "C" { ci: *const rb_callinfo, ) -> *const rb_callable_method_entry_t; pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; + pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE; @@ -142,6 +143,7 @@ extern "C" { ) -> VALUE; pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; pub fn rb_vm_stack_canary() -> VALUE; + pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int); } // Renames diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 3637de1613c0aa..6cdf1d0616a2e4 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -583,6 +583,11 @@ make_counters! { num_throw_retry, num_throw_return, + num_lazy_frame_check, + num_lazy_frame_push, + lazy_frame_count, + lazy_frame_failure, + iseq_stack_too_large, iseq_too_long, diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index 66d802ec5e6ee7..cc2c8fe0663195 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -207,3 +207,19 @@ pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VA return Qnil; } + +/// Push a C method frame if the given PC is supposed to lazily push one. +/// This is called from rb_raise() (at rb_exc_new_str()) and other functions +/// that may make a method call (e.g. rb_to_int()). +#[no_mangle] +pub extern "C" fn rb_yjit_lazy_push_frame(pc: *mut VALUE) { + if !yjit_enabled_p() { + return; + } + + incr_counter!(num_lazy_frame_check); + if let Some(&(cme, recv_idx)) = CodegenGlobals::get_pc_to_cfunc().get(&pc) { + incr_counter!(num_lazy_frame_push); + unsafe { rb_vm_push_cfunc_frame(cme, recv_idx as i32) } + } +}