Skip to content

Commit

Permalink
YJIT: Optimize local variables when EP == BP (#10487)
Browse files Browse the repository at this point in the history
  • Loading branch information
k0kubun committed Apr 17, 2024
1 parent ca76406 commit 4cc58ea
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 24 deletions.
5 changes: 5 additions & 0 deletions vm.c
Expand Up @@ -1007,6 +1007,11 @@ vm_make_env_each(const rb_execution_context_t * const ec, rb_control_frame_t *co
}
#endif

// Invalidate JIT code that assumes cfp->ep == vm_base_ptr(cfp).
if (env->iseq) {
rb_yjit_invalidate_ep_is_bp(env->iseq);
}

return (VALUE)env;
}

Expand Down
6 changes: 6 additions & 0 deletions yjit.c
Expand Up @@ -629,6 +629,12 @@ rb_get_iseq_body_stack_max(const rb_iseq_t *iseq)
return iseq->body->stack_max;
}

// Accessor returning the `type` field of the given iseq's body.
enum rb_iseq_type
rb_get_iseq_body_type(const rb_iseq_t *iseq)
{
return iseq->body->type;
}

bool
rb_get_iseq_flags_has_lead(const rb_iseq_t *iseq)
{
Expand Down
2 changes: 2 additions & 0 deletions yjit.h
Expand Up @@ -48,6 +48,7 @@ void rb_yjit_tracing_invalidate_all(void);
void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns);
void rb_yjit_lazy_push_frame(const VALUE *pc);
void rb_yjit_invalidate_no_singleton_class(VALUE klass);
void rb_yjit_invalidate_ep_is_bp(const rb_iseq_t *iseq);

#else
// !USE_YJIT
Expand All @@ -71,6 +72,7 @@ static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic
static inline void rb_yjit_tracing_invalidate_all(void) {}
static inline void rb_yjit_lazy_push_frame(const VALUE *pc) {}
static inline void rb_yjit_invalidate_no_singleton_class(VALUE klass) {}
static inline void rb_yjit_invalidate_ep_is_bp(const rb_iseq_t *iseq) {}

#endif // #if USE_YJIT

Expand Down
2 changes: 2 additions & 0 deletions yjit/bindgen/src/main.rs
Expand Up @@ -298,6 +298,7 @@ fn main() {
.allowlist_type("ruby_tag_type")
.allowlist_type("ruby_vm_throw_flags")
.allowlist_type("vm_check_match_type")
.allowlist_type("rb_iseq_type")

// From yjit.c
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
Expand Down Expand Up @@ -415,6 +416,7 @@ fn main() {
.allowlist_function("rb_get_iseq_body_parent_iseq")
.allowlist_function("rb_get_iseq_body_iseq_encoded")
.allowlist_function("rb_get_iseq_body_stack_max")
.allowlist_function("rb_get_iseq_body_type")
.allowlist_function("rb_get_iseq_flags_has_lead")
.allowlist_function("rb_get_iseq_flags_has_opt")
.allowlist_function("rb_get_iseq_flags_has_kw")
Expand Down
96 changes: 73 additions & 23 deletions yjit/src/codegen.rs
Expand Up @@ -46,7 +46,7 @@ type InsnGenFn = fn(
/// Represents a [core::Block] while we build it.
pub struct JITState {
/// Instruction sequence for the compiling block
iseq: IseqPtr,
pub iseq: IseqPtr,

/// The iseq index of the first instruction in the block
starting_insn_idx: IseqIdx,
Expand Down Expand Up @@ -101,6 +101,9 @@ pub struct JITState {
/// A list of classes that are not supposed to have a singleton class.
pub no_singleton_class_assumptions: Vec<VALUE>,

/// When true, the block is valid only while the base pointer is equal to the environment pointer.
pub no_ep_escape: bool,

/// When true, the block is valid only when there is a total of one ractor running
pub block_assumes_single_ractor: bool,

Expand Down Expand Up @@ -130,6 +133,7 @@ impl JITState {
bop_assumptions: vec![],
stable_constant_names_assumption: None,
no_singleton_class_assumptions: vec![],
no_ep_escape: false,
block_assumes_single_ractor: false,
perf_map: Rc::default(),
perf_stack: vec![],
Expand Down Expand Up @@ -171,6 +175,23 @@ impl JITState {
unsafe { *(self.pc.offset(arg_idx + 1)) }
}

/// Report whether the environment of the current ISEQ could escape.
///
/// vm_push_frame() always leaves EP equal to BP. After the frame is pushed,
/// however, some ISEQ setups call vm_bind_update_env(), which redirects EP,
/// and some method calls escape the environment to the heap.
fn escapes_ep(&self) -> bool {
    let iseq_type = unsafe { get_iseq_body_type(self.iseq) };

    // A <main> frame is always associated with TOPLEVEL_BINDING, and
    // Kernel#eval uses a heap EP when its Binding argument is not nil.
    if matches!(iseq_type, ISEQ_TYPE_MAIN | ISEQ_TYPE_EVAL) {
        return true;
    }

    // If this ISEQ has previously escaped EP, give up the optimization.
    iseq_escapes_ep(self.iseq)
}

// Get the index of the next instruction
fn next_insn_idx(&self) -> u16 {
self.insn_idx + insn_len(self.get_opcode()) as u16
Expand Down Expand Up @@ -250,6 +271,19 @@ impl JITState {
true
}

/// Try to register the assumption that the base pointer equals the
/// environment pointer in the current ISEQ.
/// Returns true when the assumption was recorded and is safe to rely on.
fn assume_no_ep_escape(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool {
    // The block entry exit must be ensured first; if that fails we are out of
    // space and give up without consulting the ISEQ at all.
    let has_entry_exit = jit_ensure_block_entry_exit(self, asm, ocb).is_some();

    // If EP has already been escaped in this ISEQ, the optimization stays
    // disabled to avoid an invalidation loop.
    let can_assume = has_entry_exit && !self.escapes_ep();

    if can_assume {
        self.no_ep_escape = true;
    }
    can_assume
}

/// Return the control frame pointer obtained from this state's execution
/// context (`self.ec`) via `get_ec_cfp`.
fn get_cfp(&self) -> *mut rb_control_frame_struct {
unsafe { get_ec_cfp(self.ec) }
}
Expand Down Expand Up @@ -2203,16 +2237,22 @@ fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
fn gen_getlocal_generic(
jit: &mut JITState,
asm: &mut Assembler,
ocb: &mut OutlinedCb,
ep_offset: u32,
level: u32,
) -> Option<CodegenStatus> {
// Load environment pointer EP (level 0) from CFP
let ep_opnd = gen_get_ep(asm, level);
let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm, ocb) {
// Load the local using SP register
asm.ctx.ep_opnd(-(ep_offset as i32))
} else {
// Load environment pointer EP (level 0) from CFP
let ep_opnd = gen_get_ep(asm, level);

// Load the local from the block
// val = *(vm_get_ep(GET_EP(), level) - idx);
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
let local_opnd = Opnd::mem(64, ep_opnd, offs);
// Load the local from the block
// val = *(vm_get_ep(GET_EP(), level) - idx);
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
Opnd::mem(64, ep_opnd, offs)
};

// Write the local at SP
let stack_top = if level == 0 {
Expand All @@ -2230,29 +2270,29 @@ fn gen_getlocal_generic(
fn gen_getlocal(
jit: &mut JITState,
asm: &mut Assembler,
_ocb: &mut OutlinedCb,
ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
let level = jit.get_arg(1).as_u32();
gen_getlocal_generic(jit, asm, idx, level)
gen_getlocal_generic(jit, asm, ocb, idx, level)
}

fn gen_getlocal_wc0(
jit: &mut JITState,
asm: &mut Assembler,
_ocb: &mut OutlinedCb,
ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
gen_getlocal_generic(jit, asm, idx, 0)
gen_getlocal_generic(jit, asm, ocb, idx, 0)
}

fn gen_getlocal_wc1(
jit: &mut JITState,
asm: &mut Assembler,
_ocb: &mut OutlinedCb,
ocb: &mut OutlinedCb,
) -> Option<CodegenStatus> {
let idx = jit.get_arg(0).as_u32();
gen_getlocal_generic(jit, asm, idx, 1)
gen_getlocal_generic(jit, asm, ocb, idx, 1)
}

fn gen_setlocal_generic(
Expand All @@ -2264,11 +2304,11 @@ fn gen_setlocal_generic(
) -> Option<CodegenStatus> {
let value_type = asm.ctx.get_opnd_type(StackOpnd(0));

// Load environment pointer EP at level
let ep_opnd = gen_get_ep(asm, level);

// Fallback because of write barrier
if asm.ctx.get_chain_depth() > 0 {
// Load environment pointer EP at level
let ep_opnd = gen_get_ep(asm, level);

// This function should not yield to the GC.
// void rb_vm_env_write(const VALUE *ep, int index, VALUE v)
let index = -(ep_offset as i64);
Expand All @@ -2286,16 +2326,27 @@ fn gen_setlocal_generic(
return Some(KeepCompiling);
}

// Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
// only affect heap objects being written. If we know an immediate value is being written we
// can skip this check.
if !value_type.is_imm() {
// flags & VM_ENV_FLAG_WB_REQUIRED
let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm, ocb) {
// Load flags and the local using SP register
let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32));
let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32);
(flags_opnd, local_opnd)
} else {
// Load flags and the local for the level
let ep_opnd = gen_get_ep(asm, level);
let flags_opnd = Opnd::mem(
64,
ep_opnd,
SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32,
);
(flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32))
};

// Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
// only affect heap objects being written. If we know an immediate value is being written we
// can skip this check.
if !value_type.is_imm() {
// flags & VM_ENV_FLAG_WB_REQUIRED
asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into());

// if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
Expand All @@ -2319,8 +2370,7 @@ fn gen_setlocal_generic(
let stack_top = asm.stack_pop(1);

// Write the value at the environment pointer
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top);
asm.mov(local_opnd, stack_top);

Some(KeepCompiling)
}
Expand Down
19 changes: 18 additions & 1 deletion yjit/src/core.rs
Expand Up @@ -1657,6 +1657,9 @@ impl JITState {
for klass in self.no_singleton_class_assumptions {
track_no_singleton_class_assumption(blockref, klass);
}
if self.no_ep_escape {
track_no_ep_escape_assumption(blockref, self.iseq);
}

blockref
}
Expand Down Expand Up @@ -1798,6 +1801,13 @@ impl Context {
return Opnd::mem(64, SP, offset);
}

/// Build an operand addressing a location relative to the environment
/// pointer, expressed through the SP register instead of loading EP.
/// This is valid only when a Binding object hasn't been created for the frame.
pub fn ep_opnd(&self, offset: i32) -> Opnd {
    // Distance subtracted from the SP-relative offset: the current stack
    // size plus one.
    let distance_from_sp = self.get_stack_size() as i32 + 1;
    self.sp_opnd(offset - distance_from_sp)
}

/// Stop using a register for a given stack temp.
/// This allows us to reuse the register for a value that we know is dead
/// and will no longer be used (e.g. popped stack temp).
Expand Down Expand Up @@ -3124,6 +3134,12 @@ pub fn defer_compilation(
// Likely a stub due to the increased chain depth
let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);

// Pad the block if it has the potential to be invalidated. This must be
// done before gen_fn() in case the jump is overwritten by a fallthrough.
if jit.block_entry_exit.is_some() {
asm.pad_inval_patch();
}

// Call the branch generation function
asm_comment!(asm, "defer_compilation");
asm.mark_branch_start(&branch);
Expand Down Expand Up @@ -3307,9 +3323,10 @@ pub fn invalidate_block_version(blockref: &BlockRef) {

assert!(
cb.get_write_ptr() <= block_end,
"invalidation wrote past end of block (code_size: {:?}, new_size: {})",
"invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})",
block.code_size(),
cb.get_write_ptr().as_offset() - block_start.as_offset(),
block.start_addr.raw_ptr(cb),
);
cb.set_write_ptr(cur_pos);
cb.set_dropped_bytes(cur_dropped_bytes);
Expand Down
1 change: 1 addition & 0 deletions yjit/src/cruby.rs
Expand Up @@ -170,6 +170,7 @@ pub use rb_iseq_encoded_size as get_iseq_encoded_size;
pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq;
pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded;
pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max;
pub use rb_get_iseq_body_type as get_iseq_body_type;
pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead;
pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt;
pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw;
Expand Down
11 changes: 11 additions & 0 deletions yjit/src/cruby_bindings.inc.rs
Expand Up @@ -478,6 +478,16 @@ pub struct iseq_inline_iv_cache_entry {
pub struct iseq_inline_cvar_cache_entry {
pub entry: *mut rb_cvar_class_tbl_entry,
}
pub const ISEQ_TYPE_TOP: rb_iseq_type = 0;
pub const ISEQ_TYPE_METHOD: rb_iseq_type = 1;
pub const ISEQ_TYPE_BLOCK: rb_iseq_type = 2;
pub const ISEQ_TYPE_CLASS: rb_iseq_type = 3;
pub const ISEQ_TYPE_RESCUE: rb_iseq_type = 4;
pub const ISEQ_TYPE_ENSURE: rb_iseq_type = 5;
pub const ISEQ_TYPE_EVAL: rb_iseq_type = 6;
pub const ISEQ_TYPE_MAIN: rb_iseq_type = 7;
pub const ISEQ_TYPE_PLAIN: rb_iseq_type = 8;
pub type rb_iseq_type = u32;
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
Expand Down Expand Up @@ -1153,6 +1163,7 @@ extern "C" {
pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE;
pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_get_iseq_body_type(iseq: *const rb_iseq_t) -> rb_iseq_type;
pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool;
pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool;
Expand Down

0 comments on commit 4cc58ea

Please sign in to comment.