Speed up IV writes #6767

Merged
merged 9 commits on Dec 2, 2022
6 changes: 6 additions & 0 deletions shape.c
@@ -306,6 +306,12 @@ rb_shape_id_num_bits(void)
return SHAPE_ID_NUM_BITS;
}

int32_t
rb_shape_id_offset(void)
{
return 8 - rb_shape_id_num_bits() / 8;
}

rb_shape_t *
rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape)
{
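
For context, rb_shape_id_offset() computes the byte offset of the shape ID within the object header: the shape ID is packed into the upper bits of the 8-byte RBasic flags word, so on a little-endian 64-bit build the offset works out to 8 - 32/8 = 4 bytes for a 32-bit shape ID, or 8 - 16/8 = 6 for a 16-bit one. A minimal Rust sketch of the same arithmetic, with the helper name invented for illustration:

// Sketch only: mirrors the C helper above, assuming the shape ID occupies
// the top `num_bits` bits of a 64-bit, little-endian flags word.
fn shape_id_offset(num_bits: i32) -> i32 {
    8 - num_bits / 8
}

fn main() {
    assert_eq!(shape_id_offset(32), 4); // 32-bit shape IDs
    assert_eq!(shape_id_offset(16), 6); // 16-bit shape IDs
}
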
1 change: 1 addition & 0 deletions shape.h
@@ -124,6 +124,7 @@ static inline shape_id_t RCLASS_SHAPE_ID(VALUE obj)
bool rb_shape_root_shape_p(rb_shape_t* shape);
rb_shape_t * rb_shape_get_root_shape(void);
uint8_t rb_shape_id_num_bits(void);
int32_t rb_shape_id_offset(void);

rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id);
rb_shape_t * rb_shape_get_parent(rb_shape_t * shape);
7 changes: 7 additions & 0 deletions yjit/bindgen/src/main.rs
@@ -86,10 +86,15 @@ fn main() {
.allowlist_function("rb_shape_get_shape_id")
.allowlist_function("rb_shape_get_shape_by_id")
.allowlist_function("rb_shape_id_num_bits")
.allowlist_function("rb_shape_id_offset")
.allowlist_function("rb_shape_get_iv_index")
.allowlist_function("rb_shape_get_next")
.allowlist_function("rb_shape_id")
.allowlist_function("rb_shape_transition_shape_capa")

// From ruby/internal/intern/object.h
.allowlist_function("rb_obj_is_kind_of")
.allowlist_function("rb_obj_frozen_p")

// From ruby/internal/encoding/encoding.h
.allowlist_type("ruby_encoding_consts")
@@ -131,6 +136,7 @@ fn main() {
.allowlist_function("rb_gc_mark")
.allowlist_function("rb_gc_mark_movable")
.allowlist_function("rb_gc_location")
.allowlist_function("rb_gc_writebarrier")

// VALUE variables for Ruby class objects
// From include/ruby/internal/globals.h
@@ -314,6 +320,7 @@ fn main() {

// From internal/variable.h
.allowlist_function("rb_gvar_(get|set)")
.allowlist_function("rb_ensure_iv_list_size")

// From include/ruby/internal/intern/variable.h
.allowlist_function("rb_attr_get")
259 changes: 237 additions & 22 deletions yjit/src/codegen.rs
@@ -1886,6 +1886,9 @@ fn jit_chain_guard(
// up to 5 different classes, and embedded or not for each
pub const GET_IVAR_MAX_DEPTH: i32 = 10;

// up to 5 different classes, and embedded or not for each
pub const SET_IVAR_MAX_DEPTH: i32 = 10;

// hashes and arrays
pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2;

@@ -2010,7 +2013,7 @@ fn gen_get_ivar(
}

let ivar_index = unsafe {
let shape_id = comptime_receiver.shape_of();
let shape_id = comptime_receiver.shape_id_of();
let shape = rb_shape_get_shape_by_id(shape_id);
let mut ivar_index: u32 = 0;
if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) {
@@ -2043,8 +2046,8 @@

let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
let shape_bit_size = unsafe { rb_shape_id_num_bits() }; // either 16 or 32 depending on RUBY_DEBUG
let shape_byte_size = shape_bit_size / 8;
let shape_opnd = Opnd::mem(shape_bit_size, recv, RUBY_OFFSET_RBASIC_FLAGS + (8 - shape_byte_size as i32));
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(shape_bit_size, recv, shape_id_offset);

asm.comment("guard shape");
asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64));
@@ -2134,33 +2137,245 @@ fn gen_getinstancevariable(
)
}

// Generate an IV write.
// This function doesn't deal with writing the shape, or expanding an object
// to use an IV buffer if necessary. That is the caller's responsibility.
fn gen_write_iv(
asm: &mut Assembler,
comptime_receiver: VALUE,
recv: Opnd,
ivar_index: usize,
set_value: Opnd,
extension_needed: bool)
{
// Compile time self is embedded and the ivar index lands within the object
let embed_test_result = comptime_receiver.embedded_p() && !extension_needed;

if embed_test_result {
// Find the IV offset
let offs = ROBJECT_OFFSET_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32;
let ivar_opnd = Opnd::mem(64, recv, offs);

// Write the IV
asm.comment("write IV");
asm.mov(ivar_opnd, set_value);
} else {
// Compile time value is *not* embedded.

// Get a pointer to the extended table
let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR));

// Write the ivar into the extended table
let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);

asm.comment("write IV");
asm.mov(ivar_opnd, set_value);
}
}

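The embedded/extended split above mirrors how CRuby lays out T_OBJECT instance variables: embedded objects keep their IVs inline in the object slot (as.ary), while extended objects keep a pointer to a separately allocated buffer (as.heap.ivptr). A standalone sketch of the two addressing modes, using plain integers instead of Opnds and placeholder offsets rather than the real ROBJECT_OFFSET_* constants:

// Illustrative only: computes the address of IV slot `ivar_index` the same
// way gen_write_iv() does, for an embedded vs. an extended object.
const SIZEOF_VALUE: usize = 8;

fn ivar_slot_addr(obj_addr: usize, embedded: bool, ivptr: usize,
                  offset_as_ary: usize, ivar_index: usize) -> usize {
    if embedded {
        // IVs live inline, right after the object header
        obj_addr + offset_as_ary + ivar_index * SIZEOF_VALUE
    } else {
        // IVs live in the external buffer pointed to by as.heap.ivptr
        ivptr + ivar_index * SIZEOF_VALUE
    }
}

fn main() {
    // Hypothetical addresses and offsets, just to exercise both paths
    assert_eq!(ivar_slot_addr(0x1000, true, 0, 16, 2), 0x1000 + 16 + 16);
    assert_eq!(ivar_slot_addr(0x1000, false, 0x2000, 16, 2), 0x2000 + 16);
}
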
fn gen_setinstancevariable(
jit: &mut JITState,
ctx: &mut Context,
asm: &mut Assembler,
_ocb: &mut OutlinedCb,
ocb: &mut OutlinedCb,
) -> CodegenStatus {
let id = jit_get_arg(jit, 0).as_usize();
let ic = jit_get_arg(jit, 1).as_u64(); // type IVC
let starting_context = *ctx; // make a copy for use with jit_chain_guard

// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_prepare_routine_call(jit, ctx, asm);
// Defer compilation so we can specialize on a runtime `self`
if !jit_at_current_insn(jit) {
defer_compilation(jit, ctx, asm, ocb);
return EndBlock;
}

// Get the operands from the stack
let val_opnd = ctx.stack_pop(1);
let ivar_name = jit_get_arg(jit, 0).as_u64();
let comptime_receiver = jit_peek_at_self(jit);
let comptime_val_klass = comptime_receiver.class_of();

// Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
asm.ccall(
rb_vm_setinstancevariable as *const u8,
vec![
Opnd::const_ptr(jit.iseq as *const u8),
Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
id.into(),
val_opnd,
Opnd::const_ptr(ic as *const u8),
]
);
// If the comptime receiver is frozen, writing an IV will raise an exception
// and we don't want to JIT code to deal with that situation.
if comptime_receiver.is_frozen() {
return CantCompile;
}

let (_, stack_type) = ctx.get_opnd_mapping(StackOpnd(0));

// Check if the comptime class uses a custom allocator
let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
let uses_custom_allocator = match custom_allocator {
Some(alloc_fun) => {
let allocate_instance = rb_class_allocate_instance as *const u8;
alloc_fun as *const u8 != allocate_instance
}
None => false,
};

// Check if the comptime receiver is a T_OBJECT
let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };

// If the receiver isn't a T_OBJECT, or uses a custom allocator,
// then just write out the IV write as a function call
if !receiver_t_object || uses_custom_allocator {
asm.comment("call rb_vm_setinstancevariable()");

let ic = jit_get_arg(jit, 1).as_u64(); // type IVC

// The function could raise exceptions.
jit_prepare_routine_call(jit, ctx, asm);

// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_prepare_routine_call(jit, ctx, asm);

// Get the operands from the stack
let val_opnd = ctx.stack_pop(1);

// Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
asm.ccall(
rb_vm_setinstancevariable as *const u8,
vec![
Opnd::const_ptr(jit.iseq as *const u8),
Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
ivar_name.into(),
val_opnd,
Opnd::const_ptr(ic as *const u8),
]
);
} else {
// Get the iv index
let ivar_index = unsafe {
let shape_id = comptime_receiver.shape_id_of();
let shape = rb_shape_get_shape_by_id(shape_id);
let mut ivar_index: u32 = 0;
if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) {
Some(ivar_index as usize)
} else {
None
}
};

// Get the receiver
let mut recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));

let recv_opnd = SelfOpnd;
let recv_type = ctx.get_opnd_type(recv_opnd);

// Generate a side exit
let side_exit = get_side_exit(jit, ocb, ctx);

// Upgrade type
if !recv_type.is_heap() { // Must be a heap type
ctx.upgrade_opnd_type(recv_opnd, Type::UnknownHeap);
guard_object_is_heap(asm, recv, side_exit);
}

let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
let shape_bit_size = unsafe { rb_shape_id_num_bits() }; // either 16 or 32 depending on RUBY_DEBUG
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(shape_bit_size, recv, shape_id_offset);

asm.comment("guard shape");
asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64));
let megamorphic_side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic).into();
jit_chain_guard(
JCC_JNE,
jit,
&starting_context,
asm,
ocb,
SET_IVAR_MAX_DEPTH,
megamorphic_side_exit,
);

let write_val = ctx.stack_pop(1);

match ivar_index {
// If we don't have an instance variable index, then we need to
// transition out of the current shape.
None => {
let mut shape = comptime_receiver.shape_of();

// If the object doesn't have the capacity to store the IV,
// then we'll need to allocate it.
let needs_extension = unsafe { (*shape).next_iv_index >= (*shape).capacity };

// We can write to the object, but we need to transition the shape
let ivar_index = unsafe { (*shape).next_iv_index } as usize;

if needs_extension {
let current_capacity = unsafe { (*shape).capacity };
let newsize = current_capacity * 2;

// We need to add an extended table to the object
// First, create an outgoing transition that increases the
// capacity
shape = unsafe {
rb_shape_transition_shape_capa(shape, newsize)
};

// Generate the C call so that runtime code will increase
// the capacity and set the buffer.
asm.ccall(rb_ensure_iv_list_size as *const u8,
vec![
recv,
Opnd::UImm(current_capacity.into()),
Opnd::UImm(newsize.into())
]
);

// Load the receiver again after the function call
recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF))
}

let new_shape_id = unsafe {
rb_shape_id(rb_shape_get_next(shape, comptime_receiver, ivar_name))
};

gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension);

asm.comment("write shape");

let shape_bit_size = unsafe { rb_shape_id_num_bits() }; // either 16 or 32 depending on RUBY_DEBUG
let shape_id_offset = unsafe { rb_shape_id_offset() };
let shape_opnd = Opnd::mem(shape_bit_size, recv, shape_id_offset);

// Store the new shape
asm.store(shape_opnd, Opnd::UImm(new_shape_id as u64));
},

Some(ivar_index) => {
// If the iv index already exists, then we don't need to
// transition to a new shape. The reason is because we find
// the iv index by searching up the shape tree. If we've
// made the transition already, then there's no reason to
// update the shape on the object. Just set the IV.
gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false);
},
}

// If we know the stack value is an immediate, there's no need to
// generate WB code.
if !stack_type.is_imm() {
let skip_wb = asm.new_label("skip_wb");
// If the value we're writing is an immediate, we don't need to WB
asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into());
asm.jnz(skip_wb);

// If the value we're writing is nil or false, we don't need to WB
asm.cmp(write_val, Qnil.into());
asm.jbe(skip_wb);

asm.comment("write barrier");
asm.ccall(
rb_gc_writebarrier as *const u8,
vec![
recv,
write_val,
]
);

asm.write_label(skip_wb);
}
}

KeepCompiling
}
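
One subtlety in the write-barrier handling above: the barrier is skipped both at compile time (when the stack type is already known to be an immediate) and at run time, where the emitted test/cmp/jbe sequence filters out special constants. A runnable sketch of that runtime predicate, using the 64-bit, flonum-enabled tag values from ruby.h for illustration:

// Returns true when rb_gc_writebarrier() actually needs to run, i.e. when
// the written value is a heap object rather than a special constant.
fn needs_write_barrier(val: u64) -> bool {
    const RUBY_IMMEDIATE_MASK: u64 = 0x07; // fixnum/flonum/symbol tag bits
    const QNIL: u64 = 0x08;                // Qfalse is 0x00, Qnil is 0x08

    // `test val, RUBY_IMMEDIATE_MASK; jnz skip_wb`
    if val & RUBY_IMMEDIATE_MASK != 0 {
        return false;
    }
    // `cmp val, Qnil; jbe skip_wb` catches both Qfalse (0) and Qnil (8)
    if val <= QNIL {
        return false;
    }
    true
}

fn main() {
    assert!(!needs_write_barrier(0x00));       // false
    assert!(!needs_write_barrier(0x08));       // nil
    assert!(!needs_write_barrier(0x03));       // Integer 1 (fixnum tag)
    assert!(needs_write_barrier(0x7f80_1230)); // some heap object pointer
}
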
24 changes: 23 additions & 1 deletion yjit/src/cruby.rs
@@ -394,10 +394,32 @@ impl VALUE {
unsafe { CLASS_OF(self) }
}

pub fn shape_of(self) -> u32 {
pub fn is_frozen(self) -> bool {
unsafe { rb_obj_frozen_p(self) != VALUE(0) }
}

pub fn shape_id_of(self) -> u32 {
unsafe { rb_shape_get_shape_id(self) }
}

pub fn shape_of(self) -> *mut rb_shape {
unsafe {
let shape = rb_shape_get_shape_by_id(self.shape_id_of());

if shape.is_null() {
panic!("Shape should not be null");
} else {
shape
}
}
}

pub fn embedded_p(self) -> bool {
unsafe {
FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0)
}
}

pub fn as_isize(self) -> isize {
let VALUE(is) = self;
is as isize
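
Taken together, these VALUE helpers feed a three-way compile-time dispatch in gen_setinstancevariable(): bail out for frozen receivers, fall back to the rb_vm_setinstancevariable() call for non-T_OBJECT receivers or classes with custom allocators, and emit the shape-guarded inline write otherwise. A rough sketch of that decision, with the enum and function names invented for illustration (not actual YJIT API):

// Sketch of the specialization decision made at JIT compile time.
enum SetIvarStrategy {
    CantCompile,   // frozen receiver: the write would raise at run time
    CallCFunction, // not a T_OBJECT, or the class uses a custom allocator
    InlineWrite,   // guard the shape and write the IV slot directly
}

fn choose_strategy(frozen: bool, is_t_object: bool, custom_alloc: bool) -> SetIvarStrategy {
    if frozen {
        SetIvarStrategy::CantCompile
    } else if !is_t_object || custom_alloc {
        SetIvarStrategy::CallCFunction
    } else {
        SetIvarStrategy::InlineWrite
    }
}

fn main() {
    // A plain, unfrozen T_OBJECT with the default allocator takes the fast path
    assert!(matches!(choose_strategy(false, true, false), SetIvarStrategy::InlineWrite));
}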