
YJIT: Use u32 for CodePtr to save 4 bytes each
We've long had a size restriction on the code memory region such that a
u32 could refer to everything. This commit capitalizes on that
restriction by shrinking `CodePtr` from 8 bytes to 4.

To derive a full raw pointer from a `CodePtr`, one needs a base pointer.
Both `CodeBlock` and `VirtualMemory` can be used for this purpose. The
base pointer is readily available everywhere except in the case of
the `jit_return` "branch". Generalize lea_label() to lea_jump_target()
in the IR to delay deriving the `jit_return` address until `compile()`,
when the base pointer is available.

On railsbench, this yields roughly a 1% reduction in `yjit_alloc_size`
(58,397,765 bytes to 57,742,248 bytes).
XrXr committed Nov 7, 2023
1 parent aa6642d commit a1c61f0
Showing 14 changed files with 175 additions and 161 deletions.
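The gist of the change, as a minimal sketch in Rust (assumed shapes, not the exact upstream definitions; the real `CodePtrBase` trait lives in `yjit/src/virtualmem.rs` and the real methods carry more checks). It shows why call sites below change from `into_usize()`/`raw_ptr()` to `raw_addr(self)`/`raw_ptr(cb)`: deriving a raw address now requires a base-pointer provider.

    use std::ptr::NonNull;

    /// Anything that can provide the base address of the code region,
    /// e.g. a CodeBlock or a VirtualMemory.
    pub trait CodePtrBase {
        fn base_ptr(&self) -> NonNull<u8>;
    }

    /// 4 bytes (a u32 offset into the code region) instead of a full
    /// 8-byte raw pointer.
    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub struct CodePtr(u32);

    impl CodePtr {
        /// Recover the raw pointer by offsetting from a base pointer.
        pub fn raw_ptr(self, base: &impl CodePtrBase) -> *const u8 {
            base.base_ptr().as_ptr().wrapping_add(self.0 as usize)
        }

        /// The raw address as an integer, for range math and map keys.
        pub fn raw_addr(self, base: &impl CodePtrBase) -> usize {
            self.raw_ptr(base) as usize
        }

        /// Offset from the start of the region; enough for relative jumps
        /// within the region, no base pointer needed.
        pub fn as_offset(self) -> i64 {
            self.0.into()
        }

        pub fn add_bytes(self, bytes: usize) -> Self {
            CodePtr(self.0 + u32::try_from(bytes).unwrap())
        }
    }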
yjit/src/asm/mod.rs: 23 additions & 16 deletions
@@ -325,10 +325,10 @@ impl CodeBlock {
     /// Return the address ranges of a given address range that this CodeBlock can write.
     #[allow(dead_code)]
     pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
-        let region_start = self.get_ptr(0).into_usize();
-        let region_end = self.get_ptr(self.get_mem_size()).into_usize();
-        let mut start = start_ptr.into_usize();
-        let end = std::cmp::min(end_ptr.into_usize(), region_end);
+        let region_start = self.get_ptr(0).raw_addr(self);
+        let region_end = self.get_ptr(self.get_mem_size()).raw_addr(self);
+        let mut start = start_ptr.raw_addr(self);
+        let end = std::cmp::min(end_ptr.raw_addr(self), region_end);

         let freed_pages = self.freed_pages.as_ref().as_ref();
         let mut addrs = vec![];
@@ -366,7 +366,7 @@ impl CodeBlock {
     /// If not, this becomes an inline no-op.
     #[cfg(feature = "disasm")]
     pub fn add_comment(&mut self, comment: &str) {
-        let cur_ptr = self.get_write_ptr().into_usize();
+        let cur_ptr = self.get_write_ptr().raw_addr(self);

         // If there's no current list of comments for this line number, add one.
         let this_line_comments = self.asm_comments.entry(cur_ptr).or_default();
@@ -388,7 +388,7 @@ impl CodeBlock {
     #[allow(unused_variables)]
     #[cfg(feature = "disasm")]
     pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) {
-        for addr in start_addr.into_usize()..end_addr.into_usize() {
+        for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) {
             self.asm_comments.remove(&addr);
         }
     }
@@ -424,8 +424,8 @@ impl CodeBlock {

     // Set the current write position from a pointer
     pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
-        let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
-        self.set_pos(pos);
+        let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset();
+        self.set_pos(pos.try_into().unwrap());
     }

     /// Get a (possibly dangling) direct pointer into the executable memory block
@@ -435,19 +435,19 @@

     /// Convert an address range to memory page indexes against a num_pages()-sized array.
     pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
-        let mem_start = self.mem_block.borrow().start_ptr().into_usize();
-        let mem_end = self.mem_block.borrow().mapped_end_ptr().into_usize();
-        assert!(mem_start <= start_addr.into_usize());
-        assert!(start_addr.into_usize() <= end_addr.into_usize());
-        assert!(end_addr.into_usize() <= mem_end);
+        let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self);
+        let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self);
+        assert!(mem_start <= start_addr.raw_addr(self));
+        assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self));
+        assert!(end_addr.raw_addr(self) <= mem_end);

         // Ignore empty code ranges
         if start_addr == end_addr {
             return vec![];
         }

-        let start_page = (start_addr.into_usize() - mem_start) / self.page_size;
-        let end_page = (end_addr.into_usize() - mem_start - 1) / self.page_size;
+        let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size;
+        let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size;
         (start_page..=end_page).collect() // TODO: consider returning an iterator
     }
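A worked example of the page math above, as a sketch (the 16 KiB page_size is assumed for illustration, not the configured value):

    let page_size = 0x4000; // 16 KiB, assumed for illustration
    let (mem_start, start_addr, end_addr) = (0x0, 0x3F00, 0x8100);
    let start_page = (start_addr - mem_start) / page_size;     // 0x3F00 / 0x4000 = 0
    let end_page = (end_addr - mem_start - 1) / page_size;     // 0x80FF / 0x4000 = 2
    assert_eq!(vec![0, 1, 2], (start_page..=end_page).collect::<Vec<_>>());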

@@ -716,13 +716,20 @@ impl CodeBlock {
 impl fmt::LowerHex for CodeBlock {
     fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
         for pos in 0..self.write_pos {
-            let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
+            let mem_block = &*self.mem_block.borrow();
+            let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() };
             fmtr.write_fmt(format_args!("{:02x}", byte))?;
         }
         Ok(())
     }
 }

+impl crate::virtualmem::CodePtrBase for CodeBlock {
+    fn base_ptr(&self) -> std::ptr::NonNull<u8> {
+        self.mem_block.borrow().base_ptr()
+    }
+}
+
 /// Wrapper struct so we can use the type system to distinguish
 /// Between the inlined and outlined code blocks
 pub struct OutlinedCb {
yjit/src/asm/x86_64/mod.rs: 2 additions & 7 deletions
@@ -362,11 +362,6 @@ pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
     uimm_opnd(ptr as u64)
 }

-pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd
-{
-    uimm_opnd(code_ptr.raw_ptr() as u64)
-}
-
 /// Write the REX byte
 fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) {
     // 0 1 0 0 w r x b
@@ -696,7 +691,7 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
     let end_ptr = cb.get_ptr(cb.write_pos + 5);

     // Compute the jump offset
-    let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64();
+    let rel64: i64 = dst_ptr as i64 - end_ptr.raw_ptr(cb) as i64;

     // If the offset fits in 32-bit
     if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
@@ -897,7 +892,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
     let end_ptr = cb.get_ptr(cb.write_pos + 4);

     // Compute the jump offset
-    let rel64 = dst_ptr.into_i64() - end_ptr.into_i64();
+    let rel64 = dst_ptr.as_offset() - end_ptr.as_offset();

     if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
         // Write the relative 32-bit jump offset
yjit/src/asm/x86_64/tests.rs: 4 additions & 4 deletions
@@ -68,7 +68,7 @@ fn test_call_ptr() {
     // calling a lower address
     check_bytes("e8fbffffff", |cb| {
         let ptr = cb.get_write_ptr();
-        call_ptr(cb, RAX, ptr.raw_ptr());
+        call_ptr(cb, RAX, ptr.raw_ptr(cb));
     });
 }

@@ -442,15 +442,15 @@ fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> {
 fn block_comments() {
     let mut cb = super::CodeBlock::new_dummy(4096);

-    let first_write_ptr = cb.get_write_ptr().into_usize();
+    let first_write_ptr = cb.get_write_ptr().raw_addr(&cb);
     cb.add_comment("Beginning");
     xor(&mut cb, EAX, EAX); // 2 bytes long
-    let second_write_ptr = cb.get_write_ptr().into_usize();
+    let second_write_ptr = cb.get_write_ptr().raw_addr(&cb);
     cb.add_comment("Two bytes in");
     cb.add_comment("Still two bytes in");
     cb.add_comment("Still two bytes in"); // Duplicate, should be ignored
     test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long
-    let third_write_ptr = cb.get_write_ptr().into_usize();
+    let third_write_ptr = cb.get_write_ptr().raw_addr(&cb);
     cb.add_comment("Ten bytes in");

     assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap());
yjit/src/backend/arm64/mod.rs: 25 additions & 19 deletions
@@ -5,6 +5,7 @@ use crate::asm::arm64::*;
 use crate::cruby::*;
 use crate::backend::ir::*;
 use crate::virtualmem::CodePtr;
+use crate::utils::*;

 // Use the arm64 register type for this platform
 pub type Reg = A64Reg;
@@ -97,13 +98,13 @@ fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
     #[cfg(not(test))]
     {
         let end = cb.get_write_ptr();
-        unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
+        unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
     }
 }

 fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
-    let src_addr = cb.get_write_ptr().into_i64();
-    let dst_addr = dst_ptr.into_i64();
+    let src_addr = cb.get_write_ptr().as_offset();
+    let dst_addr = dst_ptr.as_offset();

     // If the offset is short enough, then we'll use the
     // branch instruction. Otherwise, we'll move the
@@ -716,8 +717,8 @@ impl Assembler
 fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
     match target {
         Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => {
-            let dst_addr = dst_ptr.into_i64();
-            let src_addr = cb.get_write_ptr().into_i64();
+            let dst_addr = dst_ptr.as_offset();
+            let src_addr = cb.get_write_ptr().as_offset();

             let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
                 // If the jump offset fits into the conditional jump as
@@ -746,7 +747,7 @@ impl Assembler
             } else {
                 // Otherwise, we need to load the address into a
                 // register and use the branch register instruction.
-                let dst_addr = dst_ptr.into_u64();
+                let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64();
                 let load_insns: i32 = emit_load_size(dst_addr).into();

                 // We're going to write out the inverse condition so
@@ -1023,14 +1024,20 @@ impl Assembler
                     }
                 };
             },
-            Insn::LeaLabel { out, target, .. } => {
-                let label_idx = target.unwrap_label_idx();
-
-                cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| {
-                    adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
-                });
-
-                mov(cb, out.into(), Self::SCRATCH0);
+            Insn::LeaJumpTarget { out, target, .. } => {
+                if let Target::Label(label_idx) = target {
+                    // Set output to the raw address of the label
+                    cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
+                        adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
+                    });
+
+                    mov(cb, out.into(), Self::SCRATCH0);
+                } else {
+                    // Set output to the jump target's raw address
+                    let target_code = target.unwrap_code_ptr();
+                    let target_addr = target_code.raw_addr(cb).as_u64();
+                    emit_load_value(cb, out.into(), target_addr);
+                }
             },
             Insn::CPush(opnd) => {
                 emit_push(cb, opnd.into());
@@ -1065,7 +1072,7 @@ impl Assembler
             },
             Insn::CCall { fptr, .. } => {
                 // The offset to the call target in bytes
-                let src_addr = cb.get_write_ptr().into_i64();
+                let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
                 let dst_addr = *fptr as i64;

                 // Use BL if the offset is short enough to encode as an immediate.
@@ -1317,8 +1324,7 @@ mod tests {
     fn test_emit_je_fits_into_bcond() {
         let (mut asm, mut cb) = setup_asm();

-        let offset = 80;
-        let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
+        let target: CodePtr = cb.get_write_ptr().add_bytes(80);

         asm.je(Target::CodePtr(target));
         asm.compile_with_num_regs(&mut cb, 0);
@@ -1329,7 +1335,7 @@
         let (mut asm, mut cb) = setup_asm();

         let offset = 1 << 21;
-        let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
+        let target: CodePtr = cb.get_write_ptr().add_bytes(offset);

         asm.je(Target::CodePtr(target));
         asm.compile_with_num_regs(&mut cb, 0);
@@ -1340,7 +1346,7 @@
         let (mut asm, mut cb) = setup_asm();

         let label = asm.new_label("label");
-        let opnd = asm.lea_label(label);
+        let opnd = asm.lea_jump_target(label);

         asm.write_label(label);
         asm.bake_string("Hello, world!");
@@ -1590,7 +1596,7 @@ mod tests {
         assert!(gap > 0b1111111111111111111);

         let instruction_at_starting_pos: [u8; 4] = unsafe {
-            std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(), 4)
+            std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4)
         }.try_into().unwrap();
         assert_eq!(
             0b000101 << 26_u32,
yjit/src/backend/ir.rs: 11 additions & 12 deletions
@@ -447,9 +447,8 @@ pub enum Insn {
     // Add a label into the IR at the point that this instruction is added.
     Label(Target),

-    // Load effective address relative to the current instruction pointer. It
-    // accepts a single signed immediate operand.
-    LeaLabel { target: Target, out: Opnd },
+    /// Get the code address of a jump target
+    LeaJumpTarget { target: Target, out: Opnd },

     // Load effective address
     Lea { opnd: Opnd, out: Opnd },
@@ -539,7 +538,7 @@ impl Insn {
             Insn::Jo(target) |
             Insn::Jz(target) |
             Insn::Label(target) |
-            Insn::LeaLabel { target, .. } => {
+            Insn::LeaJumpTarget { target, .. } => {
                 Some(target)
             }
             _ => None,
@@ -587,7 +586,7 @@ impl Insn {
             Insn::JoMul(_) => "JoMul",
             Insn::Jz(_) => "Jz",
             Insn::Label(_) => "Label",
-            Insn::LeaLabel { .. } => "LeaLabel",
+            Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
             Insn::Lea { .. } => "Lea",
             Insn::LiveReg { .. } => "LiveReg",
             Insn::Load { .. } => "Load",
@@ -626,7 +625,7 @@ impl Insn {
             Insn::CSelNZ { out, .. } |
             Insn::CSelZ { out, .. } |
             Insn::Lea { out, .. } |
-            Insn::LeaLabel { out, .. } |
+            Insn::LeaJumpTarget { out, .. } |
             Insn::LiveReg { out, .. } |
             Insn::Load { out, .. } |
             Insn::LoadSExt { out, .. } |
@@ -659,7 +658,7 @@ impl Insn {
             Insn::CSelNZ { out, .. } |
             Insn::CSelZ { out, .. } |
             Insn::Lea { out, .. } |
-            Insn::LeaLabel { out, .. } |
+            Insn::LeaJumpTarget { out, .. } |
             Insn::LiveReg { out, .. } |
             Insn::Load { out, .. } |
             Insn::LoadSExt { out, .. } |
@@ -688,7 +687,7 @@ impl Insn {
             Insn::Jnz(target) |
             Insn::Jo(target) |
             Insn::Jz(target) |
-            Insn::LeaLabel { target, .. } => Some(target),
+            Insn::LeaJumpTarget { target, .. } => Some(target),
             _ => None
         }
     }
@@ -741,7 +740,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
             Insn::JoMul(_) |
             Insn::Jz(_) |
             Insn::Label(_) |
-            Insn::LeaLabel { .. } |
+            Insn::LeaJumpTarget { .. } |
             Insn::PadInvalPatch |
             Insn::PosMarker(_) => None,
             Insn::CPopInto(opnd) |
@@ -842,7 +841,7 @@ impl<'a> InsnOpndMutIterator<'a> {
             Insn::JoMul(_) |
             Insn::Jz(_) |
             Insn::Label(_) |
-            Insn::LeaLabel { .. } |
+            Insn::LeaJumpTarget { .. } |
             Insn::PadInvalPatch |
             Insn::PosMarker(_) => None,
             Insn::CPopInto(opnd) |
@@ -1830,9 +1829,9 @@ impl Assembler {
     }

     #[must_use]
-    pub fn lea_label(&mut self, target: Target) -> Opnd {
+    pub fn lea_jump_target(&mut self, target: Target) -> Opnd {
         let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS);
-        self.push_insn(Insn::LeaLabel { target, out });
+        self.push_insn(Insn::LeaJumpTarget { target, out });
         out
     }

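A hypothetical call site for the generalized method (not part of this commit's diff; `jit_return_ptr` and `cfp_jit_return_opnd` are illustrative names), showing how a `jit_return` address can stay a `Target::CodePtr` until `compile()`:

    // The address operand is only materialized at compile() time, when the
    // CodeBlock base pointer is available to turn the CodePtr into a raw address.
    let ret_addr = asm.lea_jump_target(Target::CodePtr(jit_return_ptr));
    asm.store(cfp_jit_return_opnd, ret_addr);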
yjit/src/backend/tests.rs: 3 additions & 3 deletions
@@ -231,7 +231,7 @@ fn test_jcc_ptr()
 {
     let (mut asm, mut cb) = setup_asm();

-    let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
+    let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
     let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
     asm.test(
         Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr()
 {
     let (mut asm, mut cb) = setup_asm();

-    let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
+    let stub = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
     asm.jmp(stub);

     asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo()
 {
     let (mut asm, mut cb) = setup_asm();

-    let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
+    let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));

     let arg1 = Opnd::mem(64, SP, 0);
     let arg0 = Opnd::mem(64, SP, 8);
