Skip to content

Commit a1c61f0

Browse files
committed
YJIT: Use u32 for CodePtr to save 4 bytes each
We've long had a size restriction on the code memory region such that a u32 could refer to everything. This commit capitalizes on this restriction by shrinking `CodePtr` from 8 bytes to 4. To derive a full raw pointer from a `CodePtr`, one needs a base pointer. Both `CodeBlock` and `VirtualMemory` can be used for this purpose. The base pointer is readily available everywhere except in the case of the `jit_return` "branch". Generalize `lea_label()` to `lea_jump_target()` in the IR to delay deriving the `jit_return` address until `compile()`, when the base pointer is available. On railsbench, this yields roughly a 1% reduction in `yjit_alloc_size` (58,397,765 to 57,742,248).
1 parent aa6642d commit a1c61f0

File tree

14 files changed

+175
-161
lines changed

14 files changed

+175
-161
lines changed

yjit/src/asm/mod.rs

Lines changed: 23 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -325,10 +325,10 @@ impl CodeBlock {
325325
/// Return the address ranges of a given address range that this CodeBlock can write.
326326
#[allow(dead_code)]
327327
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
328-
let region_start = self.get_ptr(0).into_usize();
329-
let region_end = self.get_ptr(self.get_mem_size()).into_usize();
330-
let mut start = start_ptr.into_usize();
331-
let end = std::cmp::min(end_ptr.into_usize(), region_end);
328+
let region_start = self.get_ptr(0).raw_addr(self);
329+
let region_end = self.get_ptr(self.get_mem_size()).raw_addr(self);
330+
let mut start = start_ptr.raw_addr(self);
331+
let end = std::cmp::min(end_ptr.raw_addr(self), region_end);
332332

333333
let freed_pages = self.freed_pages.as_ref().as_ref();
334334
let mut addrs = vec![];
@@ -366,7 +366,7 @@ impl CodeBlock {
366366
/// If not, this becomes an inline no-op.
367367
#[cfg(feature = "disasm")]
368368
pub fn add_comment(&mut self, comment: &str) {
369-
let cur_ptr = self.get_write_ptr().into_usize();
369+
let cur_ptr = self.get_write_ptr().raw_addr(self);
370370

371371
// If there's no current list of comments for this line number, add one.
372372
let this_line_comments = self.asm_comments.entry(cur_ptr).or_default();
@@ -388,7 +388,7 @@ impl CodeBlock {
388388
#[allow(unused_variables)]
389389
#[cfg(feature = "disasm")]
390390
pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) {
391-
for addr in start_addr.into_usize()..end_addr.into_usize() {
391+
for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) {
392392
self.asm_comments.remove(&addr);
393393
}
394394
}
@@ -424,8 +424,8 @@ impl CodeBlock {
424424

425425
// Set the current write position from a pointer
426426
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
427-
let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
428-
self.set_pos(pos);
427+
let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset();
428+
self.set_pos(pos.try_into().unwrap());
429429
}
430430

431431
/// Get a (possibly dangling) direct pointer into the executable memory block
@@ -435,19 +435,19 @@ impl CodeBlock {
435435

436436
/// Convert an address range to memory page indexes against a num_pages()-sized array.
437437
pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
438-
let mem_start = self.mem_block.borrow().start_ptr().into_usize();
439-
let mem_end = self.mem_block.borrow().mapped_end_ptr().into_usize();
440-
assert!(mem_start <= start_addr.into_usize());
441-
assert!(start_addr.into_usize() <= end_addr.into_usize());
442-
assert!(end_addr.into_usize() <= mem_end);
438+
let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self);
439+
let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self);
440+
assert!(mem_start <= start_addr.raw_addr(self));
441+
assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self));
442+
assert!(end_addr.raw_addr(self) <= mem_end);
443443

444444
// Ignore empty code ranges
445445
if start_addr == end_addr {
446446
return vec![];
447447
}
448448

449-
let start_page = (start_addr.into_usize() - mem_start) / self.page_size;
450-
let end_page = (end_addr.into_usize() - mem_start - 1) / self.page_size;
449+
let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size;
450+
let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size;
451451
(start_page..=end_page).collect() // TODO: consider returning an iterator
452452
}
453453

@@ -716,13 +716,20 @@ impl CodeBlock {
716716
impl fmt::LowerHex for CodeBlock {
717717
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
718718
for pos in 0..self.write_pos {
719-
let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
719+
let mem_block = &*self.mem_block.borrow();
720+
let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() };
720721
fmtr.write_fmt(format_args!("{:02x}", byte))?;
721722
}
722723
Ok(())
723724
}
724725
}
725726

727+
impl crate::virtualmem::CodePtrBase for CodeBlock {
728+
fn base_ptr(&self) -> std::ptr::NonNull<u8> {
729+
self.mem_block.borrow().base_ptr()
730+
}
731+
}
732+
726733
/// Wrapper struct so we can use the type system to distinguish
727734
/// Between the inlined and outlined code blocks
728735
pub struct OutlinedCb {

yjit/src/asm/x86_64/mod.rs

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -362,11 +362,6 @@ pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
362362
uimm_opnd(ptr as u64)
363363
}
364364

365-
pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd
366-
{
367-
uimm_opnd(code_ptr.raw_ptr() as u64)
368-
}
369-
370365
/// Write the REX byte
371366
fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) {
372367
// 0 1 0 0 w r x b
@@ -696,7 +691,7 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
696691
let end_ptr = cb.get_ptr(cb.write_pos + 5);
697692

698693
// Compute the jump offset
699-
let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64();
694+
let rel64: i64 = dst_ptr as i64 - end_ptr.raw_ptr(cb) as i64;
700695

701696
// If the offset fits in 32-bit
702697
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
@@ -897,7 +892,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
897892
let end_ptr = cb.get_ptr(cb.write_pos + 4);
898893

899894
// Compute the jump offset
900-
let rel64 = dst_ptr.into_i64() - end_ptr.into_i64();
895+
let rel64 = dst_ptr.as_offset() - end_ptr.as_offset();
901896

902897
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
903898
// Write the relative 32-bit jump offset

yjit/src/asm/x86_64/tests.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ fn test_call_ptr() {
6868
// calling a lower address
6969
check_bytes("e8fbffffff", |cb| {
7070
let ptr = cb.get_write_ptr();
71-
call_ptr(cb, RAX, ptr.raw_ptr());
71+
call_ptr(cb, RAX, ptr.raw_ptr(cb));
7272
});
7373
}
7474

@@ -442,15 +442,15 @@ fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> {
442442
fn block_comments() {
443443
let mut cb = super::CodeBlock::new_dummy(4096);
444444

445-
let first_write_ptr = cb.get_write_ptr().into_usize();
445+
let first_write_ptr = cb.get_write_ptr().raw_addr(&cb);
446446
cb.add_comment("Beginning");
447447
xor(&mut cb, EAX, EAX); // 2 bytes long
448-
let second_write_ptr = cb.get_write_ptr().into_usize();
448+
let second_write_ptr = cb.get_write_ptr().raw_addr(&cb);
449449
cb.add_comment("Two bytes in");
450450
cb.add_comment("Still two bytes in");
451451
cb.add_comment("Still two bytes in"); // Duplicate, should be ignored
452452
test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long
453-
let third_write_ptr = cb.get_write_ptr().into_usize();
453+
let third_write_ptr = cb.get_write_ptr().raw_addr(&cb);
454454
cb.add_comment("Ten bytes in");
455455

456456
assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap());

yjit/src/backend/arm64/mod.rs

Lines changed: 25 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ use crate::asm::arm64::*;
55
use crate::cruby::*;
66
use crate::backend::ir::*;
77
use crate::virtualmem::CodePtr;
8+
use crate::utils::*;
89

910
// Use the arm64 register type for this platform
1011
pub type Reg = A64Reg;
@@ -97,13 +98,13 @@ fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
9798
#[cfg(not(test))]
9899
{
99100
let end = cb.get_write_ptr();
100-
unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
101+
unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
101102
}
102103
}
103104

104105
fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
105-
let src_addr = cb.get_write_ptr().into_i64();
106-
let dst_addr = dst_ptr.into_i64();
106+
let src_addr = cb.get_write_ptr().as_offset();
107+
let dst_addr = dst_ptr.as_offset();
107108

108109
// If the offset is short enough, then we'll use the
109110
// branch instruction. Otherwise, we'll move the
@@ -716,8 +717,8 @@ impl Assembler
716717
fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
717718
match target {
718719
Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => {
719-
let dst_addr = dst_ptr.into_i64();
720-
let src_addr = cb.get_write_ptr().into_i64();
720+
let dst_addr = dst_ptr.as_offset();
721+
let src_addr = cb.get_write_ptr().as_offset();
721722

722723
let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
723724
// If the jump offset fits into the conditional jump as
@@ -746,7 +747,7 @@ impl Assembler
746747
} else {
747748
// Otherwise, we need to load the address into a
748749
// register and use the branch register instruction.
749-
let dst_addr = dst_ptr.into_u64();
750+
let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64();
750751
let load_insns: i32 = emit_load_size(dst_addr).into();
751752

752753
// We're going to write out the inverse condition so
@@ -1023,14 +1024,20 @@ impl Assembler
10231024
}
10241025
};
10251026
},
1026-
Insn::LeaLabel { out, target, .. } => {
1027-
let label_idx = target.unwrap_label_idx();
1027+
Insn::LeaJumpTarget { out, target, .. } => {
1028+
if let Target::Label(label_idx) = target {
1029+
// Set output to the raw address of the label
1030+
cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
1031+
adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
1032+
});
10281033

1029-
cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| {
1030-
adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
1031-
});
1032-
1033-
mov(cb, out.into(), Self::SCRATCH0);
1034+
mov(cb, out.into(), Self::SCRATCH0);
1035+
} else {
1036+
// Set output to the jump target's raw address
1037+
let target_code = target.unwrap_code_ptr();
1038+
let target_addr = target_code.raw_addr(cb).as_u64();
1039+
emit_load_value(cb, out.into(), target_addr);
1040+
}
10341041
},
10351042
Insn::CPush(opnd) => {
10361043
emit_push(cb, opnd.into());
@@ -1065,7 +1072,7 @@ impl Assembler
10651072
},
10661073
Insn::CCall { fptr, .. } => {
10671074
// The offset to the call target in bytes
1068-
let src_addr = cb.get_write_ptr().into_i64();
1075+
let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
10691076
let dst_addr = *fptr as i64;
10701077

10711078
// Use BL if the offset is short enough to encode as an immediate.
@@ -1317,8 +1324,7 @@ mod tests {
13171324
fn test_emit_je_fits_into_bcond() {
13181325
let (mut asm, mut cb) = setup_asm();
13191326

1320-
let offset = 80;
1321-
let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
1327+
let target: CodePtr = cb.get_write_ptr().add_bytes(80);
13221328

13231329
asm.je(Target::CodePtr(target));
13241330
asm.compile_with_num_regs(&mut cb, 0);
@@ -1329,7 +1335,7 @@ mod tests {
13291335
let (mut asm, mut cb) = setup_asm();
13301336

13311337
let offset = 1 << 21;
1332-
let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
1338+
let target: CodePtr = cb.get_write_ptr().add_bytes(offset);
13331339

13341340
asm.je(Target::CodePtr(target));
13351341
asm.compile_with_num_regs(&mut cb, 0);
@@ -1340,7 +1346,7 @@ mod tests {
13401346
let (mut asm, mut cb) = setup_asm();
13411347

13421348
let label = asm.new_label("label");
1343-
let opnd = asm.lea_label(label);
1349+
let opnd = asm.lea_jump_target(label);
13441350

13451351
asm.write_label(label);
13461352
asm.bake_string("Hello, world!");
@@ -1590,7 +1596,7 @@ mod tests {
15901596
assert!(gap > 0b1111111111111111111);
15911597

15921598
let instruction_at_starting_pos: [u8; 4] = unsafe {
1593-
std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(), 4)
1599+
std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4)
15941600
}.try_into().unwrap();
15951601
assert_eq!(
15961602
0b000101 << 26_u32,

yjit/src/backend/ir.rs

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -447,9 +447,8 @@ pub enum Insn {
447447
// Add a label into the IR at the point that this instruction is added.
448448
Label(Target),
449449

450-
// Load effective address relative to the current instruction pointer. It
451-
// accepts a single signed immediate operand.
452-
LeaLabel { target: Target, out: Opnd },
450+
/// Get the code address of a jump target
451+
LeaJumpTarget { target: Target, out: Opnd },
453452

454453
// Load effective address
455454
Lea { opnd: Opnd, out: Opnd },
@@ -539,7 +538,7 @@ impl Insn {
539538
Insn::Jo(target) |
540539
Insn::Jz(target) |
541540
Insn::Label(target) |
542-
Insn::LeaLabel { target, .. } => {
541+
Insn::LeaJumpTarget { target, .. } => {
543542
Some(target)
544543
}
545544
_ => None,
@@ -587,7 +586,7 @@ impl Insn {
587586
Insn::JoMul(_) => "JoMul",
588587
Insn::Jz(_) => "Jz",
589588
Insn::Label(_) => "Label",
590-
Insn::LeaLabel { .. } => "LeaLabel",
589+
Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
591590
Insn::Lea { .. } => "Lea",
592591
Insn::LiveReg { .. } => "LiveReg",
593592
Insn::Load { .. } => "Load",
@@ -626,7 +625,7 @@ impl Insn {
626625
Insn::CSelNZ { out, .. } |
627626
Insn::CSelZ { out, .. } |
628627
Insn::Lea { out, .. } |
629-
Insn::LeaLabel { out, .. } |
628+
Insn::LeaJumpTarget { out, .. } |
630629
Insn::LiveReg { out, .. } |
631630
Insn::Load { out, .. } |
632631
Insn::LoadSExt { out, .. } |
@@ -659,7 +658,7 @@ impl Insn {
659658
Insn::CSelNZ { out, .. } |
660659
Insn::CSelZ { out, .. } |
661660
Insn::Lea { out, .. } |
662-
Insn::LeaLabel { out, .. } |
661+
Insn::LeaJumpTarget { out, .. } |
663662
Insn::LiveReg { out, .. } |
664663
Insn::Load { out, .. } |
665664
Insn::LoadSExt { out, .. } |
@@ -688,7 +687,7 @@ impl Insn {
688687
Insn::Jnz(target) |
689688
Insn::Jo(target) |
690689
Insn::Jz(target) |
691-
Insn::LeaLabel { target, .. } => Some(target),
690+
Insn::LeaJumpTarget { target, .. } => Some(target),
692691
_ => None
693692
}
694693
}
@@ -741,7 +740,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
741740
Insn::JoMul(_) |
742741
Insn::Jz(_) |
743742
Insn::Label(_) |
744-
Insn::LeaLabel { .. } |
743+
Insn::LeaJumpTarget { .. } |
745744
Insn::PadInvalPatch |
746745
Insn::PosMarker(_) => None,
747746
Insn::CPopInto(opnd) |
@@ -842,7 +841,7 @@ impl<'a> InsnOpndMutIterator<'a> {
842841
Insn::JoMul(_) |
843842
Insn::Jz(_) |
844843
Insn::Label(_) |
845-
Insn::LeaLabel { .. } |
844+
Insn::LeaJumpTarget { .. } |
846845
Insn::PadInvalPatch |
847846
Insn::PosMarker(_) => None,
848847
Insn::CPopInto(opnd) |
@@ -1830,9 +1829,9 @@ impl Assembler {
18301829
}
18311830

18321831
#[must_use]
1833-
pub fn lea_label(&mut self, target: Target) -> Opnd {
1832+
pub fn lea_jump_target(&mut self, target: Target) -> Opnd {
18341833
let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS);
1835-
self.push_insn(Insn::LeaLabel { target, out });
1834+
self.push_insn(Insn::LeaJumpTarget { target, out });
18361835
out
18371836
}
18381837

yjit/src/backend/tests.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,7 @@ fn test_jcc_ptr()
231231
{
232232
let (mut asm, mut cb) = setup_asm();
233233

234-
let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
234+
let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
235235
let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
236236
asm.test(
237237
Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr()
248248
{
249249
let (mut asm, mut cb) = setup_asm();
250250

251-
let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
251+
let stub = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
252252
asm.jmp(stub);
253253

254254
asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo()
259259
{
260260
let (mut asm, mut cb) = setup_asm();
261261

262-
let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
262+
let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
263263

264264
let arg1 = Opnd::mem(64, SP, 0);
265265
let arg0 = Opnd::mem(64, SP, 8);

0 commit comments

Comments
 (0)