diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 9787b1a4e9fd7a..d3a386d42ec776 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -74,7 +74,6 @@ impl From<Opnd> for A64Opnd {
             Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => {
                 panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base")
             },
-            Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"),
             Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"),
             Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
             Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"),
@@ -186,10 +185,9 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {

 impl Assembler
 {
-    // Special scratch registers for intermediate processing.
+    // A special scratch register for intermediate processing.
     // This register is caller-saved (so we don't have to save it before using it)
-    pub const SCRATCH_REG: Reg = X16_REG;
-    const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
+    const SCRATCH0: A64Opnd = A64Opnd::Reg(X16_REG);
     const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);

     /// List of registers that can be used for stack temps.
@@ -282,7 +280,7 @@
         /// do follow that encoding, and if they don't then we load them first.
         fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
             match opnd {
-                Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd,
+                Opnd::Reg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd,
                 Opnd::Mem(_) => split_load_operand(asm, opnd),
                 Opnd::Imm(imm) => {
                     if imm == 0 {
@@ -315,7 +313,7 @@
         /// a certain size. If they don't then we need to load them first.
         fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
             match opnd {
-                Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd,
+                Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
                 Opnd::Mem(_) => split_load_operand(asm, opnd),
                 Opnd::Imm(_) => asm.load(opnd),
                 Opnd::UImm(uimm) => {
@@ -454,7 +452,7 @@
                             _ => *opnd
                         };

-                        asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value);
+                        asm.load_into(C_ARG_OPNDS[idx], value);
                     }

                     // Now we push the CCall without any arguments so that it
@@ -926,9 +924,6 @@
                         let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
                         insn_gc_offsets.push(ptr_offset);
                     },
-                    Opnd::CArg { .. } => {
-                        unreachable!("C argument operand was not lowered before arm64_emit");
-                    }
                     Opnd::Stack { .. } => {
                         unreachable!("Stack operand was not lowered before arm64_emit");
                     }
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index d09bf892abd997..8c4a6c1b6d0ddc 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -72,9 +72,6 @@ pub enum Opnd
     // Immediate Ruby value, may be GC'd, movable
     Value(VALUE),

-    /// C argument register. The alloc_regs resolves its register dependencies.
-    CArg(Reg),
-
     // Output of a preceding instruction in this block
     InsnOut{ idx: usize, num_bits: u8 },

@@ -105,7 +102,6 @@ impl fmt::Debug for Opnd {
         match self {
             Self::None => write!(fmt, "None"),
             Value(val) => write!(fmt, "Value({val:?})"),
-            CArg(reg) => write!(fmt, "CArg({reg:?})"),
             Stack { idx, sp_offset, .. } => write!(fmt, "SP[{}]", *sp_offset as i32 - idx - 1),
             InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"),
             Imm(signed) => write!(fmt, "{signed:x}_i64"),
@@ -149,14 +145,6 @@ impl Opnd
         Opnd::UImm(ptr as u64)
     }

-    /// Constructor for a C argument operand
-    pub fn c_arg(reg_opnd: Opnd) -> Self {
-        match reg_opnd {
-            Opnd::Reg(reg) => Opnd::CArg(reg),
-            _ => unreachable!(),
-        }
-    }
-
     pub fn is_some(&self) -> bool {
         match *self {
             Opnd::None => false,
@@ -1236,55 +1224,6 @@ impl Assembler
         }
     }

-    // Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
-    // so that they will not rewrite each other before they are used.
-    fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
-        // Return the index of a move whose destination is not used as a source if any.
-        fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
-            c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
-                c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
-            }).map(|(index, _)| index)
-        }
-
-        // Remove moves whose source and destination are the same
-        let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
-            .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
-
-        let mut moves = vec![];
-        while c_args.len() > 0 {
-            // Keep taking safe moves
-            while let Some(index) = find_safe_arg(&c_args) {
-                moves.push(c_args.remove(index));
-            }
-
-            // No safe move. Load the source of one move into SCRATCH_REG, and
-            // then load SCRATCH_REG into the destination when it's safe.
-            if c_args.len() > 0 {
-                // Make sure it's safe to use SCRATCH_REG
-                assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
-
-                // Move SCRATCH <- opnd, and delay reg <- SCRATCH
-                let (reg, opnd) = c_args.remove(0);
-                moves.push((Assembler::SCRATCH_REG, opnd));
-                c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
-            }
-        }
-        moves
-    }
-
-    // Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
-    fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
-        if shift_offset >= 0 {
-            for index in 0..(shift_offset as usize) {
-                live_ranges.insert(start_index + index, start_index + index);
-            }
-        } else {
-            for _ in 0..-shift_offset {
-                live_ranges.remove(start_index);
-            }
-        }
-    }
-
     // Dump live registers for register spill debugging.
     fn dump_live_regs(insns: Vec<Insn>, live_ranges: Vec<usize>, num_regs: usize, spill_index: usize) {
         // Convert live_ranges to live_regs: the number of live registers at each index
@@ -1308,18 +1247,11 @@
             }
         }

-        // We may need to reorder LoadInto instructions with a C argument operand.
-        // This buffers the operands of such instructions to process them in batches.
-        let mut c_args: Vec<(Reg, Opnd)> = vec![];
-
-        // live_ranges is indexed by original `index` given by the iterator.
         let live_ranges: Vec<usize> = take(&mut self.live_ranges);
-        // shifted_live_ranges is indexed by mapped indexes in insn operands.
-        let mut shifted_live_ranges: Vec<usize> = live_ranges.clone();
         let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
         let mut iterator = self.into_draining_iter();

-        while let Some((index, mut insn)) = iterator.next_mapped() {
+        while let Some((index, mut insn)) = iterator.next_unmapped() {
             // Check if this is the last instruction that uses an operand that
             // spans more than one instruction. In that case, return the
             // allocated register to the pool.
@@ -1330,11 +1262,12 @@ impl Assembler
                         // Since we have an InsnOut, we know it spans more that one
                         // instruction.
                         let start_index = *idx;
+                        assert!(start_index < index);

                         // We're going to check if this is the last instruction that
                         // uses this operand. If it is, we can return the allocated
                         // register to the pool.
-                        if shifted_live_ranges[start_index] == index {
+                        if live_ranges[start_index] == index {
                             if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() {
                                 dealloc_reg(&mut pool, &regs, reg);
                             } else {
@@ -1438,27 +1371,7 @@ impl Assembler
                 }
             }

-            // Push instruction(s). Batch and reorder C argument operations if needed.
-            if let Insn::LoadInto { dest: Opnd::CArg(reg), opnd } = insn {
-                // Buffer C arguments
-                c_args.push((reg, opnd));
-            } else {
-                // C arguments are buffered until CCall
-                if c_args.len() > 0 {
-                    // Resolve C argument dependencies
-                    let c_args_len = c_args.len() as isize;
-                    let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
-                    shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
-
-                    // Push batched C arguments
-                    for (reg, opnd) in moves {
-                        asm.load_into(Opnd::Reg(reg), opnd);
-                    }
-                }
-                // Other instructions are pushed as is
-                asm.push_insn(insn);
-            }
-            iterator.map_insn_index(&mut asm);
+            asm.push_insn(insn);
         }

         assert_eq!(pool, 0, "Expected all registers to be returned to the pool");
@@ -1529,7 +1442,7 @@ impl AssemblerDrainingIterator {
     /// end of the current list of instructions in order to maintain that
     /// alignment.
     pub fn map_insn_index(&mut self, asm: &mut Assembler) {
-        self.indices.push(asm.insns.len().saturating_sub(1));
+        self.indices.push(asm.insns.len() - 1);
     }

     /// Map an operand by using this iterator's list of mapped indices.
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index b40d8b2382d253..a2ee94cf66700c 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -87,9 +87,9 @@ impl From<&Opnd> for X86Opnd {
 impl Assembler
 {
     // A special scratch register for intermediate processing.
-    // This register is caller-saved (so we don't have to save it before using it)
-    pub const SCRATCH_REG: Reg = R11_REG;
-    const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
+    // Note: right now this is only used by LeaLabel because label_ref accepts
+    // a closure and we don't want it to have to capture anything.
+    const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG);

     /// List of registers that can be used for stack temps.
     pub const TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
@@ -347,7 +347,7 @@
                 // Load each operand into the corresponding argument
                 // register.
                 for (idx, opnd) in opnds.into_iter().enumerate() {
-                    asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd);
+                    asm.load_into(C_ARG_OPNDS[idx], *opnd);
                 }

                 // Now we push the CCall without any arguments so that it
@@ -1055,118 +1055,4 @@ mod tests {
         assert_eq!(format!("{:x}", cb), "4983f540");
     }
-
-    #[test]
-    fn test_reorder_c_args_no_cycle() {
-        let (mut asm, mut cb) = setup_asm();
-
-        asm.ccall(0 as _, vec![
-            C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
-            C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
-        ]);
-        asm.compile_with_num_regs(&mut cb, 0);
-
-        assert_disasm!(cb, "b800000000ffd0", {"
-            0x0: mov eax, 0
-            0x5: call rax
-        "});
-    }
-
-    #[test]
-    fn test_reorder_c_args_single_cycle() {
-        let (mut asm, mut cb) = setup_asm();
-
-        // rdi and rsi form a cycle
-        asm.ccall(0 as _, vec![
-            C_ARG_OPNDS[1], // mov rdi, rsi
-            C_ARG_OPNDS[0], // mov rsi, rdi
-            C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
-        ]);
-        asm.compile_with_num_regs(&mut cb, 0);
-
-        assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {"
-            0x0: mov r11, rsi
-            0x3: mov rsi, rdi
-            0x6: mov rdi, r11
-            0x9: mov eax, 0
-            0xe: call rax
-        "});
-    }
-
-    #[test]
-    fn test_reorder_c_args_two_cycles() {
-        let (mut asm, mut cb) = setup_asm();
-
-        // rdi and rsi form a cycle, and rdx and rcx form another cycle
-        asm.ccall(0 as _, vec![
-            C_ARG_OPNDS[1], // mov rdi, rsi
-            C_ARG_OPNDS[0], // mov rsi, rdi
-            C_ARG_OPNDS[3], // mov rdx, rcx
-            C_ARG_OPNDS[2], // mov rcx, rdx
-        ]);
-        asm.compile_with_num_regs(&mut cb, 0);
-
-        assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {"
-            0x0: mov r11, rsi
-            0x3: mov rsi, rdi
-            0x6: mov rdi, r11
-            0x9: mov r11, rcx
-            0xc: mov rcx, rdx
-            0xf: mov rdx, r11
-            0x12: mov eax, 0
-            0x17: call rax
-        "});
-    }
-
-    #[test]
-    fn test_reorder_c_args_large_cycle() {
-        let (mut asm, mut cb) = setup_asm();
-
-        // rdi, rsi, and rdx form a cycle
-        asm.ccall(0 as _, vec![
-            C_ARG_OPNDS[1], // mov rdi, rsi
-            C_ARG_OPNDS[2], // mov rsi, rdx
-            C_ARG_OPNDS[0], // mov rdx, rdi
-        ]);
-        asm.compile_with_num_regs(&mut cb, 0);
-
-        assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {"
-            0x0: mov r11, rsi
-            0x3: mov rsi, rdx
-            0x6: mov rdx, rdi
-            0x9: mov rdi, r11
-            0xc: mov eax, 0
-            0x11: call rax
-        "});
-    }
-
-    #[test]
-    fn test_reorder_c_args_with_insn_out() {
-        let (mut asm, mut cb) = setup_asm();
-
-        let rax = asm.load(Opnd::UImm(1));
-        let rcx = asm.load(Opnd::UImm(2));
-        let rdx = asm.load(Opnd::UImm(3));
-        // rcx and rdx form a cycle
-        asm.ccall(0 as _, vec![
-            rax, // mov rdi, rax
-            rcx, // mov rsi, rcx
-            rcx, // mov rdx, rcx
-            rdx, // mov rcx, rdx
-        ]);
-        asm.compile_with_num_regs(&mut cb, 3);
-
-        assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {"
-            0x0: mov eax, 1
-            0x5: mov ecx, 2
-            0xa: mov edx, 3
-            0xf: mov rdi, rax
-            0x12: mov rsi, rcx
-            0x15: mov r11, rcx
-            0x18: mov rcx, rdx
-            0x1b: mov rdx, r11
-            0x1e: mov eax, 0
-            0x23: call rax
-        "});
-    }
 }
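
---

For reference, the removed reorder_c_args solves the classic parallel-moves problem: given a batch of dest <- src register moves that should take effect simultaneously, order them so no source is clobbered before it is read, and break cycles by routing one value through a scratch register. The sketch below is a standalone illustration of that algorithm, not code from this patch; it simplifies sources to plain registers rather than arbitrary Opnds, and the Reg type and resolve_parallel_moves name are invented for the example.

// Minimal, self-contained sketch; not part of the patch above.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Reg(u8);

// Order `moves` (dest <- src pairs) so that no source register is
// overwritten before it is read, breaking cycles with `scratch`.
fn resolve_parallel_moves(mut moves: Vec<(Reg, Reg)>, scratch: Reg) -> Vec<(Reg, Reg)> {
    // Moves whose source and destination are the same are no-ops.
    moves.retain(|&(dest, src)| dest != src);

    let mut ordered = Vec::new();
    while !moves.is_empty() {
        // A move is safe when its destination is not a pending source.
        let safe = (0..moves.len())
            .find(|&i| moves.iter().all(|&(_, src)| src != moves[i].0));
        match safe {
            Some(i) => ordered.push(moves.remove(i)),
            None => {
                // Every pending move is part of a cycle. Route one value
                // through the scratch register and retry the rest.
                assert!(moves.iter().all(|&(_, src)| src != scratch));
                let (dest, src) = moves.remove(0);
                ordered.push((scratch, src)); // scratch <- src immediately
                moves.push((dest, scratch)); // dest <- scratch once dest is free
            }
        }
    }
    ordered
}

fn main() {
    // rdi <- rsi and rsi <- rdi form a cycle, mirroring the removed
    // test_reorder_c_args_single_cycle test above.
    let (rdi, rsi, r11) = (Reg(0), Reg(1), Reg(2));
    let ordered = resolve_parallel_moves(vec![(rdi, rsi), (rsi, rdi)], r11);
    assert_eq!(ordered, vec![(r11, rsi), (rsi, rdi), (rdi, r11)]);
    println!("{ordered:?}");
}

The resulting order (r11 <- rsi, rsi <- rdi, rdi <- r11) matches the disassembly asserted by the deleted single-cycle test.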