diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 975baa7e2e504..d6d604d288fc6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10758,7 +10758,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                      DAG.getConstant(ShiftAmt, DL, XLenVT));
       SDValue Mask = DAG.getConstant(PosMask, DL, XLenVT);
       SDValue Result =
-          DAG.getNode(RISCVISD::MVM, DL, XLenVT, Vec, ShiftedVal, Mask);
+          DAG.getNode(RISCVISD::MERGE, DL, XLenVT, Mask, Vec, ShiftedVal);
       return DAG.getBitcast(VecVT, Result);
     }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 774e1e024a4be..9ea45e77bccf0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1517,13 +1517,15 @@ def riscv_mulhr : RVSDNode<"MULHR", SDTIntBinOp>;
 def riscv_mulhru : RVSDNode<"MULHRU", SDTIntBinOp>;
 def riscv_mulhrsu : RVSDNode<"MULHRSU", SDTIntBinOp>;
 
-def SDT_RISCVMVM : SDTypeProfile<1, 3, [SDTCisInt<0>,
-                                        SDTCisSameAs<0, 1>,
-                                        SDTCisSameAs<0, 2>,
-                                        SDTCisSameAs<0, 3>]>;
-def riscv_mvm : RVSDNode<"MVM", SDT_RISCVMVM>;
+// Bitwise merge: res = (~op0 & op1) | (op0 & op2), i.e. op0 is the bit mask.
+def SDT_RISCVMERGE : SDTypeProfile<1, 3, [SDTCisInt<0>,
+                                          SDTCisSameAs<0, 1>,
+                                          SDTCisSameAs<0, 2>,
+                                          SDTCisSameAs<0, 3>]>;
+def riscv_merge : RVSDNode<"MERGE", SDT_RISCVMERGE>;
 
 let Predicates = [HasStdExtP] in {
+
   def : PatGpr;
   def : PatGpr;
 
@@ -1532,9 +1534,17 @@ let Predicates = [HasStdExtP] in {
   def : Pat<(XLenVT (fshr GPR:$rs1, GPR:$rd, shiftMaskXLen:$rs2)),
             (SRX GPR:$rd, GPR:$rs1, shiftMaskXLen:$rs2)>;
 
+  // Pseudo version of MERGE without the tied constraint. Expanded after RA to
+  // MERGE, MVM, or MVMN, depending on which source shares $dst's register.
+  // dst = (~rd & rs1) | (rd & rs2)
+  def PseudoMERGE : Pseudo<(outs GPR:$dst), (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+                           []>;
+  def : Pat<(XLenVT (or (and GPR:$rd, GPR:$rs2), (and (not GPR:$rd), GPR:$rs1))),
+            (PseudoMERGE GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+
   // Pattern for insert_vector_elt
-  def : Pat<(XLenVT (riscv_mvm GPR:$rd, GPR:$rs1, GPR:$rs2)),
-            (MVM GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(XLenVT (riscv_merge GPR:$rd, GPR:$rs1, GPR:$rs2)),
+            (PseudoMERGE GPR:$rd, GPR:$rs1, GPR:$rs2)>;
 
   // Basic 8-bit arithmetic patterns
   def: Pat<(XLenVecI8VT (add GPR:$rs1, GPR:$rs2)), (PADD_B GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
index 08e2b835547ca..7fe67e3fb8ea0 100644
--- a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -43,6 +43,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandMERGE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
 };
 
 char RISCVPostRAExpandPseudo::ID = 0;
@@ -76,6 +77,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
     return expandMovImm(MBB, MBBI);
   case RISCV::PseudoMovAddr:
     return expandMovAddr(MBB, MBBI);
+  case RISCV::PseudoMERGE:
+    return expandMERGE(MBB, MBBI);
   default:
     return false;
   }
@@ -118,6 +121,73 @@ bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB,
   return true;
 }
 
+/// Transfer the implicit operands of the pseudo instruction \p OldMI to the
+/// expanded instruction being built through \p MI.
+static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &MI) {
+  // Operands past the ones declared by the instruction description are the
+  // implicit operands; each must be a valid register operand.
+  const unsigned NumDeclared = OldMI.getDesc().getNumOperands();
+  for (unsigned Idx = NumDeclared, E = OldMI.getNumOperands(); Idx < E; ++Idx) {
+    const MachineOperand &ImpOp = OldMI.getOperand(Idx);
+    assert(ImpOp.isReg() && ImpOp.getReg());
+    MI.add(ImpOp);
+  }
+}
+
+// Expand PseudoMERGE, dst = (~rd & rs1) | (rd & rs2), to the tied-operand form
+// whose overwritten source already lives in dst: MERGE (rd), MVM (rs1), or
+// MVMN (rs2). If dst matches no source, copy rd into dst and emit MERGE.
+bool RISCVPostRAExpandPseudo::expandMERGE(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  const MachineOperand &Dst = MI.getOperand(0);
+  const MachineOperand &Mask = MI.getOperand(1); // $rd
+  const MachineOperand &Rs1 = MI.getOperand(2);  // $rs1
+  const MachineOperand &Rs2 = MI.getOperand(3);  // $rs2
+  Register DstReg = Dst.getReg();
+
+  // Build "Dst = Opc Tied, SrcA, SrcB" and forward MI's implicit operands.
+  auto EmitTied = [&](unsigned Opc, const MachineOperand &Tied,
+                      const MachineOperand &SrcA, const MachineOperand &SrcB) {
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+                                  .add(Dst)
+                                  .add(Tied)
+                                  .add(SrcA)
+                                  .add(SrcB);
+    transferImpOps(MI, MIB);
+  };
+
+  if (DstReg == Rs2.getReg()) {
+    EmitTied(RISCV::MVMN, Rs2, Rs1, Mask);
+  } else if (DstReg == Rs1.getReg()) {
+    EmitTied(RISCV::MVM, Rs1, Rs2, Mask);
+  } else if (DstReg == Mask.getReg()) {
+    EmitTied(RISCV::MERGE, Mask, Rs1, Rs2);
+  } else {
+    // No source shares dst: copy the mask into dst first, then MERGE in place.
+    // The copy may only kill the mask register if MERGE does not read it again
+    // as rs1 or rs2.
+    const bool KillMask = Mask.isKill() && Mask.getReg() != Rs1.getReg() &&
+                          Mask.getReg() != Rs2.getReg();
+    const RegState MaskFlags =
+        getRenamableRegState(Mask.isRenamable()) | getKillRegState(KillMask);
+    const auto DstRenamable = getRenamableRegState(Dst.isRenamable());
+    BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
+        .addDef(DstReg, DstRenamable)
+        .addReg(Mask.getReg(), MaskFlags)
+        .addImm(0);
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RISCV::MERGE))
+                                  .add(Dst)
+                                  .addReg(DstReg, RegState::Kill | DstRenamable)
+                                  .add(Rs1)
+                                  .add(Rs2);
+    transferImpOps(MI, MIB);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 } // end of anonymous namespace
 
 INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-post-ra-expand-pseudo",
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index 1e31983df0b8c..e4d4c68109dea 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -725,3 +725,138 @@ define void @wmaccu_multiple_uses(i32 %a, i32 %b, i64 %c, ptr %out1, ptr %out2)
   store i64 %mul, ptr %out2
   ret void
 }
+
+; Test bitwise merge: (mask & b) | (~mask & a)
+define i32 @merge_i32(i32 %mask, i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: merge_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MERGE with swapped a/b arguments
+define i32 @merge_i32_2(i32 %mask, i32 %b, i32 %a) nounwind {
+; CHECK-LABEL: merge_i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVM: result overwrites rs1 (%a)
+define i32 @mvm_i32(i32 %a, i32 %mask, i32 %b) nounwind {
+; CHECK-LABEL: mvm_i32:
+; 
CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVM with mask as last argument
+define i32 @mvm_i32_2(i32 %a, i32 %b, i32 %mask) nounwind {
+; CHECK-LABEL: mvm_i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVMN: result overwrites rs2 (%b)
+define i32 @mvmn_i32(i32 %b, i32 %mask, i32 %a) nounwind {
+; CHECK-LABEL: mvmn_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test MVMN with mask as last argument
+define i32 @mvmn_i32_2(i32 %b, i32 %a, i32 %mask) nounwind {
+; CHECK-LABEL: mvmn_i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; Test case where %mask, %a and %b are all still used after the merge, so no
+; source register can be overwritten and a mv is required before merge
+define i32 @merge_i32_mv(i32 %mask, i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: merge_i32_mv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:    merge a3, a1, a2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    ret
+  %and1 = and i32 %mask, %b
+  %not = xor i32 %mask, -1
+  %and2 = and i32 %not, %a
+  %or = or i32 %and1, %and2
+  %sum1 = add i32 %or, %mask
+  %sum2 = add i32 %sum1, %a
+  %sum3 = add i32 %sum2, %b
+  ret i32 %sum3
+}
+
+; Test alternate merge pattern: ((a ^ b) & mask) ^ a
+define i32 @merge_xor_i32(i32 %mask, i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: merge_xor_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %xor1 = xor i32 %a, %b
+  %and = and i32 %xor1, %mask
+  %xor2 = xor i32 %and, %a
+  ret i32 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVM
+define i32 @mvm_xor_i32(i32 %a, i32 %mask, i32 %b) nounwind {
+; CHECK-LABEL: mvm_xor_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i32 %a, %b
+  %and = and i32 %xor1, %mask
+  %xor2 = xor i32 %and, %a
+  ret i32 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVMN
+define i32 @mvmn_xor_i32(i32 %b, i32 %mask, i32 %a) nounwind {
+; CHECK-LABEL: mvmn_xor_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i32 %a, %b
+  %and = and i32 %xor1, %mask
+  %xor2 = xor i32 %and, %a
+  ret i32 %xor2
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll
index 2d6d615d9f7b9..53ca8476034a1 100644
--- a/llvm/test/CodeGen/RISCV/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/rv64p.ll
@@ -378,3 +378,138 @@ define i128 @srxi_i128(i128 %x) {
   %a = lshr i128 %x, 49
   ret i128 %a
 }
+
+; Test bitwise merge: (mask & b) | (~mask & a)
+define i64 @merge_i64(i64 %mask, i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: merge_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MERGE with swapped a/b arguments
+define i64 @merge_i64_2(i64 %mask, i64 %b, i64 %a) nounwind {
+; CHECK-LABEL: merge_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVM: result overwrites rs1 (%a)
+define i64 @mvm_i64(i64 %a, i64 %mask, i64 %b) nounwind {
+; CHECK-LABEL: mvm_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVM with mask as last argument
+define i64 @mvm_i64_2(i64 %a, i64 %b, i64 %mask) nounwind {
+; CHECK-LABEL: mvm_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVMN: result overwrites rs2 (%b)
+define i64 @mvmn_i64(i64 %b, i64 %mask, i64 %a) nounwind {
+; CHECK-LABEL: mvmn_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test MVMN with mask as last argument
+define i64 @mvmn_i64_2(i64 %b, i64 %a, i64 %mask) nounwind {
+; CHECK-LABEL: mvmn_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a1, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+; Test case where %mask, %a and %b are all still used after the merge, so no
+; source register can be overwritten and a mv is required before merge
+define i64 @merge_i64_mv(i64 %mask, i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: merge_i64_mv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:    merge a3, a1, a2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    add a0, a3, a0
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    ret
+  %and1 = and i64 %mask, %b
+  %not = xor i64 %mask, -1
+  %and2 = and i64 %not, %a
+  %or = or i64 %and1, %and2
+  %sum1 = add i64 %or, %mask
+  %sum2 = add i64 %sum1, %a
+  %sum3 = add i64 %sum2, %b
+  ret i64 %sum3
+}
+
+; Test alternate merge pattern: ((a ^ b) & mask) ^ a
+define i64 @merge_xor_i64(i64 %mask, i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: merge_xor_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    merge a0, a1, a2
+; CHECK-NEXT:    ret
+  %xor1 = xor i64 %a, %b
+  %and = and i64 %xor1, %mask
+  %xor2 = xor i64 %and, %a
+  ret i64 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVM
+define i64 @mvm_xor_i64(i64 %a, i64 %mask, i64 %b) nounwind {
+; CHECK-LABEL: mvm_xor_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvm a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i64 %a, %b
+  %and = and i64 %xor1, %mask
+  %xor2 = xor i64 %and, %a
+  ret i64 %xor2
+}
+
+; Test alternate merge pattern with different argument order for MVMN
+define i64 @mvmn_xor_i64(i64 %b, i64 %mask, i64 %a) nounwind {
+; CHECK-LABEL: mvmn_xor_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mvmn a0, a2, a1
+; CHECK-NEXT:    ret
+  %xor1 = xor i64 %a, %b
+  %and = and i64 %xor1, %mask
+  %xor2 = xor i64 %and, %a
+  ret i64 %xor2
+}