diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index a7f18c04a19079..0838d58220adb2 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -76,6 +76,13 @@ class RISCVInstructionSelector : public InstructionSelector { return selectSHXADDOp(Root, ShAmt); } + ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root, + unsigned ShAmt) const; + template + ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root) const { + return selectSHXADD_UWOp(Root, ShAmt); + } + // Custom renderers for tablegen void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; @@ -91,6 +98,9 @@ class RISCVInstructionSelector : public InstructionSelector { MachineRegisterInfo &MRI, RISCVCC::CondCode &CC, Register &LHS, Register &RHS) const; + // Custom renderer: adds the trailing-zero count of MI's G_CONSTANT value to MIB as an immediate. + void renderTrailingZeros(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + const RISCVSubtarget &STI; const RISCVInstrInfo &TII; const RISCVRegisterInfo &TRI; @@ -239,6 +249,47 @@ RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root, return std::nullopt; } +// Complex-operand matcher used for SHXADD_UW. Succeeds only when Root is a register defined by a (and (shl x, c2), mask) in which both the and and the shl have a single non-debug use, the mask (after clearing its low c2 bits) is a shifted mask with 32 - ShAmt leading zeros and c2 trailing zeros, and c2 > ShAmt. The returned renderer then builds SLLI x, c2 - ShAmt into a fresh generic virtual register and adds that register to the instruction being built. Returns std::nullopt otherwise. +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectSHXADD_UWOp(MachineOperand &Root, + unsigned ShAmt) const { + using namespace llvm::MIPatternMatch; + MachineFunction &MF = *Root.getParent()->getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (!Root.isReg()) + return std::nullopt; + Register RootReg = Root.getReg(); + + // Given (and (shl x, c2), mask) in which mask is a shifted mask with + // 32 - ShAmt leading zeros and c2 trailing zeros. We can use SLLI by + // c2 - ShAmt followed by SHXADD_UW with ShAmt for x amount. 
+ APInt Mask, C2; + Register RegX; + if (mi_match( + RootReg, MRI, + m_OneNonDBGUse(m_GAnd(m_OneNonDBGUse(m_GShl(m_Reg(RegX), m_ICst(C2))), + m_ICst(Mask))))) { + Mask &= maskTrailingZeros(C2.getLimitedValue()); + + if (Mask.isShiftedMask()) { + unsigned Leading = Mask.countl_zero(); + unsigned Trailing = Mask.countr_zero(); + if (Leading == 32 - ShAmt && C2 == Trailing && Trailing > ShAmt) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SLLI, {DstReg}, {RegX}) + .addImm(C2.getLimitedValue() - ShAmt); + MIB.addReg(DstReg); + }}}; + } + } + } + + return std::nullopt; +} + InstructionSelector::ComplexRendererFns RISCVInstructionSelector::selectAddrRegImm(MachineOperand &Root) const { // TODO: Need to get the immediate from a G_PTR_ADD. Should this be done in @@ -383,6 +434,15 @@ void RISCVInstructionSelector::renderImm(MachineInstrBuilder &MIB, MIB.addImm(CstVal); } +// Appends to MIB, as an immediate operand, the number of trailing zero bits in the zero-extended constant held by MI. MI must be a G_CONSTANT and OpIdx must be -1 (asserted). +void RISCVInstructionSelector::renderTrailingZeros(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + uint64_t C = MI.getOperand(1).getCImm()->getZExtValue(); + MIB.addImm(llvm::countr_zero(C)); +} + const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank( LLT Ty, const RegisterBank &RB) const { if (RB.getID() == RISCV::GPRRegBankID) { diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 8d0d088c111623..60896106bc0b5b 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -57,6 +57,9 @@ def as_i64imm : SDNodeXForm, GISDNodeXFormEquiv; +def gi_trailing_zero : GICustomOperandRenderer<"renderTrailingZeros">, + GISDNodeXFormEquiv; + // FIXME: This is labelled as handling 's32', however the ComplexPattern it // refers to handles both i32 and i64 based on the 
HwMode. Currently this LLT // parameter appears to be ignored so this pattern works for both, however we @@ -73,6 +76,13 @@ def gi_sh2add_op : GIComplexOperandMatcher">, def gi_sh3add_op : GIComplexOperandMatcher">, GIComplexPatternEquiv; +// GISel complex-operand matchers for the sh1add_uw / sh2add_uw / sh3add_uw operand forms. NOTE(review): the template arguments are not visible in this excerpt; presumably each def binds selectSHXADD_UWOp with the matching shift amount - confirm. +def gi_sh1add_uw_op : GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_sh2add_uw_op : GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_sh3add_uw_op : GIComplexOperandMatcher">, + GIComplexPatternEquiv; + // FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier. def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)), (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>; @@ -111,8 +121,14 @@ def : Pat<(i32 (udiv GPR:$rs1, GPR:$rs2)), (DIVUW GPR:$rs1, GPR:$rs2)>; def : Pat<(i32 (urem GPR:$rs1, GPR:$rs2)), (REMUW GPR:$rs1, GPR:$rs2)>; } -let Predicates = [HasStdExtZba, IsRV64] in +let Predicates = [HasStdExtZba, IsRV64] in { +// This pattern is put here due to the fact that i32 is not a legal type +// in SDISel for RV64, which is not the case in GISel. 
+def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt), + (SLLI_UW GPR:$rs1, uimm5:$shamt)>; + def : Pat<(i64 (zext i32:$rs)), (ADD_UW GPR:$rs, (XLenVT X0))>; +} // Predicates = [HasStdExtZba, IsRV64] let Predicates = [IsRV64, NotHasStdExtZba] in def: Pat<(i64 (zext i32:$rs)), (SRLI (SLLI GPR:$rs, 32), 32)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td index d20ed70e1a5290..41e139e3c7a9eb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -540,11 +540,11 @@ def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)), (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; // Reuse complex patterns from StdExtZba -def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2), +def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)), (TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>; -def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2), +def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)), (TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>; -def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2), +def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)), (TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>; def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index a7572e908b56b8..4a62a61dadcf3b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -230,16 +230,9 @@ def SimmShiftRightBy3XForm : SDNodeXFormgetValueType(0)); }]>; -// Pattern to exclude simm12 immediates from matching. -// Note: this will be removed once the GISel complex patterns for -// SHXADD_UW is landed. -def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ - auto *C = dyn_cast(N); - return !C || !isInt<12>(C->getSExtValue()); -}]>; - +// Pattern to exclude simm12 immediates from matching, namely `non_imm12`. 
// GISel currently doesn't support PatFrag for leaf nodes, so `non_imm12` -// cannot be directly supported in GISel. To reuse patterns between the two +// cannot be implemented in that way. To reuse patterns between the two // ISels, we instead create PatFrag on operators that use `non_imm12`. class binop_with_non_imm12 : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{ @@ -264,12 +257,11 @@ class binop_with_non_imm12 def add_non_imm12 : binop_with_non_imm12; def or_is_add_non_imm12 : binop_with_non_imm12; -def Shifted32OnesMask : PatLeaf<(imm), [{ - uint64_t Imm = N->getZExtValue(); - if (!isShiftedMask_64(Imm)) +def Shifted32OnesMask : IntImmLeaf 0 && TrailingZeros < 32 && Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros; }], TrailingZeros>; @@ -776,12 +768,11 @@ def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenV (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; // More complex cases use a ComplexPattern. -def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)), - (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)), - (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)), - (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>; +// Emits one pattern per shift amount i in {1,2,3}: (add_non_imm12 shIadd_uw_op, rs2) selects SHIADD_UW, with both names looked up by string concatenation on i. +foreach i = {1,2,3} in { + defvar pat = !cast("sh"#i#"add_uw_op"); + def : Pat<(i64 (add_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))), + (!cast("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>; +} def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir index 092a3305b3453d..dc8571bcc7c006 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir @@ -150,3 +150,102 @@ body: | %6:gprb(s64) = G_ADD %5, %1 $x10 = COPY %6(s64) ... 
+--- +name: shXadd_uw_complex_shl_and +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: shXadd_uw_complex_shl_and + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 1 + ; CHECK-NEXT: [[SH2ADD_UW:%[0-9]+]]:gpr = SH2ADD_UW [[SLLI]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH2ADD_UW]] + ; Input: ((%0 << 3) & 0x3FFFFFFFF) + %1; expected selection is SLLI by 1 feeding SH2ADD_UW. + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = COPY $x11 + + %2:gprb(s64) = G_CONSTANT i64 3 + %3:gprb(s64) = G_SHL %0, %2 + %4:gprb(s64) = G_CONSTANT i64 17179869183 + %5:gprb(s64) = G_AND %3, %4 + + %6:gprb(s64) = G_ADD %5, %1 + $x10 = COPY %6(s64) +... +--- +name: slli_uw +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: slli_uw + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SLLI_UW:%[0-9]+]]:gpr = SLLI_UW [[COPY]], 7 + ; CHECK-NEXT: $x10 = COPY [[SLLI_UW]] + ; Input: (%0 & 0xFFFFFFFF) << 7; expected selection is a single SLLI_UW by 7. + %0:gprb(s64) = COPY $x10 + + %1:gprb(s64) = G_CONSTANT i64 4294967295 + %2:gprb(s64) = G_AND %0, %1 + %3:gprb(s64) = G_CONSTANT i64 7 + %4:gprb(s64) = G_SHL %2, %3 + + $x10 = COPY %4(s64) +... +--- +name: slli_uw_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: slli_uw_s32 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SLLI_UW:%[0-9]+]]:gpr = SLLI_UW [[COPY]], 7 + ; CHECK-NEXT: $x10 = COPY [[SLLI_UW]] + ; Input: zext(trunc %0 to s32) << 7; expected selection is a single SLLI_UW by 7. + %0:gprb(s64) = COPY $x10 + %1:gprb(s32) = G_TRUNC %0(s64) + + %2:gprb(s64) = G_ZEXT %1(s32) + %3:gprb(s64) = G_CONSTANT i64 7 + %4:gprb(s64) = G_SHL %2, %3 + + $x10 = COPY %4(s64) +... 
+--- +name: slli_uw_complex +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: slli_uw_complex + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[COPY]], 2 + ; CHECK-NEXT: [[SLLI_UW:%[0-9]+]]:gpr = SLLI_UW [[SRLI]], 2 + ; CHECK-NEXT: $x10 = COPY [[SLLI_UW]] + ; Input: %0 & 0x3FFFFFFFC (0xFFFFFFFF shifted left by 2); expected selection is SRLI by 2 feeding SLLI_UW by 2. + %0:gprb(s64) = COPY $x10 + + %1:gprb(s64) = G_CONSTANT i64 17179869180 + %2:gprb(s64) = G_AND %0, %1 + + $x10 = COPY %2(s64) +...