diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index c7f15415ebb91..dda6023b37f7b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3204,9 +3204,7 @@ static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, // If we have a SHXADD instruction, prefer that over reassociating an ADDI. assert(Shift.getOpcode() == ISD::SHL); unsigned ShiftAmt = Shift.getConstantOperandVal(1); - if ((ShiftAmt <= 3 && - (Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) || - (ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac())) + if (Subtarget.hasShlAdd(ShiftAmt)) return false; // All users of the ADDI should be load/store. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5485b916c2031..9d90eb0a65218 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15349,11 +15349,9 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, // (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31. static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - const bool HasStdExtZba = Subtarget.hasStdExtZba(); - const bool HasVendorXAndesPerf = Subtarget.hasVendorXAndesPerf(); - const bool HasVendorXqciac = Subtarget.hasVendorXqciac(); - // Perform this optimization only in the zba/xandesperf/xqciac extension. - if (!HasStdExtZba && !HasVendorXAndesPerf && !HasVendorXqciac) + // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba + // extension. + if (!Subtarget.hasShlAdd(3)) return SDValue(); // Skip for vector types and larger types. 
@@ -15379,16 +15377,7 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, return SDValue(); int64_t Diff = std::abs(C0 - C1); - bool IsShXaddDiff = Diff == 1 || Diff == 2 || Diff == 3; - bool HasShXadd = HasStdExtZba || HasVendorXAndesPerf; - - // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. - if ((!IsShXaddDiff && HasShXadd && !HasVendorXqciac) || - (IsShXaddDiff && !HasShXadd && HasVendorXqciac)) - return SDValue(); - - // Skip if QC_SHLADD is not applicable. - if (Diff == 0 || Diff > 31) + if (!Subtarget.hasShlAdd(Diff)) return SDValue(); // Build nodes. @@ -15445,7 +15434,7 @@ static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { // Perform this optimization only in the zba extension. - if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba()) + if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3)) return SDValue(); // Skip for vector types and larger types. @@ -16375,17 +16364,13 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue())) return SDValue(); - const bool HasShlAdd = Subtarget.hasStdExtZba() || - Subtarget.hasVendorXTHeadBa() || - Subtarget.hasVendorXAndesPerf(); - // WARNING: The code below is knowingly incorrect with regards to undef semantics. // We're adding additional uses of X here, and in principle, we should be freezing // X before doing so. However, adding freeze here causes real regressions, and no // other target properly freezes X in these cases either. 
SDValue X = N->getOperand(0); - if (HasShlAdd) { + if (Subtarget.hasShlAdd(3)) { for (uint64_t Divisor : {3, 5, 9}) { if (MulAmt % Divisor != 0) continue; @@ -21333,14 +21318,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); - bool IsShXAdd = - (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && - C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3; - bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 && - C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31; - // Bail if we might break a sh{1,2,3}add/qc.shladd pattern. - if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() && + if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() && N->user_begin()->getOpcode() == ISD::ADD && !isUsedByLdSt(*N->user_begin(), nullptr) && !isa<ConstantSDNode>(N->user_begin()->getOperand(1))) @@ -24398,7 +24377,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, return true; // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. 
- if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && + if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) && ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || (Imm - 8).isPowerOf2())) return true; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index d0bb57a3eaa13..f816112f70140 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -4492,7 +4492,7 @@ void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB, .addReg(DestReg, RegState::Kill) .addImm(ShiftAmount) .setMIFlag(Flag); - } else if (STI.hasStdExtZba() && + } else if (STI.hasShlAdd(3) && ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) || (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) || (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) { diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 0d9cd16a77937..aef084dd80d2a 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -209,6 +209,14 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { hasShortForwardBranchOpt(); } + bool hasShlAdd(int64_t ShAmt) const { + if (ShAmt <= 0) + return false; + if (ShAmt <= 3) + return HasStdExtZba || HasVendorXAndesPerf || HasVendorXTHeadBa; + return ShAmt <= 31 && HasVendorXqciac; + } + bool is64Bit() const { return IsRV64; } MVT getXLenVT() const { return is64Bit() ? 
MVT::i64 : MVT::i32; diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll index 0fc0adbfa83d9..0e4a5c07020ee 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll @@ -656,12 +656,18 @@ define i32 @add8192(i32 %a) { } define i32 @addshl_5_6(i32 %a, i32 %b) { -; CHECK-LABEL: addshl_5_6: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 6 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: addshl_5_6: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: slli a1, a1, 6 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addshl_5_6: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 1 +; RV32XTHEADBA-NEXT: slli a0, a0, 5 +; RV32XTHEADBA-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 6 %e = add i32 %c, %d @@ -669,12 +675,18 @@ define i32 @addshl_5_6(i32 %a, i32 %b) { } define i32 @addshl_5_7(i32 %a, i32 %b) { -; CHECK-LABEL: addshl_5_7: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 7 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: addshl_5_7: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: slli a1, a1, 7 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addshl_5_7: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 2 +; RV32XTHEADBA-NEXT: slli a0, a0, 5 +; RV32XTHEADBA-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 7 %e = add i32 %c, %d @@ -682,12 +694,18 @@ define i32 @addshl_5_7(i32 %a, i32 %b) { } define i32 @addshl_5_8(i32 %a, i32 %b) { -; CHECK-LABEL: addshl_5_8: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 8 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: addshl_5_8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addshl_5_8: +; 
RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 3 +; RV32XTHEADBA-NEXT: slli a0, a0, 5 +; RV32XTHEADBA-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 8 %e = add i32 %c, %d diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index d20fb66dbbeea..50bd22bf5fd69 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -1104,12 +1104,18 @@ define i64 @add8192(i64 %a) { } define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) { -; CHECK-LABEL: addshl32_5_6: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 6 -; CHECK-NEXT: addw a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addshl32_5_6: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addshl32_5_6: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 1 +; RV64XTHEADBA-NEXT: slliw a0, a0, 5 +; RV64XTHEADBA-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 6 %e = add i32 %c, %d @@ -1117,12 +1123,18 @@ define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) { } define i64 @addshl64_5_6(i64 %a, i64 %b) { -; CHECK-LABEL: addshl64_5_6: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 6 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addshl64_5_6: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addshl64_5_6: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 1 +; RV64XTHEADBA-NEXT: slli a0, a0, 5 +; RV64XTHEADBA-NEXT: ret %c = shl i64 %a, 5 %d = shl i64 %b, 6 %e = add i64 %c, %d @@ -1130,12 +1142,18 @@ define i64 @addshl64_5_6(i64 %a, i64 %b) { } define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) { -; CHECK-LABEL: addshl32_5_7: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; 
CHECK-NEXT: slli a1, a1, 7 -; CHECK-NEXT: addw a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addshl32_5_7: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: slli a1, a1, 7 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addshl32_5_7: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 2 +; RV64XTHEADBA-NEXT: slliw a0, a0, 5 +; RV64XTHEADBA-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 7 %e = add i32 %c, %d @@ -1143,12 +1161,18 @@ define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) { } define i64 @addshl64_5_7(i64 %a, i64 %b) { -; CHECK-LABEL: addshl64_5_7: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 7 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addshl64_5_7: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: slli a1, a1, 7 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addshl64_5_7: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 2 +; RV64XTHEADBA-NEXT: slli a0, a0, 5 +; RV64XTHEADBA-NEXT: ret %c = shl i64 %a, 5 %d = shl i64 %b, 7 %e = add i64 %c, %d @@ -1156,12 +1180,18 @@ define i64 @addshl64_5_7(i64 %a, i64 %b) { } define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) { -; CHECK-LABEL: addshl32_5_8: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 8 -; CHECK-NEXT: addw a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addshl32_5_8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addshl32_5_8: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3 +; RV64XTHEADBA-NEXT: slliw a0, a0, 5 +; RV64XTHEADBA-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 8 %e = add i32 %c, %d @@ -1169,12 +1199,18 @@ define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) { } define i64 @addshl64_5_8(i64 %a, i64 %b) { -; CHECK-LABEL: 
addshl64_5_8: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: slli a1, a1, 8 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addshl64_5_8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addshl64_5_8: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3 +; RV64XTHEADBA-NEXT: slli a0, a0, 5 +; RV64XTHEADBA-NEXT: ret %c = shl i64 %a, 5 %d = shl i64 %b, 8 %e = add i64 %c, %d @@ -1192,9 +1228,8 @@ define i64 @sh6_sh3_add1(i64 noundef %x, i64 noundef %y, i64 noundef %z) { ; ; RV64XTHEADBA-LABEL: sh6_sh3_add1: ; RV64XTHEADBA: # %bb.0: # %entry -; RV64XTHEADBA-NEXT: slli a1, a1, 6 -; RV64XTHEADBA-NEXT: th.addsl a1, a1, a2, 3 -; RV64XTHEADBA-NEXT: add a0, a1, a0 +; RV64XTHEADBA-NEXT: th.addsl a1, a2, a1, 3 +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3 ; RV64XTHEADBA-NEXT: ret entry: %shl = shl i64 %z, 3 @@ -1238,9 +1273,8 @@ define i64 @sh6_sh3_add3(i64 noundef %x, i64 noundef %y, i64 noundef %z) { ; ; RV64XTHEADBA-LABEL: sh6_sh3_add3: ; RV64XTHEADBA: # %bb.0: # %entry -; RV64XTHEADBA-NEXT: slli a1, a1, 6 -; RV64XTHEADBA-NEXT: th.addsl a1, a1, a2, 3 -; RV64XTHEADBA-NEXT: add a0, a0, a1 +; RV64XTHEADBA-NEXT: th.addsl a1, a2, a1, 3 +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3 ; RV64XTHEADBA-NEXT: ret entry: %shl = shl i64 %z, 3 diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll index c76e1a9d64f17..ec83fd7a28f32 100644 --- a/llvm/test/CodeGen/RISCV/xqciac.ll +++ b/llvm/test/CodeGen/RISCV/xqciac.ll @@ -361,8 +361,8 @@ define dso_local i32 @shladdc1c2(i32 %a, i32 %b) local_unnamed_addr #0 { ; ; RV32IMXQCIAC-LABEL: shladdc1c2: ; RV32IMXQCIAC: # %bb.0: # %entry -; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5 -; RV32IMXQCIAC-NEXT: slli a0, a0, 26 +; RV32IMXQCIAC-NEXT: slli a1, a1, 26 +; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 31 ; RV32IMXQCIAC-NEXT: ret ; ; RV32IZBAMXQCIAC-LABEL: shladdc1c2: