diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b8605629e2dfe..51cec7a66bee7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16496,32 +16496,42 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
 }
 
 static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
-                               unsigned ShY) {
+                               unsigned ShY, bool AddX) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
   SDValue X = N->getOperand(0);
   SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
                                DAG.getTargetConstant(ShY, DL, VT), X);
   return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
-                     DAG.getTargetConstant(ShX, DL, VT), Mul359);
+                     DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
 }
 
 static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
                                        uint64_t MulAmt) {
+  // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
   switch (MulAmt) {
   case 5 * 3:
-    return getShlAddShlAdd(N, DAG, 2, 1);
+    return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
   case 9 * 3:
-    return getShlAddShlAdd(N, DAG, 3, 1);
+    return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
   case 5 * 5:
-    return getShlAddShlAdd(N, DAG, 2, 2);
+    return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
   case 9 * 5:
-    return getShlAddShlAdd(N, DAG, 3, 2);
+    return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
   case 9 * 9:
-    return getShlAddShlAdd(N, DAG, 3, 3);
+    return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
   default:
-    return SDValue();
+    break;
   }
+
+  // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
+  int ShX;
+  if (int ShY = isShifted359(MulAmt - 1, ShX)) {
+    assert(ShX != 0 && "MulAmt=4,6,10 handled before");
+    if (ShX <= 3)
+      return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
+  }
+  return SDValue();
 }
 
 // Try to expand a scalar multiply to a faster sequence.
@@ -16581,41 +16591,30 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
                          DAG.getConstant(Shift, DL, VT));
     }
 
-    // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
-    if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
-      return V;
+    // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+    // of 25 which happen to be quite common.
+    // (2/4/8 * 3/5/9 + 1) * 2^N
+    Shift = llvm::countr_zero(MulAmt);
+    if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
+      if (Shift == 0)
+        return V;
+      SDLoc DL(N);
+      return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
+    }
 
     // If this is a power 2 + 2/4/8, we can use a shift followed by a single
    // shXadd. First check if this a sum of two power of 2s because that's
     // easy. Then count how many zeros are up to the first bit.
-    if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
-      unsigned ScaleShift = llvm::countr_zero(MulAmt);
-      if (ScaleShift >= 1 && ScaleShift < 4) {
-        unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
-        SDLoc DL(N);
-        SDValue Shift1 =
-            DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
-        return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
-                           DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
-      }
+    if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+      unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
+      SDLoc DL(N);
+      SDValue Shift1 =
+          DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
+      return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+                         DAG.getTargetConstant(Shift, DL, VT), Shift1);
     }
 
-    // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
-    // This is the two instruction form, there are also three instruction
-    // variants we could implement. e.g.
-    //   (2^(1,2,3) * 3,5,9 + 1) << C2
-    //   2^(C1>3) * 3,5,9 +/- 1
-    if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
-      assert(Shift != 0 && "MulAmt=4,6,10 handled before");
-      if (Shift <= 3) {
-        SDLoc DL(N);
-        SDValue Mul359 =
-            DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
-                        DAG.getTargetConstant(ShXAmount, DL, VT), X);
-        return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
-                           DAG.getTargetConstant(Shift, DL, VT), X);
-      }
-    }
+    // TODO: 2^(C1>3) * 3,5,9 +/- 1
 
     // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
     if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -16647,14 +16646,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
         return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
       }
     }
-
-    // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
-    // of 25 which happen to be quite common.
-    Shift = llvm::countr_zero(MulAmt);
-    if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
-      SDLoc DL(N);
-      return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
-    }
   }
 
   if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
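The reworked expandMulToShlAddShlAdd now covers two families of constants: products of two values from {3, 5, 9}, and constants of the form 2/4/8 * 3/5/9 + 1, with any remaining power-of-two factor peeled off by the caller as a trailing shift. The sketch below models that decomposition as a standalone program, purely as an illustration: it is not LLVM code, the names decomposeMulAmt and shadd are invented for the example, and the 3*3 product is skipped here just as it is in the switch above, presumably because MulAmt = 9 is already handled by a single shNadd earlier in expandMul.

// Standalone sketch (not part of the patch) of the constant-multiply
// decompositions handled by expandMulToShlAddShlAdd above.
#include <cassert>
#include <cstdint>
#include <optional>

// Zba shNadd semantics: (x << n) + y.
static uint64_t shadd(unsigned N, uint64_t X, uint64_t Y) { return (X << N) + Y; }

struct Decomp { unsigned ShX, ShY; bool AddX; };

// Return {ShX, ShY, AddX} if MulAmt is either
//   (2^ShX + 1) * (2^ShY + 1)        (AddX = false, the 3/5/9 * 3/5/9 table)
//   or 2^ShX * (2^ShY + 1) + 1       (AddX = true,  2/4/8 * 3/5/9 + 1).
static std::optional<Decomp> decomposeMulAmt(uint64_t MulAmt) {
  for (unsigned ShY = 1; ShY <= 3; ++ShY)
    for (unsigned ShX = ShY; ShX <= 3; ++ShX) {
      if (ShX == 1 && ShY == 1)
        continue; // 3 * 3 = 9 is a single shNadd, caught earlier in expandMul.
      if (((1ull << ShX) + 1) * ((1ull << ShY) + 1) == MulAmt)
        return Decomp{ShX, ShY, /*AddX=*/false};
    }
  for (unsigned ShY = 1; ShY <= 3; ++ShY)
    for (unsigned ShX = 1; ShX <= 3; ++ShX)
      if ((((1ull << ShY) + 1) << ShX) + 1 == MulAmt)
        return Decomp{ShX, ShY, /*AddX=*/true};
  return std::nullopt;
}

int main() {
  // X * 11: ShY = 2, ShX = 1, AddX = true, i.e. ((4X + X) << 1) + X.
  uint64_t X = 12345;
  auto D = decomposeMulAmt(11);
  assert(D && D->AddX);
  uint64_t Mul359 = shadd(D->ShY, X, X);                       // shYadd X, X
  uint64_t Res = shadd(D->ShX, Mul359, D->AddX ? X : Mul359);  // shXadd
  assert(Res == X * 11);
  // Multiples by 2^N (e.g. 22 = 11 * 2) only append a final shift.
  assert((Res << 1) == X * 22);
  return 0;
}

Running the sketch confirms, for example, that 11 decomposes with ShY = 2 and ShX = 1, and that appending a one-bit shift yields the times-22 sequences exercised by the test updates below.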
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
index 50bd22bf5fd69..f4964288e3541 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
@@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) {
 }
 
 define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 22
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul22:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 22
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul22:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a2, a0, a0, 2
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a2, 1
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT:    ret
   %c = mul i64 %a, 22
   %d = add i64 %c, %b
   ret i64 %d
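As a quick cross-check of the updated CHECK lines (an illustrative snippet, not part of the patch, assuming the usual semantics of XTheadBa th.addsl rd, rs1, rs2, imm2 as rs1 + (rs2 << imm2) and of Zba shNadd rd, rs1, rs2 as (rs1 << N) + rs2), the three-instruction sequences expected above for rv64xtheadba and below for rv64zba both evaluate addmul22 to a * 22 + b:

// Illustrative check (not part of the patch): the new addmul22 expansions
// compute a*22 + b under the assumed instruction semantics.
#include <cassert>
#include <cstdint>

// Assumed XTheadBa semantics: th.addsl rd, rs1, rs2, imm2 -> rs1 + (rs2 << imm2)
static uint64_t th_addsl(uint64_t Rs1, uint64_t Rs2, unsigned Imm) { return Rs1 + (Rs2 << Imm); }
// Assumed Zba semantics: shNadd rd, rs1, rs2 -> (rs1 << N) + rs2
static uint64_t shadd(unsigned N, uint64_t Rs1, uint64_t Rs2) { return (Rs1 << N) + Rs2; }

int main() {
  uint64_t a = 0x1234, b = 0x9999;
  // rv64xtheadba: th.addsl a2,a0,a0,2 ; th.addsl a0,a0,a2,1 ; th.addsl a0,a1,a0,1
  uint64_t a2 = th_addsl(a, a, 2);  // 5*a
  uint64_t r = th_addsl(a, a2, 1);  // a + 10*a = 11*a
  r = th_addsl(b, r, 1);            // b + 22*a
  assert(r == a * 22 + b);
  // rv64zba: sh2add a2,a0,a0 ; sh1add a0,a2,a0 ; sh1add a0,a0,a1
  a2 = shadd(2, a, a);              // 5*a
  r = shadd(1, a2, a);              // 11*a
  r = shadd(1, r, b);               // 22*a + b
  assert(r == a * 22 + b);
  return 0;
}

The same arithmetic holds for the other new tests, e.g. 26 = (3 * 4 + 1) * 2 and 146 = (9 * 8 + 1) * 2, matching the sh1add/sh2add/sh3add chains in the checks that follow.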
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 7fd76262d547a..d4b228828c04d 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) {
   ret i64 %d
 }
 
+define i64 @addmul14(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul14:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul14:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a2, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul14:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 14
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
 define i64 @addmul18(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul18:
 ; RV64I:       # %bb.0:
@@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) {
 }
 
 define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 22
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul22:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 22
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul22:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a2, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul22:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
   %c = mul i64 %a, 22
   %d = add i64 %c, %b
   ret i64 %d
@@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) {
   ret i64 %d
 }
 
+define i64 @addmul26(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul26:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 26
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul26:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a2, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul26:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 26
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
 define i64 @addmul36(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul36:
 ; RV64I:       # %bb.0:
@@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) {
   ret i64 %d
 }
 
+define i64 @addmul38(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul38:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 38
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul38:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a2, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul38:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 38
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul42(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul42:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 42
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul42:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a2, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul42:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 42
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
 define i64 @addmul72(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul72:
 ; RV64I:       # %bb.0:
@@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) {
   ret i64 %d
 }
 
+define i64 @addmul74(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul74:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 74
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul74:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a2, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul74:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 74
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul82(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul82:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 82
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul82:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a2, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul82:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 82
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul146(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul146:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 146
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul146:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a2, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a2, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: addmul146:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT:    ret
+  %c = mul i64 %a, 146
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
 define i64 @mul50(i64 %a) {
 ; RV64I-LABEL: mul50:
 ; RV64I:       # %bb.0: