[RISCV] Strength reduce mul by 2^M - 3/5/9 #88993

Merged Apr 24, 2024 (4 commits)

Conversation

preames (Collaborator) commented Apr 16, 2024:

We can expand these as the three-instruction sequence: (sub (shl X, C1), (shXadd X, X)).

Note: This is a companion to https://github.com/llvm/llvm-project/pull/88983/files, which handles the 2^N - 2^M case; neither is stacked on the other. Whichever lands second needs to be rebased to adjust the shadd check.
llvmbot (Collaborator) commented Apr 16, 2024:

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Full diff: https://github.com/llvm/llvm-project/pull/88993.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+17-1)
  • (modified) llvm/test/CodeGen/RISCV/addimm-mulimm.ll (+115-70)
  • (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+36-15)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7b4bec2f65b741..4443e08394211a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13447,7 +13447,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
   case 21:
   case 25:
   case 27:
-  case 29:
   case 37:
   case 41:
   case 45:
@@ -13474,6 +13473,23 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
+  for (uint64_t Offset : {3, 5, 9}) {
+    if ((VT == MVT::i64 || MulAmt + Offset <= UINT32_MAX) &&
+        isPowerOf2_64(MulAmt + Offset)) {
+      SDLoc DL(N);
+      SDValue Shift1 =
+          DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                      DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
+      SDValue Shift2 =
+          DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                      DAG.getConstant(Log2_64(Offset - 1), DL, VT));
+      return DAG.getNode(
+          ISD::SUB, DL, VT, Shift1,
+          DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0)));
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index 48fa69e1045656..a35d0c1d7b9db1 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -10,15 +10,17 @@
 define i32 @add_mul_combine_accept_a1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_accept_a1:
 ; RV32IMB:       # %bb.0:
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    addi a0, a0, 1073
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_a1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 1073
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 37
@@ -29,15 +31,17 @@ define i32 @add_mul_combine_accept_a1(i32 %x) {
 define signext i32 @add_mul_combine_accept_a2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_accept_a2:
 ; RV32IMB:       # %bb.0:
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    addi a0, a0, 1073
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_a2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 1073
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 37
@@ -49,10 +53,14 @@ define i64 @add_mul_combine_accept_a3(i64 %x) {
 ; RV32IMB-LABEL: add_mul_combine_accept_a3:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    li a2, 29
-; RV32IMB-NEXT:    mul a1, a1, a2
-; RV32IMB-NEXT:    mulhu a3, a0, a2
-; RV32IMB-NEXT:    add a1, a3, a1
-; RV32IMB-NEXT:    mul a2, a0, a2
+; RV32IMB-NEXT:    mulhu a2, a0, a2
+; RV32IMB-NEXT:    sh1add a3, a1, a1
+; RV32IMB-NEXT:    slli a1, a1, 5
+; RV32IMB-NEXT:    sub a1, a1, a3
+; RV32IMB-NEXT:    add a1, a2, a1
+; RV32IMB-NEXT:    sh1add a2, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a2, a0, a2
 ; RV32IMB-NEXT:    addi a0, a2, 1073
 ; RV32IMB-NEXT:    sltu a2, a0, a2
 ; RV32IMB-NEXT:    add a1, a1, a2
@@ -60,8 +68,9 @@ define i64 @add_mul_combine_accept_a3(i64 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_a3:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    sub a0, a0, a1
 ; RV64IMB-NEXT:    addi a0, a0, 1073
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i64 %x, 37
@@ -72,8 +81,9 @@ define i64 @add_mul_combine_accept_a3(i64 %x) {
 define i32 @add_mul_combine_accept_b1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_accept_b1:
 ; RV32IMB:       # %bb.0:
-; RV32IMB-NEXT:    li a1, 23
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh3add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    lui a1, 50
 ; RV32IMB-NEXT:    addi a1, a1, 1119
 ; RV32IMB-NEXT:    add a0, a0, a1
@@ -81,8 +91,9 @@ define i32 @add_mul_combine_accept_b1(i32 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_b1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    li a1, 23
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh3add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    lui a1, 50
 ; RV64IMB-NEXT:    addi a1, a1, 1119
 ; RV64IMB-NEXT:    addw a0, a0, a1
@@ -95,8 +106,9 @@ define i32 @add_mul_combine_accept_b1(i32 %x) {
 define signext i32 @add_mul_combine_accept_b2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_accept_b2:
 ; RV32IMB:       # %bb.0:
-; RV32IMB-NEXT:    li a1, 23
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh3add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    lui a1, 50
 ; RV32IMB-NEXT:    addi a1, a1, 1119
 ; RV32IMB-NEXT:    add a0, a0, a1
@@ -104,8 +116,9 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_b2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    li a1, 23
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh3add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    lui a1, 50
 ; RV64IMB-NEXT:    addi a1, a1, 1119
 ; RV64IMB-NEXT:    addw a0, a0, a1
@@ -119,10 +132,14 @@ define i64 @add_mul_combine_accept_b3(i64 %x) {
 ; RV32IMB-LABEL: add_mul_combine_accept_b3:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    li a2, 23
-; RV32IMB-NEXT:    mul a1, a1, a2
-; RV32IMB-NEXT:    mulhu a3, a0, a2
-; RV32IMB-NEXT:    add a1, a3, a1
-; RV32IMB-NEXT:    mul a2, a0, a2
+; RV32IMB-NEXT:    mulhu a2, a0, a2
+; RV32IMB-NEXT:    sh3add a3, a1, a1
+; RV32IMB-NEXT:    slli a1, a1, 5
+; RV32IMB-NEXT:    sub a1, a1, a3
+; RV32IMB-NEXT:    add a1, a2, a1
+; RV32IMB-NEXT:    sh3add a2, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a2, a0, a2
 ; RV32IMB-NEXT:    lui a0, 50
 ; RV32IMB-NEXT:    addi a0, a0, 1119
 ; RV32IMB-NEXT:    add a0, a2, a0
@@ -132,8 +149,9 @@ define i64 @add_mul_combine_accept_b3(i64 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_b3:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    li a1, 23
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh3add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    sub a0, a0, a1
 ; RV64IMB-NEXT:    lui a1, 50
 ; RV64IMB-NEXT:    addiw a1, a1, 1119
 ; RV64IMB-NEXT:    add a0, a0, a1
@@ -147,15 +165,17 @@ define i32 @add_mul_combine_reject_a1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_a1:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    addi a0, a0, 1971
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a1:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1971
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mulw a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1971
   %tmp1 = mul i32 %tmp0, 29
@@ -166,15 +186,17 @@ define signext i32 @add_mul_combine_reject_a2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_a2:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    addi a0, a0, 1971
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a2:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1971
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mulw a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1971
   %tmp1 = mul i32 %tmp0, 29
@@ -185,10 +207,14 @@ define i64 @add_mul_combine_reject_a3(i64 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_a3:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    li a2, 29
-; RV32IMB-NEXT:    mul a1, a1, a2
-; RV32IMB-NEXT:    mulhu a3, a0, a2
-; RV32IMB-NEXT:    add a1, a3, a1
-; RV32IMB-NEXT:    mul a2, a0, a2
+; RV32IMB-NEXT:    mulhu a2, a0, a2
+; RV32IMB-NEXT:    sh1add a3, a1, a1
+; RV32IMB-NEXT:    slli a1, a1, 5
+; RV32IMB-NEXT:    sub a1, a1, a3
+; RV32IMB-NEXT:    add a1, a2, a1
+; RV32IMB-NEXT:    sh1add a2, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a2, a0, a2
 ; RV32IMB-NEXT:    lui a0, 14
 ; RV32IMB-NEXT:    addi a0, a0, -185
 ; RV32IMB-NEXT:    add a0, a2, a0
@@ -199,8 +225,9 @@ define i64 @add_mul_combine_reject_a3(i64 %x) {
 ; RV64IMB-LABEL: add_mul_combine_reject_a3:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1971
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    sub a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i64 %x, 1971
   %tmp1 = mul i64 %tmp0, 29
@@ -343,15 +370,17 @@ define i32 @add_mul_combine_reject_e1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_e1:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    addi a0, a0, 1971
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_e1:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1971
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mulw a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i32 %x, 29
   %tmp1 = add i32 %tmp0, 57159
@@ -362,15 +391,17 @@ define signext i32 @add_mul_combine_reject_e2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_e2:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    addi a0, a0, 1971
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_e2:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1971
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mulw a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i32 %x, 29
   %tmp1 = add i32 %tmp0, 57159
@@ -381,10 +412,14 @@ define i64 @add_mul_combine_reject_e3(i64 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_e3:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    li a2, 29
-; RV32IMB-NEXT:    mul a1, a1, a2
-; RV32IMB-NEXT:    mulhu a3, a0, a2
-; RV32IMB-NEXT:    add a1, a3, a1
-; RV32IMB-NEXT:    mul a2, a0, a2
+; RV32IMB-NEXT:    mulhu a2, a0, a2
+; RV32IMB-NEXT:    sh1add a3, a1, a1
+; RV32IMB-NEXT:    slli a1, a1, 5
+; RV32IMB-NEXT:    sub a1, a1, a3
+; RV32IMB-NEXT:    add a1, a2, a1
+; RV32IMB-NEXT:    sh1add a2, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a2, a0, a2
 ; RV32IMB-NEXT:    lui a0, 14
 ; RV32IMB-NEXT:    addi a0, a0, -185
 ; RV32IMB-NEXT:    add a0, a2, a0
@@ -395,8 +430,9 @@ define i64 @add_mul_combine_reject_e3(i64 %x) {
 ; RV64IMB-LABEL: add_mul_combine_reject_e3:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1971
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    sub a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i64 %x, 29
   %tmp1 = add i64 %tmp0, 57159
@@ -407,16 +443,18 @@ define i32 @add_mul_combine_reject_f1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_f1:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    addi a0, a0, 1972
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    addi a0, a0, 11
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_f1:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1972
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 11
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i32 %x, 29
@@ -428,16 +466,18 @@ define signext i32 @add_mul_combine_reject_f2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_f2:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    addi a0, a0, 1972
-; RV32IMB-NEXT:    li a1, 29
-; RV32IMB-NEXT:    mul a0, a0, a1
+; RV32IMB-NEXT:    sh1add a1, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a0, a0, a1
 ; RV32IMB-NEXT:    addi a0, a0, 11
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_f2:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1972
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    subw a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 11
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i32 %x, 29
@@ -449,10 +489,14 @@ define i64 @add_mul_combine_reject_f3(i64 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_f3:
 ; RV32IMB:       # %bb.0:
 ; RV32IMB-NEXT:    li a2, 29
-; RV32IMB-NEXT:    mul a1, a1, a2
-; RV32IMB-NEXT:    mulhu a3, a0, a2
-; RV32IMB-NEXT:    add a1, a3, a1
-; RV32IMB-NEXT:    mul a2, a0, a2
+; RV32IMB-NEXT:    mulhu a2, a0, a2
+; RV32IMB-NEXT:    sh1add a3, a1, a1
+; RV32IMB-NEXT:    slli a1, a1, 5
+; RV32IMB-NEXT:    sub a1, a1, a3
+; RV32IMB-NEXT:    add a1, a2, a1
+; RV32IMB-NEXT:    sh1add a2, a0, a0
+; RV32IMB-NEXT:    slli a0, a0, 5
+; RV32IMB-NEXT:    sub a2, a0, a2
 ; RV32IMB-NEXT:    lui a0, 14
 ; RV32IMB-NEXT:    addi a0, a0, -145
 ; RV32IMB-NEXT:    add a0, a2, a0
@@ -463,8 +507,9 @@ define i64 @add_mul_combine_reject_f3(i64 %x) {
 ; RV64IMB-LABEL: add_mul_combine_reject_f3:
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 1972
-; RV64IMB-NEXT:    li a1, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
+; RV64IMB-NEXT:    sh1add a1, a0, a0
+; RV64IMB-NEXT:    slli a0, a0, 5
+; RV64IMB-NEXT:    sub a0, a0, a1
 ; RV64IMB-NEXT:    addi a0, a0, 11
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i64 %x, 29
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index b4c80b60e0bad5..3b32112d975273 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -568,31 +568,52 @@ define i64 @mul96(i64 %a) {
 }
 
 define i64 @mul119(i64 %a) {
-; CHECK-LABEL: mul119:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 119
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul119:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 119
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul119:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    sub a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 119
   ret i64 %c
 }
 
 define i64 @mul123(i64 %a) {
-; CHECK-LABEL: mul123:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 123
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul123:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 123
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul123:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    sub a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 123
   ret i64 %c
 }
 
 define i64 @mul125(i64 %a) {
-; CHECK-LABEL: mul125:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 125
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul125:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 125
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul125:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    sub a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 125
   ret i64 %c
 }

wangpc-pp requested review from wangpc-pp and removed the review request for pcwang-thead, April 17, 2024 04:21
@@ -13447,7 +13447,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
case 21:
case 25:
case 27:
case 29:
Contributor commented:
Why is it removed?

preames (Collaborator, author) replied:
It was a mistake to start with. We don't actually have a TD pattern for 29.

; RV32IMB-NEXT: mulhu a3, a0, a2
; RV32IMB-NEXT: add a1, a3, a1
; RV32IMB-NEXT: mul a2, a0, a2
; RV32IMB-NEXT: mulhu a2, a0, a2
Contributor commented:
We may have latency-2 multiply instructions on a high-performance RV64 core, so this may be a regression there. But this is an RV32 test (where multiply won't be as fast), so I'm just leaving this comment for the record.

Member commented:
FYI, Syntacore SCR1 has single-cycle integer multiplier :(

preames (Collaborator, author) replied Apr 17, 2024:
A couple of things here.

First, I agree that we are likely to want a per-processor, cost-based threshold here. The only question for me is the order of work and relative importance, not the net result.

I wasn't aware SCR1 had a single-cycle multiply. I glanced at the schedule model for it just now, and given that the only latencies set appear to be for div/rem, I have to ask: are you sure this is actually true?

Second, I'll argue that instruction count != latency. In this particular case, we have two independent instructions followed by a dependent subtract. On any core with multiple issue and single-cycle shl/add, I'd expect that to have a latency of 2. Obviously, this argument can be taken to a ridiculous extreme, but it isn't obvious to me that a core with a single-cycle mul only wants single-cycle expansions. The SCR1 might be a weird example here - in the schedule model it looks like there's only one ALU? Even then, that's not solely a function of latency.

For comparison, gcc appears to be defaulting to an expansion budget of at least four instructions for rv64gc. (example: https://godbolt.org/z/zzvqfha8f)

Contributor replied:
I'm not opposed to this optimization.
For a high-performance core (multiply latency is normally 3-4 cycles; 2 is extreme), there should be more simple-ALU units than multiplier units, so this optimization should be an uplift.
For RV32 cores, which are normally single-issue with fewer units, multiplier latency can be very small because of the low clock frequency, so I think this optimization could be a regression there.

First, I agree that we are likely to want a per processor cost based threshold here.
For comparison, gcc appears to be defaulting to an expansion budget of at least four instructions for rv64gc. (example: https://godbolt.org/z/zzvqfha8f)

Yeah, I agree too. Actually, this is what GCC is doing: the cost of mul is 2 for generic OoO, 1 for size, and 4 for Rocket and others (https://godbolt.org/z/8nvdbo5s1,
https://github.com/gcc-mirror/gcc/blob/58a0b190a256bd2a184554de0fae0031a614ec67/gcc/config/riscv/riscv.cc#L275-L292).
We could add a similar subtarget feature in LLVM too.

preames (Collaborator, author) commented Apr 17, 2024:

There's a bug here I need to fix, will post a revised version later this morning.

Commit message: At i64, this discards the carry bit, and thus any power-of-two detection only considers the bottom 64 bits of the result. I believe this is the correct result. For i32, the temporary is 64 bits, so an i32 is sign-extended in the temporary, and this reduces to the i64 case.
preames (Collaborator, author) commented Apr 17, 2024:

There's a bug here I need to fix, will post a revised version later this morning.

Actually, it appears the original code was correct and I had a redundant check. Overflow logic makes my head hurt, though, so a skeptical second look here is appreciated.

dtcxzyw added a commit to dtcxzyw/llvm-codegen-benchmark that referenced this pull request Apr 18, 2024
dtcxzyw (Member) left a comment:
LGTM.

preames (Collaborator, author) commented Apr 23, 2024:

LGTM.

JFYI, this needs a significant rebase. I'm going to wait until #89789 lands, and then revise this to use SHL_ADD directly.

preames merged commit b4f923e into llvm:main on Apr 24, 2024 (3 of 4 checks passed) and deleted the riscv-mul-pow2-minus-359 branch at 14:55.