[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199

vfdff · 2024-04-26T11:55:37Z

Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to
sub w8, w0, w0, lsl #m
sub w0, w0, w8, lsl #n
Fix #89430

…+shl+sub Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to sub w8, w0, w0, lsl #m sub w0, w0, w8, lsl #n Fix llvm#89430

llvmbot · 2024-04-26T11:56:11Z

@llvm/pr-subscribers-backend-aarch64

Author: Allen (vfdff)

Changes

Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to
sub w8, w0, w0, lsl #m
sub w0, w0, w8, lsl #n
Fix #89430

Full diff: https://github.com/llvm/llvm-project/pull/90199.diff

2 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+30)
(modified) llvm/test/CodeGen/AArch64/mul_pow2.ll (+55-2)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8e9782c1930c3c..2d46b23516531e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17603,6 +17603,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
     return false;
   };
 
+  // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
+  // C = 29 is equal to 1 - (1 - 2^3) * 2^2.
+  auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) {
+    APInt CVMinus1 = C - 1;
+    if (CVMinus1.isNegative())
+      return false;
+    unsigned TrailingZeroes = CVMinus1.countr_zero();
+    APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
+    if (CVPlus1.isPowerOf2()) {
+      unsigned BitWidth = CVPlus1.getBitWidth();
+      M = APInt(BitWidth, CVPlus1.logBase2());
+      N = APInt(BitWidth, TrailingZeroes);
+      return true;
+    }
+    return false;
+  };
+
   if (ConstValue.isNonNegative()) {
     // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
     // (mul x, 2^N - 1) => (sub (shl x, N), x)
@@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
     //     => MV = (add (shl x, M), x); (add (shl MV, N), MV)
     // (mul x, (2^M + 1) * 2^N + 1))
     //     =>  MV = add (shl x, M), x); add (shl MV, N), x)
+    // (mul x, 1 - (1 - 2^M) * 2^N))
+    //     =>  MV = sub (x - (shl x, M)); sub (x - (shl MV, N))
     APInt SCVMinus1 = ShiftedConstValue - 1;
     APInt SCVPlus1 = ShiftedConstValue + 1;
     APInt CVPlus1 = ConstValue + 1;
@@ -17647,6 +17666,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
         return Add(Shl(MVal, CVN.getZExtValue()), N0);
       }
     }
+
+    if (Subtarget->hasALULSLFast() &&
+        isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
+      unsigned ShiftM = CVM.getZExtValue();
+      unsigned ShiftN = CVN.getZExtValue();
+      // ALULSLFast implies that shifts of <= 4 places are fast
+      if (ShiftM <= 4 && ShiftN <= 4) {
+        SDValue MVal = Sub(N0, Shl(N0, CVM.getZExtValue()));
+        return Sub(N0, Shl(MVal, CVN.getZExtValue()));
+      }
+    }
   } else {
     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
     // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
index 0c9ea51ba367e9..3f56c586211dba 100644
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
   ret i32 %mul
 }
 
+define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test29_fast_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w0, lsl #3
+; CHECK-NEXT:    sub w0, w0, w8, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test29_fast_shift:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #29 // =0x1d
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4
+  ret i32 %mul
+}
+
 define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
 ; CHECK-LABEL: test45_fast_shift:
 ; CHECK:       // %bb.0:
@@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
   ret i32 %mul
 }
 
+; Negative test: The shift amount 5 is larger than 4
+define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test125_fast_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125 // =0x7d
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test125_fast_shift:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #125 // =0x7d
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2)
+  ret i32 %mul
+}
+
+; Negative test: The shift amount 5 is larger than 4
+define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test225_fast_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #225 // =0xe1
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test225_fast_shift:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #225 // =0xe1
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 225 ; 225 = 1 - ((1-8) << 5)
+  ret i32 %mul
+}
+
 ; Negative test: The shift amount 5 larger than 4
 define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
 ; CHECK-LABEL: test297_fast_shift:
@@ -910,9 +963,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
 ;
 ; GISEL-LABEL: muladd_demand_commute:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI52_0
+; GISEL-NEXT:    adrp x8, .LCPI55_0
 ; GISEL-NEXT:    movi v3.4s, #1, msl #16
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI52_0]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI55_0]
 ; GISEL-NEXT:    mla v1.4s, v0.4s, v2.4s
 ; GISEL-NEXT:    and v0.16b, v1.16b, v3.16b
 ; GISEL-NEXT:    ret

efriedma-quic · 2024-04-26T19:56:50Z

llvm/test/CodeGen/AArch64/mul_pow2.ll

+define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test225_fast_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #225 // =0xe1


You can write x*225 as x*-15*-15.

Oh, yes. 225 = (1-16) * (1-16), which is not supported yet.

efriedma-quic · 2024-04-26T19:57:00Z

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

@@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
    //     => MV = (add (shl x, M), x); (add (shl MV, N), MV)
    // (mul x, (2^M + 1) * 2^N + 1))
    //     =>  MV = add (shl x, M), x); add (shl MV, N), x)
+    // (mul x, 1 - (1 - 2^M) * 2^N))
+    //     =>  MV = sub (x - (shl x, M)); add (x - (shl x, M))


Formula looks inconsistent with other formulas.

Updated, it is MV = sub (x - (shl x, M)); sub (x - (shl MV, N)), thanks.

efriedma-quic

LGTM

[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub…

16feacf

…+shl+sub Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to sub w8, w0, w0, lsl #m sub w0, w0, w8, lsl #n Fix llvm#89430

vfdff requested review from davemgreen and efriedma-quic April 26, 2024 11:55

llvmbot added the backend:AArch64 label Apr 26, 2024

efriedma-quic reviewed Apr 26, 2024

View reviewed changes

[SelectionDAG] Fix comment

77d11b2

vfdff force-pushed the PR89430 branch from cb01471 to 77d11b2 Compare April 28, 2024 12:21

vfdff requested a review from efriedma-quic April 29, 2024 02:56

efriedma-quic approved these changes Apr 29, 2024

View reviewed changes

vfdff merged commit e123643 into llvm:main May 6, 2024
3 of 4 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199

[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199

vfdff commented Apr 26, 2024

llvmbot commented Apr 26, 2024

efriedma-quic Apr 26, 2024

vfdff Apr 28, 2024

efriedma-quic Apr 26, 2024

vfdff Apr 28, 2024 •

edited

Loading

efriedma-quic left a comment

[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199

[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199

Conversation

vfdff commented Apr 26, 2024

llvmbot commented Apr 26, 2024

efriedma-quic Apr 26, 2024

Choose a reason for hiding this comment

vfdff Apr 28, 2024

Choose a reason for hiding this comment

efriedma-quic Apr 26, 2024

Choose a reason for hiding this comment

vfdff Apr 28, 2024 • edited Loading

Choose a reason for hiding this comment

efriedma-quic left a comment

Choose a reason for hiding this comment

vfdff Apr 28, 2024 •

edited

Loading