-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199
Conversation
…+shl+sub Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to sub w8, w0, w0, lsl #m sub w0, w0, w8, lsl #n Fix llvm#89430
@llvm/pr-subscribers-backend-aarch64 Author: Allen (vfdff) Changes: Change the cost model to lower a = b * C, where C = 1 - (1 - 2^m) * 2^n, to sub w8, w0, w0, lsl #m; sub w0, w0, w8, lsl #n. Fix llvm#89430. Full diff: https://github.com/llvm/llvm-project/pull/90199.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8e9782c1930c3c..2d46b23516531e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17603,6 +17603,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return false;
};
+ // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
+ // C = 29 is equal to 1 - (1 - 2^3) * 2^2.
+ auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) {
+ APInt CVMinus1 = C - 1;
+ if (CVMinus1.isNegative())
+ return false;
+ unsigned TrailingZeroes = CVMinus1.countr_zero();
+ APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
+ if (CVPlus1.isPowerOf2()) {
+ unsigned BitWidth = CVPlus1.getBitWidth();
+ M = APInt(BitWidth, CVPlus1.logBase2());
+ N = APInt(BitWidth, TrailingZeroes);
+ return true;
+ }
+ return false;
+ };
+
if (ConstValue.isNonNegative()) {
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
@@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// => MV = (add (shl x, M), x); (add (shl MV, N), MV)
// (mul x, (2^M + 1) * 2^N + 1))
// => MV = add (shl x, M), x); add (shl MV, N), x)
+ // (mul x, 1 - (1 - 2^M) * 2^N))
+ // => MV = sub (x - (shl x, M)); sub (x - (shl MV, N))
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt SCVPlus1 = ShiftedConstValue + 1;
APInt CVPlus1 = ConstValue + 1;
@@ -17647,6 +17666,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Add(Shl(MVal, CVN.getZExtValue()), N0);
}
}
+
+ if (Subtarget->hasALULSLFast() &&
+ isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
+ unsigned ShiftM = CVM.getZExtValue();
+ unsigned ShiftN = CVN.getZExtValue();
+ // ALULSLFast implies that shifts of <= 4 places are fast
+ if (ShiftM <= 4 && ShiftN <= 4) {
+ SDValue MVal = Sub(N0, Shl(N0, CVM.getZExtValue()));
+ return Sub(N0, Shl(MVal, CVN.getZExtValue()));
+ }
+ }
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
index 0c9ea51ba367e9..3f56c586211dba 100644
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}
+define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test29_fast_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, w0, lsl #3
+; CHECK-NEXT: sub w0, w0, w8, lsl #2
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: test29_fast_shift:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #29 // =0x1d
+; GISEL-NEXT: mul w0, w0, w8
+; GISEL-NEXT: ret
+
+ %mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4
+ ret i32 %mul
+}
+
define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test45_fast_shift:
; CHECK: // %bb.0:
@@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}
+; Negative test: The shift number 5 is out of bound
+define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test125_fast_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #125 // =0x7d
+; CHECK-NEXT: mul w0, w0, w8
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: test125_fast_shift:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #125 // =0x7d
+; GISEL-NEXT: mul w0, w0, w8
+; GISEL-NEXT: ret
+
+ %mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2)
+ ret i32 %mul
+}
+
+; Negative test: The shift number 5 is out of bound
+define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
+; CHECK-LABEL: test225_fast_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #225 // =0xe1
+; CHECK-NEXT: mul w0, w0, w8
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: test225_fast_shift:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #225 // =0xe1
+; GISEL-NEXT: mul w0, w0, w8
+; GISEL-NEXT: ret
+
+ %mul = mul nsw i32 %x, 225 ; 225 = 1 - ((1-8) << 5)
+ ret i32 %mul
+}
+
; Negative test: The shift amount 5 larger than 4
define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test297_fast_shift:
@@ -910,9 +963,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
;
; GISEL-LABEL: muladd_demand_commute:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI52_0
+; GISEL-NEXT: adrp x8, .LCPI55_0
; GISEL-NEXT: movi v3.4s, #1, msl #16
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI52_0]
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI55_0]
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
; GISEL-NEXT: and v0.16b, v1.16b, v3.16b
; GISEL-NEXT: ret
|
define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { | ||
; CHECK-LABEL: test225_fast_shift: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w8, #225 // =0xe1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can write x*225
as x*-15*-15
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh, Yes. 225 = (1-16) * (1-16), which is not supported now.
@@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, | |||
// => MV = (add (shl x, M), x); (add (shl MV, N), MV) | |||
// (mul x, (2^M + 1) * 2^N + 1)) | |||
// => MV = add (shl x, M), x); add (shl MV, N), x) | |||
// (mul x, 1 - (1 - 2^M) * 2^N)) | |||
// => MV = sub (x - (shl x, M)); add (x - (shl x, M)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Formula looks inconsistent with other formulas.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, it is MV = sub (x - (shl x, M)); sub (x - (shl MV, N))
, thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to
sub w8, w0, w0, lsl #m
sub w0, w0, w8, lsl #n
Fix #89430