[AArch64] Explicitly mark ADDS, ANDS, SUBS, etc as binops #160170
Open
AZero13 wants to merge 1 commit into llvm:main from AZero13:canon
+52 −2
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

This also means overriding isBinOp and isCommutativeBinOp.

Full diff: https://github.com/llvm/llvm-project/pull/160170.diff

5 Files Affected:
- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
- llvm/lib/Target/AArch64/AArch64ISelLowering.h
- llvm/test/CodeGen/AArch64/aarch64-smull.ll
- llvm/test/CodeGen/AArch64/arm64-vmul.ll
- llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cd7f0e719ad0c..7f674d72868a4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17107,6 +17107,27 @@ bool AArch64TargetLowering::shouldRemoveRedundantExtend(SDValue Extend) const {
return true;
}
+bool AArch64TargetLowering::isBinOp(unsigned Opcode) const {
+ switch (Opcode) {
+ // TODO: Add more?
+ case AArch64ISD::SUBS:
+ return true;
+ }
+ return TargetLoweringBase::isBinOp(Opcode);
+}
+
+bool AArch64TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::ANDS:
+ case AArch64ISD::ADDS:
+ case AArch64ISD::PMULL:
+ case AArch64ISD::SMULL:
+ case AArch64ISD::UMULL:
+ return true;
+ }
+ return TargetLoweringBase::isCommutativeBinOp(Opcode);
+}
+
// Truncations from 64-bit GPR to 32-bit GPR is free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
@@ -25996,7 +26017,7 @@ static SDValue performSETCCCombine(SDNode *N,
// (eg AND) if the flag is unused.
static SDValue performFlagSettingCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
- unsigned GenericOpcode) {
+ unsigned GenericOpcode, bool isCommutative) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -26013,6 +26034,16 @@ static SDValue performFlagSettingCombine(SDNode *N,
GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
DCI.CombineTo(Generic, SDValue(N, 0));
+ // Some opcodes, such as ADCS, cannot be reported as commutative through
+ // isCommutativeBinOp: the rest of the codebase does not treat them as
+ // commutative because they also consume the carry flag. Their two value
+ // operands can still be swapped safely here, so take an explicit flag.
+ if (isCommutative) {
+ if (SDNode *Generic = DCI.DAG.getNodeIfExists(
+ GenericOpcode, DCI.DAG.getVTList(VT), {RHS, LHS}))
+ DCI.CombineTo(Generic, SDValue(N, 0));
+ }
+
return SDValue();
}
@@ -27547,7 +27578,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::TRUNCATE:
return performTruncateCombine(N, DAG, DCI);
case AArch64ISD::ANDS:
- return performFlagSettingCombine(N, DCI, ISD::AND);
+ return performFlagSettingCombine(N, DCI, ISD::AND, true);
case AArch64ISD::ADC:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
@@ -27557,15 +27588,15 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::ADCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
- return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
+ return performFlagSettingCombine(N, DCI, AArch64ISD::ADC, true);
case AArch64ISD::SBCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
- return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+ return performFlagSettingCombine(N, DCI, AArch64ISD::SBC, false);
case AArch64ISD::ADDS:
- return performFlagSettingCombine(N, DCI, ISD::ADD);
+ return performFlagSettingCombine(N, DCI, ISD::ADD, true);
case AArch64ISD::SUBS:
- return performFlagSettingCombine(N, DCI, ISD::SUB);
+ return performFlagSettingCombine(N, DCI, ISD::SUB, false);
case AArch64ISD::BICi: {
APInt DemandedBits =
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ff073d3eafb1f..c1ec703acca33 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -250,6 +250,11 @@ class AArch64TargetLowering : public TargetLowering {
bool isLegalAddScalableImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
+ /// Add AArch64-specific opcodes to the default list.
+ bool isBinOp(unsigned Opcode) const override;
+
+ bool isCommutativeBinOp(unsigned Opcode) const override;
+
bool isMulAddWithConstProfitable(SDValue AddNode,
SDValue ConstNode) const override;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 6e5c666bdbc75..72a4377034f21 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2459,14 +2459,14 @@ define <8 x i16> @sdistribute_const1_v8i8(<8 x i8> %src1, <8 x i8> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.8b, #10
; CHECK-NEON-NEXT: smull v0.8h, v0.8b, v1.8b
-; CHECK-NEON-NEXT: smlal v0.8h, v2.8b, v1.8b
+; CHECK-NEON-NEXT: smlal v0.8h, v1.8b, v2.8b
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: sdistribute_const1_v8i8:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.8b, #10
; CHECK-SVE-NEXT: smull v0.8h, v0.8b, v1.8b
-; CHECK-SVE-NEXT: smlal v0.8h, v2.8b, v1.8b
+; CHECK-SVE-NEXT: smlal v0.8h, v1.8b, v2.8b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sdistribute_const1_v8i8:
@@ -2546,14 +2546,14 @@ define <8 x i16> @udistribute_const1_v8i8(<8 x i8> %src1, <8 x i8> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.8b, #10
; CHECK-NEON-NEXT: umull v0.8h, v0.8b, v1.8b
-; CHECK-NEON-NEXT: umlal v0.8h, v2.8b, v1.8b
+; CHECK-NEON-NEXT: umlal v0.8h, v1.8b, v2.8b
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: udistribute_const1_v8i8:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.8b, #10
; CHECK-SVE-NEXT: umull v0.8h, v0.8b, v1.8b
-; CHECK-SVE-NEXT: umlal v0.8h, v2.8b, v1.8b
+; CHECK-SVE-NEXT: umlal v0.8h, v1.8b, v2.8b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: udistribute_const1_v8i8:
@@ -2779,14 +2779,14 @@ define <2 x i64> @sdistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.2s, #10
; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: smlal v0.2d, v2.2s, v1.2s
+; CHECK-NEON-NEXT: smlal v0.2d, v1.2s, v2.2s
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: sdistribute_const1_v2i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.2s, #10
; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: smlal v0.2d, v2.2s, v1.2s
+; CHECK-SVE-NEXT: smlal v0.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sdistribute_const1_v2i32:
@@ -2889,14 +2889,14 @@ define <2 x i64> @udistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.2s, #10
; CHECK-NEON-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: umlal v0.2d, v2.2s, v1.2s
+; CHECK-NEON-NEXT: umlal v0.2d, v1.2s, v2.2s
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: udistribute_const1_v2i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.2s, #10
; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: umlal v0.2d, v2.2s, v1.2s
+; CHECK-SVE-NEXT: umlal v0.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: udistribute_const1_v2i32:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..bd63548741c3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -81,11 +81,17 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
}
define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
-; CHECK-LABEL: commutable_smull:
-; CHECK: // %bb.0:
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: commutable_smull:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: smull v0.2d, v1.2s, v0.2s
+; CHECK-SD-NEXT: stp q0, q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: commutable_smull:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: stp q0, q0, [x0]
+; CHECK-GI-NEXT: ret
%1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B)
%2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A)
store <2 x i64> %1, ptr %C
@@ -138,11 +144,17 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
}
define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
-; CHECK-LABEL: commutable_umull:
-; CHECK: // %bb.0:
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: commutable_umull:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: umull v0.2d, v1.2s, v0.2s
+; CHECK-SD-NEXT: stp q0, q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: commutable_umull:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: stp q0, q0, [x0]
+; CHECK-GI-NEXT: ret
%1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B)
%2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A)
store <2 x i64> %1, ptr %C
@@ -245,7 +257,7 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) {
; CHECK-LABEL: commutable_pmull8h:
; CHECK: // %bb.0:
-; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B)
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index b3ce9d2369104..44a38d7947d66 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -843,3 +843,26 @@ define i1 @cmn_nsw_neg_64(i64 %a, i64 %b) {
%cmp = icmp sgt i64 %a, %sub
ret i1 %cmp
}
+
+define i1 @cmn_and_adds(i32 %num, i32 %num2, ptr %use) {
+; CHECK-SD-LABEL: cmn_and_adds:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adds w8, w0, w1
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: str w8, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmn_and_adds:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmn w0, w1
+; CHECK-GI-NEXT: add w9, w1, w0
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: str w9, [x2]
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: ret
+ %add = add nsw i32 %num2, %num
+ store i32 %add, ptr %use, align 4
+ %sub = sub nsw i32 0, %num2
+ %cmp = icmp slt i32 %num, %sub
+ ret i1 %cmp
+}
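
For readers who don't have the TargetLowering hooks paged in: once AArch64ISD::SMULL/UMULL/PMULL report themselves as commutative, generic SelectionDAG code may, for example, canonicalize a constant operand to the right-hand side (which is why the movi splat now appears as the second multiply operand in the smlal/umlal checks above) and may settle on a different but equivalent operand order when deduplicating commuted nodes (the benign swaps in the commutable_* tests). Below is a minimal, self-contained sketch of the hook pattern itself; the classes, the opcode enum, and the canonicalizeConstantToRHS() helper are simplified stand-ins made up for illustration, not LLVM's real API:

```c++
// Toy model of the TargetLowering hook pattern (illustration only; these are
// simplified stand-ins, not LLVM's real classes or real opcode values).
#include <cstdio>
#include <utility>

enum Opcode : unsigned { ISD_ADD, ISD_SUB, AARCH64_SMULL, AARCH64_SUBS };

struct TargetLoweringBase {
  virtual ~TargetLoweringBase() = default;
  // Default implementation: only generic commutative opcodes commute.
  virtual bool isCommutativeBinOp(unsigned Opc) const { return Opc == ISD_ADD; }
};

struct AArch64Lowering : TargetLoweringBase {
  // Mirrors the override added by this PR: teach the generic layer that the
  // target-specific multiply opcode commutes, then defer to the base class.
  bool isCommutativeBinOp(unsigned Opc) const override {
    if (Opc == AARCH64_SMULL)
      return true;
    return TargetLoweringBase::isCommutativeBinOp(Opc);
  }
};

struct Operand { bool IsConstant; const char *Name; };

// One of the generic canonicalizations the hook unlocks: put the constant on
// the right-hand side so later folds only have to look in one place.
void canonicalizeConstantToRHS(const TargetLoweringBase &TLI, unsigned Opc,
                               Operand &LHS, Operand &RHS) {
  if (TLI.isCommutativeBinOp(Opc) && LHS.IsConstant && !RHS.IsConstant)
    std::swap(LHS, RHS);
}

int main() {
  AArch64Lowering TLI;
  Operand LHS{true, "movi #10"}, RHS{false, "v1"};
  canonicalizeConstantToRHS(TLI, AARCH64_SMULL, LHS, RHS);
  std::printf("smull %s, %s\n", LHS.Name, RHS.Name); // constant ends up second
}
```

isBinOp follows the same shape: AArch64ISD::SUBS is reported as a binary operator (but not a commutative one), so generic combines that gate on that query can consider it too.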
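The new cmp-to-cmn.ll test exercises the other half of the change: performFlagSettingCombine now also looks for an existing generic node with the operands swapped (guarded by the new isCommutative parameter), so the addition feeding the store and the comparison can share a single adds even though the IR spells the addition as num2 + num while the compare is written against num and -num2. A rough source-level reproducer, my own assumption rather than anything taken from the PR, would be:

```c++
// Hypothetical reproducer for the cmn_and_adds IR test (assumed, not from the
// PR): with the commuted match in place, SelectionDAG can fold the add and the
// compare into one flag-setting ADDS instead of an ADD plus a separate CMN.
bool cmn_and_adds(int num, int num2, int *use) {
  *use = num2 + num;  // the ADD whose value is stored
  return num < -num2; // the compare that reads the same ADDS's flags
}
```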
✅ With the latest revision this PR passed the C/C++ code formatter.
RKSimon reviewed Sep 23, 2025