[AArch64] Explicitly mark ADDS, ANDS, SUBS, etc as binops #160170
Open
AZero13 wants to merge 1 commit into llvm:main from AZero13:canon
+52 −2
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

This also means overriding isBinOp and isCommutativeBinOp.

Full diff: https://github.com/llvm/llvm-project/pull/160170.diff

5 Files Affected:
- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
- llvm/lib/Target/AArch64/AArch64ISelLowering.h
- llvm/test/CodeGen/AArch64/aarch64-smull.ll
- llvm/test/CodeGen/AArch64/arm64-vmul.ll
- llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cd7f0e719ad0c..7f674d72868a4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17107,6 +17107,27 @@ bool AArch64TargetLowering::shouldRemoveRedundantExtend(SDValue Extend) const {
return true;
}
+bool AArch64TargetLowering::isBinOp(unsigned Opcode) const {
+ switch (Opcode) {
+ // TODO: Add more?
+ case AArch64ISD::SUBS:
+ return true;
+ }
+ return TargetLoweringBase::isBinOp(Opcode);
+}
+
+bool AArch64TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::ANDS:
+ case AArch64ISD::ADDS:
+ case AArch64ISD::PMULL:
+ case AArch64ISD::SMULL:
+ case AArch64ISD::UMULL:
+ return true;
+ }
+ return TargetLoweringBase::isCommutativeBinOp(Opcode);
+}
+
// Truncations from 64-bit GPR to 32-bit GPR is free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
@@ -25996,7 +26017,7 @@ static SDValue performSETCCCombine(SDNode *N,
// (eg AND) if the flag is unused.
static SDValue performFlagSettingCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
- unsigned GenericOpcode) {
+ unsigned GenericOpcode, bool isCommutative) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -26013,6 +26034,16 @@ static SDValue performFlagSettingCombine(SDNode *N,
GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
DCI.CombineTo(Generic, SDValue(N, 0));
+ // Some opcodes, such as ADCS, cannot be reported as commutative through
+ // isCommutativeBinOp: the rest of the codebase does not treat them as
+ // commutative because they also consume the carry flag. Their two value
+ // operands can still be swapped safely here, so take an explicit flag.
+ if (isCommutative) {
+ if (SDNode *Generic = DCI.DAG.getNodeIfExists(
+ GenericOpcode, DCI.DAG.getVTList(VT), {RHS, LHS}))
+ DCI.CombineTo(Generic, SDValue(N, 0));
+ }
+
return SDValue();
}
@@ -27547,7 +27578,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::TRUNCATE:
return performTruncateCombine(N, DAG, DCI);
case AArch64ISD::ANDS:
- return performFlagSettingCombine(N, DCI, ISD::AND);
+ return performFlagSettingCombine(N, DCI, ISD::AND, true);
case AArch64ISD::ADC:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
@@ -27557,15 +27588,15 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::ADCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
- return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
+ return performFlagSettingCombine(N, DCI, AArch64ISD::ADC, true);
case AArch64ISD::SBCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
- return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+ return performFlagSettingCombine(N, DCI, AArch64ISD::SBC, false);
case AArch64ISD::ADDS:
- return performFlagSettingCombine(N, DCI, ISD::ADD);
+ return performFlagSettingCombine(N, DCI, ISD::ADD, true);
case AArch64ISD::SUBS:
- return performFlagSettingCombine(N, DCI, ISD::SUB);
+ return performFlagSettingCombine(N, DCI, ISD::SUB, false);
case AArch64ISD::BICi: {
APInt DemandedBits =
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ff073d3eafb1f..c1ec703acca33 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -250,6 +250,11 @@ class AArch64TargetLowering : public TargetLowering {
bool isLegalAddScalableImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
+ /// Add AArch64-specific opcodes to the default list.
+ bool isBinOp(unsigned Opcode) const override;
+
+ bool isCommutativeBinOp(unsigned Opcode) const override;
+
bool isMulAddWithConstProfitable(SDValue AddNode,
SDValue ConstNode) const override;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 6e5c666bdbc75..72a4377034f21 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2459,14 +2459,14 @@ define <8 x i16> @sdistribute_const1_v8i8(<8 x i8> %src1, <8 x i8> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.8b, #10
; CHECK-NEON-NEXT: smull v0.8h, v0.8b, v1.8b
-; CHECK-NEON-NEXT: smlal v0.8h, v2.8b, v1.8b
+; CHECK-NEON-NEXT: smlal v0.8h, v1.8b, v2.8b
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: sdistribute_const1_v8i8:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.8b, #10
; CHECK-SVE-NEXT: smull v0.8h, v0.8b, v1.8b
-; CHECK-SVE-NEXT: smlal v0.8h, v2.8b, v1.8b
+; CHECK-SVE-NEXT: smlal v0.8h, v1.8b, v2.8b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sdistribute_const1_v8i8:
@@ -2546,14 +2546,14 @@ define <8 x i16> @udistribute_const1_v8i8(<8 x i8> %src1, <8 x i8> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.8b, #10
; CHECK-NEON-NEXT: umull v0.8h, v0.8b, v1.8b
-; CHECK-NEON-NEXT: umlal v0.8h, v2.8b, v1.8b
+; CHECK-NEON-NEXT: umlal v0.8h, v1.8b, v2.8b
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: udistribute_const1_v8i8:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.8b, #10
; CHECK-SVE-NEXT: umull v0.8h, v0.8b, v1.8b
-; CHECK-SVE-NEXT: umlal v0.8h, v2.8b, v1.8b
+; CHECK-SVE-NEXT: umlal v0.8h, v1.8b, v2.8b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: udistribute_const1_v8i8:
@@ -2779,14 +2779,14 @@ define <2 x i64> @sdistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.2s, #10
; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: smlal v0.2d, v2.2s, v1.2s
+; CHECK-NEON-NEXT: smlal v0.2d, v1.2s, v2.2s
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: sdistribute_const1_v2i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.2s, #10
; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: smlal v0.2d, v2.2s, v1.2s
+; CHECK-SVE-NEXT: smlal v0.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sdistribute_const1_v2i32:
@@ -2889,14 +2889,14 @@ define <2 x i64> @udistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.2s, #10
; CHECK-NEON-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: umlal v0.2d, v2.2s, v1.2s
+; CHECK-NEON-NEXT: umlal v0.2d, v1.2s, v2.2s
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: udistribute_const1_v2i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v2.2s, #10
; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: umlal v0.2d, v2.2s, v1.2s
+; CHECK-SVE-NEXT: umlal v0.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: udistribute_const1_v2i32:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..bd63548741c3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -81,11 +81,17 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
}
define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
-; CHECK-LABEL: commutable_smull:
-; CHECK: // %bb.0:
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: commutable_smull:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: smull v0.2d, v1.2s, v0.2s
+; CHECK-SD-NEXT: stp q0, q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: commutable_smull:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: stp q0, q0, [x0]
+; CHECK-GI-NEXT: ret
%1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B)
%2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A)
store <2 x i64> %1, ptr %C
@@ -138,11 +144,17 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
}
define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
-; CHECK-LABEL: commutable_umull:
-; CHECK: // %bb.0:
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: commutable_umull:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: umull v0.2d, v1.2s, v0.2s
+; CHECK-SD-NEXT: stp q0, q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: commutable_umull:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: stp q0, q0, [x0]
+; CHECK-GI-NEXT: ret
%1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B)
%2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A)
store <2 x i64> %1, ptr %C
@@ -245,7 +257,7 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) {
; CHECK-LABEL: commutable_pmull8h:
; CHECK: // %bb.0:
-; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B)
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index b3ce9d2369104..44a38d7947d66 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -843,3 +843,26 @@ define i1 @cmn_nsw_neg_64(i64 %a, i64 %b) {
%cmp = icmp sgt i64 %a, %sub
ret i1 %cmp
}
+
+define i1 @cmn_and_adds(i32 %num, i32 %num2, ptr %use) {
+; CHECK-SD-LABEL: cmn_and_adds:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adds w8, w0, w1
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: str w8, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmn_and_adds:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmn w0, w1
+; CHECK-GI-NEXT: add w9, w1, w0
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: str w9, [x2]
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: ret
+ %add = add nsw i32 %num2, %num
+ store i32 %add, ptr %use, align 4
+ %sub = sub nsw i32 0, %num2
+ %cmp = icmp slt i32 %num, %sub
+ ret i1 %cmp
+}
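
For readers who don't have the TargetLowering hooks paged in: once AArch64ISD::SMULL/UMULL/PMULL report themselves as commutative, generic SelectionDAG code may, for example, canonicalize a constant operand to the right-hand side (which is why the movi splat now appears as the second multiply operand in the smlal/umlal checks above) and may settle on a different but equivalent operand order when deduplicating commuted nodes (the benign swaps in the commutable_* tests). Below is a minimal, self-contained sketch of the hook pattern itself; the classes, the opcode enum, and the canonicalizeConstantToRHS() helper are simplified stand-ins made up for illustration, not LLVM's real API:

```c++
// Toy model of the TargetLowering hook pattern (illustration only; these are
// simplified stand-ins, not LLVM's real classes or real opcode values).
#include <cstdio>
#include <utility>

enum Opcode : unsigned { ISD_ADD, ISD_SUB, AARCH64_SMULL, AARCH64_SUBS };

struct TargetLoweringBase {
  virtual ~TargetLoweringBase() = default;
  // Default implementation: only generic commutative opcodes commute.
  virtual bool isCommutativeBinOp(unsigned Opc) const { return Opc == ISD_ADD; }
};

struct AArch64Lowering : TargetLoweringBase {
  // Mirrors the override added by this PR: teach the generic layer that the
  // target-specific multiply opcode commutes, then defer to the base class.
  bool isCommutativeBinOp(unsigned Opc) const override {
    if (Opc == AARCH64_SMULL)
      return true;
    return TargetLoweringBase::isCommutativeBinOp(Opc);
  }
};

struct Operand { bool IsConstant; const char *Name; };

// One of the generic canonicalizations the hook unlocks: put the constant on
// the right-hand side so later folds only have to look in one place.
void canonicalizeConstantToRHS(const TargetLoweringBase &TLI, unsigned Opc,
                               Operand &LHS, Operand &RHS) {
  if (TLI.isCommutativeBinOp(Opc) && LHS.IsConstant && !RHS.IsConstant)
    std::swap(LHS, RHS);
}

int main() {
  AArch64Lowering TLI;
  Operand LHS{true, "movi #10"}, RHS{false, "v1"};
  canonicalizeConstantToRHS(TLI, AARCH64_SMULL, LHS, RHS);
  std::printf("smull %s, %s\n", LHS.Name, RHS.Name); // constant ends up second
}
```

isBinOp follows the same shape: AArch64ISD::SUBS is reported as a binary operator (but not a commutative one), so generic combines that gate on that query can consider it too.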
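The new cmp-to-cmn.ll test exercises the other half of the change: performFlagSettingCombine now also looks for an existing generic node with the operands swapped (guarded by the new isCommutative parameter), so the addition feeding the store and the comparison can share a single adds even though the IR spells the addition as num2 + num while the compare is written against num and -num2. A rough source-level reproducer, my own assumption rather than anything taken from the PR, would be:

```c++
// Hypothetical reproducer for the cmn_and_adds IR test (assumed, not from the
// PR): with the commuted match in place, SelectionDAG can fold the add and the
// compare into one flag-setting ADDS instead of an ADD plus a separate CMN.
bool cmn_and_adds(int num, int num2, int *use) {
  *use = num2 + num;  // the ADD whose value is stored
  return num < -num2; // the compare that reads the same ADDS's flags
}
```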
✅ With the latest revision this PR passed the C/C++ code formatter.
RKSimon reviewed Sep 23, 2025