
Conversation

AZero13 (Contributor) commented Sep 22, 2025

This also means overriding isBinOp and isCommutativeBinOp.
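
For readers skimming the description, the override pattern is roughly the sketch below. This is an illustrative outline only, condensed from the full diff further down (the authoritative code is in AArch64ISelLowering.cpp), with the opcode lists and comments abbreviated.

  // Illustrative sketch: AArch64-specific flag-setting opcodes are reported
  // as (commutative) binary operators, so generic DAG-combine queries about
  // binops and operand order now apply to them; all other opcodes defer to
  // the TargetLoweringBase defaults.
  bool AArch64TargetLowering::isBinOp(unsigned Opcode) const {
    switch (Opcode) {
    case AArch64ISD::SUBS: // flag-setting subtract (a binop, not commutative)
      return true;
    }
    return TargetLoweringBase::isBinOp(Opcode);
  }

  bool AArch64TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
    switch (Opcode) {
    case AArch64ISD::ADDS: // flag-setting add
    case AArch64ISD::ANDS: // flag-setting and
      return true;         // operand order does not matter
    }
    return TargetLoweringBase::isCommutativeBinOp(Opcode);
  }

Together with the commutative handling added to performFlagSettingCombine, this lets an ADDS reuse an existing generic ADD even when the operands appear in the opposite order (see the new cmn_and_adds test in cmp-to-cmn.ll).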

llvmbot (Member) commented Sep 22, 2025

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

This also means overriding isBinOp and isCommutativeBinOp.


Full diff: https://github.com/llvm/llvm-project/pull/160170.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+37-6)
  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+5)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+8-8)
  • (modified) llvm/test/CodeGen/AArch64/arm64-vmul.ll (+23-11)
  • (modified) llvm/test/CodeGen/AArch64/cmp-to-cmn.ll (+23)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cd7f0e719ad0c..7f674d72868a4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17107,6 +17107,27 @@ bool AArch64TargetLowering::shouldRemoveRedundantExtend(SDValue Extend) const {
   return true;
 }
 
+bool AArch64TargetLowering::isBinOp(unsigned Opcode) const {
+  switch (Opcode) {
+  // TODO: Add more?
+  case AArch64ISD::SUBS:
+    return true;
+  }
+  return TargetLoweringBase::isBinOp(Opcode);
+}
+
+bool AArch64TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
+  switch (Opcode) {
+  case AArch64ISD::ANDS:
+  case AArch64ISD::ADDS:
+  case AArch64ISD::PMULL:
+  case AArch64ISD::SMULL:
+  case AArch64ISD::UMULL:
+    return true;
+  }
+  return TargetLoweringBase::isCommutativeBinOp(Opcode);
+}
+
 // Truncations from 64-bit GPR to 32-bit GPR is free.
 bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
@@ -25996,7 +26017,7 @@ static SDValue performSETCCCombine(SDNode *N,
 // (eg AND) if the flag is unused.
 static SDValue performFlagSettingCombine(SDNode *N,
                                          TargetLowering::DAGCombinerInfo &DCI,
-                                         unsigned GenericOpcode) {
+                                         unsigned GenericOpcode, bool isCommutative) {
   SDLoc DL(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
@@ -26013,6 +26034,16 @@ static SDValue performFlagSettingCombine(SDNode *N,
           GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
     DCI.CombineTo(Generic, SDValue(N, 0));
 
+  // An opcode can be commutative for this purpose even if the rest of the
+  // codebase does not treat it as commutative. For example, ADCS is not
+  // considered commutative because of its flag operand, but other than that
+  // the order of its operands does not matter.
+  if (isCommutative) {
+    if (SDNode *Generic = DCI.DAG.getNodeIfExists(
+            GenericOpcode, DCI.DAG.getVTList(VT), {RHS, LHS}))
+      DCI.CombineTo(Generic, SDValue(N, 0));
+  }
+
   return SDValue();
 }
 
@@ -27547,7 +27578,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::TRUNCATE:
     return performTruncateCombine(N, DAG, DCI);
   case AArch64ISD::ANDS:
-    return performFlagSettingCombine(N, DCI, ISD::AND);
+    return performFlagSettingCombine(N, DCI, ISD::AND, true);
   case AArch64ISD::ADC:
     if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
       return R;
@@ -27557,15 +27588,15 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case AArch64ISD::ADCS:
     if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
       return R;
-    return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
+    return performFlagSettingCombine(N, DCI, AArch64ISD::ADC, true);
   case AArch64ISD::SBCS:
     if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
       return R;
-    return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+    return performFlagSettingCombine(N, DCI, AArch64ISD::SBC, false);
   case AArch64ISD::ADDS:
-    return performFlagSettingCombine(N, DCI, ISD::ADD);
+    return performFlagSettingCombine(N, DCI, ISD::ADD, true);
   case AArch64ISD::SUBS:
-    return performFlagSettingCombine(N, DCI, ISD::SUB);
+    return performFlagSettingCombine(N, DCI, ISD::SUB, false);
   case AArch64ISD::BICi: {
     APInt DemandedBits =
         APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ff073d3eafb1f..c1ec703acca33 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -250,6 +250,11 @@ class AArch64TargetLowering : public TargetLowering {
   bool isLegalAddScalableImmediate(int64_t) const override;
   bool isLegalICmpImmediate(int64_t) const override;
 
+  /// Add AArch64-specific opcodes to the default list.
+  bool isBinOp(unsigned Opcode) const override;
+
+  bool isCommutativeBinOp(unsigned Opcode) const override;
+
   bool isMulAddWithConstProfitable(SDValue AddNode,
                                    SDValue ConstNode) const override;
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 6e5c666bdbc75..72a4377034f21 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2459,14 +2459,14 @@ define <8 x i16> @sdistribute_const1_v8i8(<8 x i8> %src1, <8 x i8> %mul) {
 ; CHECK-NEON:       // %bb.0: // %entry
 ; CHECK-NEON-NEXT:    movi v2.8b, #10
 ; CHECK-NEON-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-NEON-NEXT:    smlal v0.8h, v2.8b, v1.8b
+; CHECK-NEON-NEXT:    smlal v0.8h, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    ret
 ;
 ; CHECK-SVE-LABEL: sdistribute_const1_v8i8:
 ; CHECK-SVE:       // %bb.0: // %entry
 ; CHECK-SVE-NEXT:    movi v2.8b, #10
 ; CHECK-SVE-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-SVE-NEXT:    smlal v0.8h, v2.8b, v1.8b
+; CHECK-SVE-NEXT:    smlal v0.8h, v1.8b, v2.8b
 ; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sdistribute_const1_v8i8:
@@ -2546,14 +2546,14 @@ define <8 x i16> @udistribute_const1_v8i8(<8 x i8> %src1, <8 x i8> %mul) {
 ; CHECK-NEON:       // %bb.0: // %entry
 ; CHECK-NEON-NEXT:    movi v2.8b, #10
 ; CHECK-NEON-NEXT:    umull v0.8h, v0.8b, v1.8b
-; CHECK-NEON-NEXT:    umlal v0.8h, v2.8b, v1.8b
+; CHECK-NEON-NEXT:    umlal v0.8h, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    ret
 ;
 ; CHECK-SVE-LABEL: udistribute_const1_v8i8:
 ; CHECK-SVE:       // %bb.0: // %entry
 ; CHECK-SVE-NEXT:    movi v2.8b, #10
 ; CHECK-SVE-NEXT:    umull v0.8h, v0.8b, v1.8b
-; CHECK-SVE-NEXT:    umlal v0.8h, v2.8b, v1.8b
+; CHECK-SVE-NEXT:    umlal v0.8h, v1.8b, v2.8b
 ; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: udistribute_const1_v8i8:
@@ -2779,14 +2779,14 @@ define <2 x i64> @sdistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
 ; CHECK-NEON:       // %bb.0: // %entry
 ; CHECK-NEON-NEXT:    movi v2.2s, #10
 ; CHECK-NEON-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT:    smlal v0.2d, v2.2s, v1.2s
+; CHECK-NEON-NEXT:    smlal v0.2d, v1.2s, v2.2s
 ; CHECK-NEON-NEXT:    ret
 ;
 ; CHECK-SVE-LABEL: sdistribute_const1_v2i32:
 ; CHECK-SVE:       // %bb.0: // %entry
 ; CHECK-SVE-NEXT:    movi v2.2s, #10
 ; CHECK-SVE-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT:    smlal v0.2d, v2.2s, v1.2s
+; CHECK-SVE-NEXT:    smlal v0.2d, v1.2s, v2.2s
 ; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sdistribute_const1_v2i32:
@@ -2889,14 +2889,14 @@ define <2 x i64> @udistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
 ; CHECK-NEON:       // %bb.0: // %entry
 ; CHECK-NEON-NEXT:    movi v2.2s, #10
 ; CHECK-NEON-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT:    umlal v0.2d, v2.2s, v1.2s
+; CHECK-NEON-NEXT:    umlal v0.2d, v1.2s, v2.2s
 ; CHECK-NEON-NEXT:    ret
 ;
 ; CHECK-SVE-LABEL: udistribute_const1_v2i32:
 ; CHECK-SVE:       // %bb.0: // %entry
 ; CHECK-SVE-NEXT:    movi v2.2s, #10
 ; CHECK-SVE-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT:    umlal v0.2d, v2.2s, v1.2s
+; CHECK-SVE-NEXT:    umlal v0.2d, v1.2s, v2.2s
 ; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: udistribute_const1_v2i32:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..bd63548741c3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -81,11 +81,17 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
 }
 
 define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
-; CHECK-LABEL: commutable_smull:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    stp q0, q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: commutable_smull:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    smull v0.2d, v1.2s, v0.2s
+; CHECK-SD-NEXT:    stp q0, q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: commutable_smull:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT:    stp q0, q0, [x0]
+; CHECK-GI-NEXT:    ret
   %1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B)
   %2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A)
   store <2 x i64> %1, ptr %C
@@ -138,11 +144,17 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
 }
 
 define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
-; CHECK-LABEL: commutable_umull:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    stp q0, q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: commutable_umull:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    umull v0.2d, v1.2s, v0.2s
+; CHECK-SD-NEXT:    stp q0, q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: commutable_umull:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT:    stp q0, q0, [x0]
+; CHECK-GI-NEXT:    ret
   %1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B)
   %2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A)
   store <2 x i64> %1, ptr %C
@@ -245,7 +257,7 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
 define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) {
 ; CHECK-LABEL: commutable_pmull8h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pmull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    pmull v0.8h, v1.8b, v0.8b
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret
   %1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B)
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index b3ce9d2369104..44a38d7947d66 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -843,3 +843,26 @@ define i1 @cmn_nsw_neg_64(i64 %a, i64 %b) {
   %cmp = icmp sgt i64 %a, %sub
   ret i1 %cmp
 }
+
+define i1 @cmn_and_adds(i32 %num, i32 %num2, ptr %use)  {
+; CHECK-SD-LABEL: cmn_and_adds:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    adds w8, w0, w1
+; CHECK-SD-NEXT:    cset w0, lt
+; CHECK-SD-NEXT:    str w8, [x2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: cmn_and_adds:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmn w0, w1
+; CHECK-GI-NEXT:    add w9, w1, w0
+; CHECK-GI-NEXT:    cset w8, lt
+; CHECK-GI-NEXT:    str w9, [x2]
+; CHECK-GI-NEXT:    mov w0, w8
+; CHECK-GI-NEXT:    ret
+  %add = add nsw i32 %num2, %num
+  store i32 %add, ptr %use, align 4
+  %sub = sub nsw i32 0, %num2
+  %cmp = icmp slt i32 %num, %sub
+  ret i1 %cmp
+}


github-actions bot commented Sep 22, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

llvmbot added the llvm:SelectionDAG (SelectionDAGISel as well) label on Sep 22, 2025
AZero13 force-pushed the canon branch 5 times, most recently from 67f2243 to 74ab3be on September 22, 2025 at 21:29
AZero13 force-pushed the canon branch 2 times, most recently from 7ee4433 to ab71df0 on September 23, 2025 at 12:58
AZero13 requested a review from RKSimon on September 23, 2025 at 13:02
AZero13 force-pushed the canon branch 2 times, most recently from 35c35eb to ae0aa98 on October 14, 2025 at 19:17
AZero13 changed the title from "[AArch64] Allow folding between CMN and ADDS and other flag setting nodes are commutative" to "[AArch64] Explicitly mark ADDS, ANDS, SUBS, etc as binops" on Oct 14, 2025