From 39b54ba0eadb191875a6a0fbf90a2a64dd0450a6 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk@posteo.org>
Date: Fri, 14 Nov 2025 10:44:36 +0100
Subject: [PATCH 1/2] [AArch64] Add Tests with Common Subexpressions in CCMP
 Chains; NFC

---
 llvm/test/CodeGen/AArch64/ccmp-cse.ll | 144 ++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/ccmp-cse.ll

diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
new file mode 100644
index 0000000000000..d887b57849900
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %or.cond = or i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    ccmp x0, x1, #0, hs
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %or.cond = or i1 %cmp.match, %or.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    ccmp x0, x1, #0, hs
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %or.cond = or i1 %or.nomatch, %cmp.match
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x1, #2, hi
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %and.cond = and i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or_sub_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x1, x2
+; CHECK-NEXT:    ccmp x1, x2, #2, ls
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %or.cond = or i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %x, %y
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+; Negative test: We must negate the or operation, hence this must come first.
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, ls
+; CHECK-NEXT:    ccmp x2, x1, #2, lo
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %and.cond = and i1 %or.nomatch, %cmp.match
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+; Negative test: There is no analogue of SUBS for floating point.
+define float @test_negative_float(float %unrelated, float %x, float %y) nounwind {
+; CHECK-LABEL: test_negative_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s2, s0
+; CHECK-NEXT:    fsub s0, s2, s1
+; CHECK-NEXT:    movi d3, #0000000000000000
+; CHECK-NEXT:    fccmp s2, s1, #8, le
+; CHECK-NEXT:    fcsel s0, s3, s0, mi
+; CHECK-NEXT:    ret
+  %cmp.nomatch1 = fcmp olt float %y, %x
+  %cmp.nomatch2 = fcmp ogt float %y, %unrelated
+  %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %sub.noreuse = fsub float %y, %x
+  %res = select i1 %or.cond, float 0.0, float %sub.noreuse
+  ret float %res
+}
+
+; Negative test: If both operands match a sub, do not reorder them.
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind {
+; CHECK-LABEL: test_prefer_right_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    csel x8, x0, x1, lo
+; CHECK-NEXT:    sub x0, x2, x8
+; CHECK-NEXT:    ret
+  %cmp.match1 = icmp ult i64 %z, %y
+  %cmp.match2 = icmp ugt i64 %z, %x
+  %or.cond = or i1 %cmp.match1, %cmp.match2
+  %sub.reuse1 = sub nuw i64 %z, %y
+  %sub.reuse2 = sub nuw i64 %z, %x
+  %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1
+  ret i64 %res
+}

From 282d1829913d1babd3fd60454bbd9b46ea7c219b Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk@posteo.org>
Date: Fri, 14 Nov 2025 10:50:02 +0100
Subject: [PATCH 2/2] [AArch64] Reorder Comparison Trees to Facilitate CSE

The AArch64 backend converts trees formed by conjunctions/disjunctions
of comparisons into sequences of `CCMP` instructions. Before this
change, the implementation only checked whether a sub-tree must be
processed first. If not, it processed the sub-trees in the order in
which they occur in the DAG.

This may not be optimal if the DAG also contains a `SUB` node with the
same operands as one of the comparisons. In this case, we should process
this comparison first because a single `SUBS` instruction can then
compute both the `SUB` result and the flags for the comparison.

To achieve this, this commit comprises the following changes:

- Extend `canEmitConjunction` with a new output parameter `PreferFirst`,
  which reports to the caller whether the sub-tree should preferably be
  processed first.
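- Set `PreferFirst` to `true` for a comparison if the DAG already
  contains a `SUB` node with the same operands and type. For an AND/OR
  node, `PreferFirst` is set if either of its operands sets it.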
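- If exactly one sub-tree has `PreferFirst = true` and processing it
  first does not violate a `MustBeFirst` constraint of the other
  sub-tree, make sure it is processed first, swapping the sub-trees if
  necessary. If both sub-trees prefer to be first, keep their original
  order.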
- The existing common subexpression elimination code then takes care of
  using only a single instruction for the comparison and the `SUB` node
  if possible.

Closes #149685.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 41 ++++++++++++++-----
 llvm/test/CodeGen/AArch64/ccmp-cse.ll         | 33 +++++++--------
 2 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8aea0d23ffc0a..6320b5de6af20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3874,22 +3874,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
 /// \param MustBeFirst  Set to true if this subtree needs to be negated and we
 ///                     cannot do the negation naturally. We are required to
 ///                     emit the subtree first in this case.
+/// \param PreferFirst  Set to true if processing this subtree first may
+///                     result in more efficient code.
 /// \param WillNegate   Is true if are called when the result of this
 ///                     subexpression must be negated. This happens when the
 ///                     outer expression is an OR. We can use this fact to know
 ///                     that we have a double negation (or (or ...) ...) that
 ///                     can be implemented for free.
-static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
-                               bool &MustBeFirst, bool WillNegate,
+static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val,
+                               bool &CanNegate, bool &MustBeFirst,
+                               bool &PreferFirst, bool WillNegate,
                                unsigned Depth = 0) {
   if (!Val.hasOneUse())
     return false;
   unsigned Opcode = Val->getOpcode();
   if (Opcode == ISD::SETCC) {
-    if (Val->getOperand(0).getValueType() == MVT::f128)
+    EVT VT = Val->getOperand(0).getValueType();
+    if (VT == MVT::f128)
       return false;
     CanNegate = true;
     MustBeFirst = false;
+    // Designate this operation as a preferred first operation if the result
+    // of a SUB operation can be reused.
+    PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
+                                    {Val->getOperand(0), Val->getOperand(1)});
     return true;
   }
   // Protect against exponential runtime and stack overflow.
@@ -3901,11 +3909,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
     SDValue O1 = Val->getOperand(1);
     bool CanNegateL;
     bool MustBeFirstL;
-    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+    bool PreferFirstL;
+    if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
+                            IsOR, Depth + 1))
       return false;
     bool CanNegateR;
     bool MustBeFirstR;
-    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+    bool PreferFirstR;
+    if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
+                            IsOR, Depth + 1))
       return false;
 
     if (MustBeFirstL && MustBeFirstR)
@@ -3928,6 +3940,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
       CanNegate = false;
       MustBeFirst = MustBeFirstL || MustBeFirstR;
     }
+    PreferFirst = PreferFirstL || PreferFirstR;
     return true;
   }
   return false;
@@ -3989,19 +4002,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
   SDValue LHS = Val->getOperand(0);
   bool CanNegateL;
   bool MustBeFirstL;
-  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
+  bool PreferFirstL;
+  bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
+                                   PreferFirstL, IsOR);
   assert(ValidL && "Valid conjunction/disjunction tree");
   (void)ValidL;
 
   SDValue RHS = Val->getOperand(1);
   bool CanNegateR;
   bool MustBeFirstR;
-  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
+  bool PreferFirstR;
+  bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
+                                   PreferFirstR, IsOR);
   assert(ValidR && "Valid conjunction/disjunction tree");
   (void)ValidR;
 
-  // Swap sub-tree that must come first to the right side.
-  if (MustBeFirstL) {
+  bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
+
+  // Swap sub-tree that must or should come first to the right side.
+  if (MustBeFirstL || ShouldFirstL) {
     assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
     std::swap(LHS, RHS);
     std::swap(CanNegateL, CanNegateR);
@@ -4057,7 +4076,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
                                AArch64CC::CondCode &OutCC) {
   bool DummyCanNegate;
   bool DummyMustBeFirst;
-  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
+  bool DummyPreferFirst;
+  if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
+                          DummyPreferFirst, false))
     return SDValue();
 
   return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
index d887b57849900..657498172a04c 100644
--- a/llvm/test/CodeGen/AArch64/ccmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
@@ -4,10 +4,9 @@
 define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: test_single_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x2, x0
-; CHECK-NEXT:    sub x8, x2, x1
-; CHECK-NEXT:    ccmp x2, x1, #0, ls
-; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
 ; CHECK-NEXT:    ret
   %cmp.match = icmp ult i64 %y, %x
   %cmp.nomatch = icmp ugt i64 %y, %unrelated
@@ -20,11 +19,10 @@ define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
 define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: test_two_ors:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x2, x0
-; CHECK-NEXT:    sub x8, x2, x1
-; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    subs x8, x2, x1
 ; CHECK-NEXT:    ccmp x0, x1, #0, hs
-; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
 ; CHECK-NEXT:    ret
   %cmp.match = icmp ult i64 %y, %x
   %cmp.nomatch1 = icmp ult i64 %unrelated, %x
@@ -39,11 +37,10 @@ define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
 define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: test_two_ors_commuted:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x2, x0
-; CHECK-NEXT:    sub x8, x2, x1
-; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    subs x8, x2, x1
 ; CHECK-NEXT:    ccmp x0, x1, #0, hs
-; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
 ; CHECK-NEXT:    ret
   %cmp.match = icmp ult i64 %y, %x
   %cmp.nomatch1 = icmp ult i64 %unrelated, %x
@@ -58,10 +55,9 @@ define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
 define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: test_single_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x2, x0
-; CHECK-NEXT:    sub x8, x2, x1
-; CHECK-NEXT:    ccmp x2, x1, #2, hi
-; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x0, #0, lo
+; CHECK-NEXT:    csel x0, xzr, x8, hi
 ; CHECK-NEXT:    ret
   %cmp.match = icmp ult i64 %y, %x
   %cmp.nomatch = icmp ugt i64 %y, %unrelated
@@ -74,9 +70,8 @@ define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
 define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: test_single_or_sub_commuted:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x2, x0
-; CHECK-NEXT:    sub x8, x1, x2
-; CHECK-NEXT:    ccmp x1, x2, #2, ls
+; CHECK-NEXT:    subs x8, x1, x2
+; CHECK-NEXT:    ccmp x2, x0, #2, ls
 ; CHECK-NEXT:    csel x0, xzr, x8, hi
 ; CHECK-NEXT:    ret
   %cmp.match = icmp ult i64 %y, %x