From 39b54ba0eadb191875a6a0fbf90a2a64dd0450a6 Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Fri, 14 Nov 2025 10:44:36 +0100 Subject: [PATCH 1/2] [AArch64] Add Tests with Common Subexpressions in CCMP Chains; NFC --- llvm/test/CodeGen/AArch64/ccmp-cse.ll | 144 ++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/ccmp-cse.ll diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll new file mode 100644 index 0000000000000..d887b57849900 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %cmp.match, %or.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: 
test_two_ors_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_and: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x2, x1, #2, hi +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %and.cond = and i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or_sub_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x1, x2 +; CHECK-NEXT: ccmp x1, x2, #2, ls +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %x, %y + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: We must negate the or operation, hence this must come first. 
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, ls +; CHECK-NEXT: ccmp x2, x1, #2, lo +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %and.cond = and i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: There is no analogue of SUBS for floating point. +define float @test_negative_float(float %unrelated, float %x, float %y) nounwind { +; CHECK-LABEL: test_negative_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s2, s0 +; CHECK-NEXT: fsub s0, s2, s1 +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: fccmp s2, s1, #8, le +; CHECK-NEXT: fcsel s0, s3, s0, mi +; CHECK-NEXT: ret + %cmp.nomatch1 = fcmp olt float %y, %x + %cmp.nomatch2 = fcmp ogt float %y, %unrelated + %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2 + %sub.noreuse = fsub float %y, %x + %res = select i1 %or.cond, float 0.0, float %sub.noreuse + ret float %res +} + +; Negative test: If both operands match a sub, do not reorder them. 
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind { +; CHECK-LABEL: test_prefer_right_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: csel x8, x0, x1, lo +; CHECK-NEXT: sub x0, x2, x8 +; CHECK-NEXT: ret + %cmp.match1 = icmp ult i64 %z, %y + %cmp.match2 = icmp ugt i64 %z, %x + %or.cond = or i1 %cmp.match1, %cmp.match2 + %sub.reuse1 = sub nuw i64 %z, %y + %sub.reuse2 = sub nuw i64 %z, %x + %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1 + ret i64 %res +} From 282d1829913d1babd3fd60454bbd9b46ea7c219b Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Fri, 14 Nov 2025 10:50:02 +0100 Subject: [PATCH 2/2] [AArch64] Reorder Comparison Trees to Facilitate CSE The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. Before this change, the implementation only checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because a single flag-setting instruction (`SUBS`) can then serve both the `SUB` node and the comparison. To achieve this, this commit makes the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so) and the other sub-tree is not itself marked `PreferFirst`, we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes #149685.
--- .../Target/AArch64/AArch64ISelLowering.cpp | 41 ++++++++++++++----- llvm/test/CodeGen/AArch64/ccmp-cse.ll | 33 +++++++-------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8aea0d23ffc0a..6320b5de6af20 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3874,22 +3874,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, /// \param MustBeFirst Set to true if this subtree needs to be negated and we /// cannot do the negation naturally. We are required to /// emit the subtree first in this case. +/// \param PreferFirst Set to true if processing this subtree first may +/// result in more efficient code. /// \param WillNegate Is true if are called when the result of this /// subexpression must be negated. This happens when the /// outer expression is an OR. We can use this fact to know /// that we have a double negation (or (or ...) ...) that /// can be implemented for free. -static bool canEmitConjunction(const SDValue Val, bool &CanNegate, - bool &MustBeFirst, bool WillNegate, +static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, + bool &CanNegate, bool &MustBeFirst, + bool &PreferFirst, bool WillNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { - if (Val->getOperand(0).getValueType() == MVT::f128) + EVT VT = Val->getOperand(0).getValueType(); + if (VT == MVT::f128) return false; CanNegate = true; MustBeFirst = false; + // Designate this operation as a preferred first operation if the result + // of a SUB operation can be reused. + PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT), + {Val->getOperand(0), Val->getOperand(1)}); return true; } // Protect against exponential runtime and stack overflow. 
@@ -3901,11 +3909,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, SDValue O1 = Val->getOperand(1); bool CanNegateL; bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) + bool PreferFirstL; + if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL, + IsOR, Depth + 1)) return false; bool CanNegateR; bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + bool PreferFirstR; + if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR, + IsOR, Depth + 1)) return false; if (MustBeFirstL && MustBeFirstR) @@ -3928,6 +3940,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, CanNegate = false; MustBeFirst = MustBeFirstL || MustBeFirstR; } + PreferFirst = PreferFirstL || PreferFirstR; return true; } return false; @@ -3989,19 +4002,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, SDValue LHS = Val->getOperand(0); bool CanNegateL; bool MustBeFirstL; - bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); + bool PreferFirstL; + bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL, + PreferFirstL, IsOR); assert(ValidL && "Valid conjunction/disjunction tree"); (void)ValidL; SDValue RHS = Val->getOperand(1); bool CanNegateR; bool MustBeFirstR; - bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); + bool PreferFirstR; + bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR, + PreferFirstR, IsOR); assert(ValidR && "Valid conjunction/disjunction tree"); (void)ValidR; - // Swap sub-tree that must come first to the right side. - if (MustBeFirstL) { + bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR; + + // Swap sub-tree that must or should come first to the right side. 
+ if (MustBeFirstL || ShouldFirstL) { assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); std::swap(LHS, RHS); std::swap(CanNegateL, CanNegateR); @@ -4057,7 +4076,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC) { bool DummyCanNegate; bool DummyMustBeFirst; - if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) + bool DummyPreferFirst; + if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst, + DummyPreferFirst, false)) return SDValue(); return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll index d887b57849900..657498172a04c 100644 --- a/llvm/test/CodeGen/AArch64/ccmp-cse.ll +++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll @@ -4,10 +4,9 @@ define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind { ; CHECK-LABEL: test_single_or: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x2, x0 -; CHECK-NEXT: sub x8, x2, x1 -; CHECK-NEXT: ccmp x2, x1, #0, ls -; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi ; CHECK-NEXT: ret %cmp.match = icmp ult i64 %y, %x %cmp.nomatch = icmp ugt i64 %y, %unrelated @@ -20,11 +19,10 @@ define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind { define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind { ; CHECK-LABEL: test_two_ors: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x2, x0 -; CHECK-NEXT: sub x8, x2, x1 -; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: subs x8, x2, x1 ; CHECK-NEXT: ccmp x0, x1, #0, hs -; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi ; CHECK-NEXT: ret %cmp.match = icmp ult i64 %y, %x %cmp.nomatch1 = icmp ult i64 %unrelated, %x @@ -39,11 +37,10 @@ define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind { define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, 
i64 %y) nounwind { ; CHECK-LABEL: test_two_ors_commuted: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x2, x0 -; CHECK-NEXT: sub x8, x2, x1 -; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: subs x8, x2, x1 ; CHECK-NEXT: ccmp x0, x1, #0, hs -; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi ; CHECK-NEXT: ret %cmp.match = icmp ult i64 %y, %x %cmp.nomatch1 = icmp ult i64 %unrelated, %x @@ -58,10 +55,9 @@ define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind { ; CHECK-LABEL: test_single_and: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x2, x0 -; CHECK-NEXT: sub x8, x2, x1 -; CHECK-NEXT: ccmp x2, x1, #2, hi -; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #0, lo +; CHECK-NEXT: csel x0, xzr, x8, hi ; CHECK-NEXT: ret %cmp.match = icmp ult i64 %y, %x %cmp.nomatch = icmp ugt i64 %y, %unrelated @@ -74,9 +70,8 @@ define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind { define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { ; CHECK-LABEL: test_single_or_sub_commuted: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x2, x0 -; CHECK-NEXT: sub x8, x1, x2 -; CHECK-NEXT: ccmp x1, x2, #2, ls +; CHECK-NEXT: subs x8, x1, x2 +; CHECK-NEXT: ccmp x2, x0, #2, ls ; CHECK-NEXT: csel x0, xzr, x8, hi ; CHECK-NEXT: ret %cmp.match = icmp ult i64 %y, %x