diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8aea0d23ffc0a..6320b5de6af20 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3874,22 +3874,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, /// \param MustBeFirst Set to true if this subtree needs to be negated and we /// cannot do the negation naturally. We are required to /// emit the subtree first in this case. +/// \param PreferFirst Set to true if processing this subtree first may +/// result in more efficient code. /// \param WillNegate Is true if are called when the result of this /// subexpression must be negated. This happens when the /// outer expression is an OR. We can use this fact to know /// that we have a double negation (or (or ...) ...) that /// can be implemented for free. -static bool canEmitConjunction(const SDValue Val, bool &CanNegate, - bool &MustBeFirst, bool WillNegate, +static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, + bool &CanNegate, bool &MustBeFirst, + bool &PreferFirst, bool WillNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { - if (Val->getOperand(0).getValueType() == MVT::f128) + EVT VT = Val->getOperand(0).getValueType(); + if (VT == MVT::f128) return false; CanNegate = true; MustBeFirst = false; + // Designate this operation as a preferred first operation if the result + // of a SUB operation can be reused. + PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT), + {Val->getOperand(0), Val->getOperand(1)}); return true; } // Protect against exponential runtime and stack overflow. @@ -3901,11 +3909,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, SDValue O1 = Val->getOperand(1); bool CanNegateL; bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) + bool PreferFirstL; + if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL, + IsOR, Depth + 1)) return false; bool CanNegateR; bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + bool PreferFirstR; + if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR, + IsOR, Depth + 1)) return false; if (MustBeFirstL && MustBeFirstR) @@ -3928,6 +3940,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, CanNegate = false; MustBeFirst = MustBeFirstL || MustBeFirstR; } + PreferFirst = PreferFirstL || PreferFirstR; return true; } return false; @@ -3989,19 +4002,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, SDValue LHS = Val->getOperand(0); bool CanNegateL; bool MustBeFirstL; - bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); + bool PreferFirstL; + bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL, + PreferFirstL, IsOR); assert(ValidL && "Valid conjunction/disjunction tree"); (void)ValidL; SDValue RHS = Val->getOperand(1); bool CanNegateR; bool MustBeFirstR; - bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); + bool PreferFirstR; + bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR, + PreferFirstR, IsOR); assert(ValidR && "Valid conjunction/disjunction tree"); (void)ValidR; - // Swap sub-tree that must come first to the right side. - if (MustBeFirstL) { + bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR; + + // Swap sub-tree that must or should come first to the right side. + if (MustBeFirstL || ShouldFirstL) { assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); std::swap(LHS, RHS); std::swap(CanNegateL, CanNegateR); @@ -4057,7 +4076,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC) { bool DummyCanNegate; bool DummyMustBeFirst; - if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) + bool DummyPreferFirst; + if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst, + DummyPreferFirst, false)) return SDValue(); return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll new file mode 100644 index 0000000000000..657498172a04c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %cmp.match, %or.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_and: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #0, lo +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %and.cond = and i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or_sub_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x1, x2 +; CHECK-NEXT: ccmp x2, x0, #2, ls +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %x, %y + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: We must negate the or operation, hence this must come first. +define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, ls +; CHECK-NEXT: ccmp x2, x1, #2, lo +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %and.cond = and i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: There is no analogue of SUBS for floating point. +define float @test_negative_float(float %unrelated, float %x, float %y) nounwind { +; CHECK-LABEL: test_negative_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s2, s0 +; CHECK-NEXT: fsub s0, s2, s1 +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: fccmp s2, s1, #8, le +; CHECK-NEXT: fcsel s0, s3, s0, mi +; CHECK-NEXT: ret + %cmp.nomatch1 = fcmp olt float %y, %x + %cmp.nomatch2 = fcmp ogt float %y, %unrelated + %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2 + %sub.noreuse = fsub float %y, %x + %res = select i1 %or.cond, float 0.0, float %sub.noreuse + ret float %res +} + +; Negative test: If both operands match a sub, do not reorder them. +define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind { +; CHECK-LABEL: test_prefer_right_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: csel x8, x0, x1, lo +; CHECK-NEXT: sub x0, x2, x8 +; CHECK-NEXT: ret + %cmp.match1 = icmp ult i64 %z, %y + %cmp.match2 = icmp ugt i64 %z, %x + %or.cond = or i1 %cmp.match1, %cmp.match2 + %sub.reuse1 = sub nuw i64 %z, %y + %sub.reuse2 = sub nuw i64 %z, %x + %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1 + ret i64 %res +}