-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[DAGCombiner] Improve FMin/FMax DAGCombines #161352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Add several improvements to DAGCombine patterns for fmin/fmax:
* Fix incorrect results due to minimumnum not being marked as IsMin - e.g. nnan minimumnum(x, +inf) returned +inf instead of x
* Fix incorrect results checking maximumnum for vecreduce patterns
* Make maxnum/minnum return QNaN if one input is SNaN instead of X
* Quiet SNaN inputs when propagating them, e.g. maximum(x, SNaN) = QNaN
* Update comments to mark when SNaN propagation is being ignored
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86
Author: Lewis Crawford (LewisCrawford)
Changes: Add several improvements to DAGCombine patterns for fmin/fmax:
Full diff: https://github.com/llvm/llvm-project/pull/161352.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 204e1f0c75e00..05d93661352ed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19323,8 +19323,10 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
EVT VT = N->getValueType(0);
const SDNodeFlags Flags = N->getFlags();
unsigned Opc = N->getOpcode();
- bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
- bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
+ bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
+ bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM;
+ bool IsMin =
+ Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Constant fold.
@@ -19339,34 +19341,53 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
const APFloat &AF = N1CFP->getValueAPF();
- // minnum(X, nan) -> X
- // maxnum(X, nan) -> X
- // minimum(X, nan) -> nan
- // maximum(X, nan) -> nan
- if (AF.isNaN())
- return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
+ // minnum(X, qnan) -> X
+ // maxnum(X, qnan) -> X
+ // minnum(X, snan) -> qnan
+ // maxnum(X, snan) -> qnan
+ // minimum(X, nan) -> qnan
+ // maximum(X, nan) -> qnan
+ // minimumnum(X, nan) -> X
+ // maximumnum(X, nan) -> X
+ if (AF.isNaN()) {
+ if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) {
+ if (AF.isSignaling())
+ return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
+ return N->getOperand(1);
+ }
+ return N->getOperand(0);
+ }
// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
- // minnum(X, -inf) -> -inf
- // maxnum(X, +inf) -> +inf
+ // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
+ // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
- if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
+ // minimumnum(X, -inf) -> -inf
+ // maximumnum(X, +inf) -> +inf
+ if (IsMin == AF.isNegative() &&
+ (!PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
return N->getOperand(1);
// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
- // minimum(X, +inf) -> X
- // maximum(X, -inf) -> X
- if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
+ // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
+ // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
+ // minimumnum(X, +inf) -> X if nnan
+ // maximumnum(X, -inf) -> X if nnan
+ if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
return N->getOperand(0);
}
}
+ // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
+ if (Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM)
+ return SDValue();
+
if (SDValue SD = reassociateReduction(
- PropagatesNaN
+ PropAllNaNsToQNaNs
? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
: (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
Opc, SDLoc(N), VT, N0, N1, Flags))
diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll
index d6252cc85e8b4..4fae327826cbe 100644
--- a/llvm/test/CodeGen/X86/fmaxnum.ll
+++ b/llvm/test/CodeGen/X86/fmaxnum.ll
@@ -653,5 +653,29 @@ define float @test_maxnum_const_nan(float %x) {
ret float %r
}
+; nnan maxnum(X, -inf) -> X
+define float @test_maxnum_neg_inf_nnan(float %x) nounwind {
+; CHECK-LABEL: test_maxnum_neg_inf_nnan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %r = call nnan float @llvm.maxnum.f32(float %x, float 0xfff0000000000000)
+ ret float %r
+}
+
+; Test SNaN quieting
+define float @test_maxnum_snan(float %x) {
+; SSE-LABEL: test_maxnum_snan:
+; SSE: # %bb.0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_maxnum_snan:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: retq
+ %r = call float @llvm.maxnum.f32(float 0x7ff4000000000000, float %x)
+ ret float %r
+}
+
attributes #0 = { "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 864c2336f37c2..670005c03c27d 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -2649,3 +2649,96 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
%r = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y)
ret <4 x bfloat> %r
}
+
+; nnan minimum(X, +inf) -> X
+define float @test_fminimum_inf_nnan(float %x) nounwind {
+; SSE2-LABEL: test_fminimum_inf_nnan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fminimum_inf_nnan:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fminimum_inf_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fminimum_inf_nnan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = call nnan float @llvm.minimum.f32(float %x, float 0x7ff0000000000000)
+ ret float %1
+}
+
+; nnan maximum(X, -inf) -> X
+define float @test_fmaximum_neg_inf_nnan(float %x) nounwind {
+; SSE2-LABEL: test_fmaximum_neg_inf_nnan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximum_neg_inf_nnan:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximum_neg_inf_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximum_neg_inf_nnan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = call nnan float @llvm.maximum.f32(float %x, float 0xfff0000000000000)
+ ret float %1
+}
+
+; Test SNaN quieting
+define float @test_fmaximum_snan(float %x) {
+; SSE2-LABEL: test_fmaximum_snan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximum_snan:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximum_snan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximum_snan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: retl
+ %1 = tail call float @llvm.maximum.f32(float 0x7ff4000000000000, float %x)
+ ret float %1
+}
+
+define float @test_fminimum_snan(float %x) {
+; SSE2-LABEL: test_fminimum_snan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fminimum_snan:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fminimum_snan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fminimum_snan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: retl
+ %1 = tail call float @llvm.minimum.f32(float 0x7ff4000000000000, float %x)
+ ret float %1
+}
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index c66473e9edd19..25e6091a16e8f 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -2479,3 +2479,90 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
%r = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y)
ret <4 x bfloat> %r
}
+
+; nnan minimumnum(X, +inf) -> X
+define float @test_fminimumnum_inf_nnan(float %x) nounwind {
+; SSE2-LABEL: test_fminimumnum_inf_nnan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fminimumnum_inf_nnan:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fminimumnum_inf_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fminimumnum_inf_nnan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = call nnan float @llvm.minimumnum.f32(float %x, float 0x7ff0000000000000)
+ ret float %1
+}
+
+; nnan maximumnum(X, -inf) -> X
+define float @test_fmaximumnum_neg_inf_nnan(float %x) nounwind {
+; SSE2-LABEL: test_fmaximumnum_neg_inf_nnan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximumnum_neg_inf_nnan:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_neg_inf_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_neg_inf_nnan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = call nnan float @llvm.maximumnum.f32(float %x, float 0xfff0000000000000)
+ ret float %1
+}
+
+; Test we propagate the non-NaN arg, even if one arg is SNaN
+define float @test_fmaximumnum_snan(float %x) {
+; SSE2-LABEL: test_fmaximumnum_snan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fmaximumnum_snan:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fmaximumnum_snan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fmaximumnum_snan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = tail call float @llvm.maximumnum.f32(float 0x7ff4000000000000, float %x)
+ ret float %1
+}
+
+define float @test_fminimumnum_snan(float %x) {
+; SSE2-LABEL: test_fminimumnum_snan:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fminimumnum_snan:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX10_2-LABEL: test_fminimumnum_snan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: retq
+;
+; X86-LABEL: test_fminimumnum_snan:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+ %1 = tail call float @llvm.minimumnum.f32(float 0x7ff4000000000000, float %x)
+ ret float %1
+}
diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll
index 0ef8fdec33d93..b97fbfde2dbe3 100644
--- a/llvm/test/CodeGen/X86/fminnum.ll
+++ b/llvm/test/CodeGen/X86/fminnum.ll
@@ -653,5 +653,29 @@ define float @test_minnum_const_nan(float %x) {
ret float %r
}
+; nnan minnum(X, +inf) -> X
+define float @test_minnum_inf_nnan(float %x) nounwind {
+; CHECK-LABEL: test_minnum_inf_nnan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %r = call nnan float @llvm.minnum.f32(float %x, float 0x7ff0000000000000)
+ ret float %r
+}
+
+; Test SNaN quieting
+define float @test_minnum_snan(float %x) {
+; SSE-LABEL: test_minnum_snan:
+; SSE: # %bb.0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_minnum_snan:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: retq
+ %r = call float @llvm.minnum.f32(float 0x7ff4000000000000, float %x)
+ ret float %r
+}
+
attributes #0 = { "no-nans-fp-math"="true" }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM overall, with a test nit.
Previously, when optimizing fmax(x, nan) -> x, the test functions would return immediately, since the input parameter x is already in the register used for the return value. To make the transformations more explicit, add a 2nd argument to the outer function instead, so that a MOV instruction is needed to take arg1 -> reg0. This makes the transformation easier to read than having the entire function become a NOP.
Add several improvements to DAGCombine patterns for fmin/fmax:
* Fix incorrect results due to minimumnum not being marked as IsMin - e.g. nnan minimumnum(x, +inf) returned +inf instead of x
* Fix incorrect results checking maximumnum for vecreduce patterns
* Make maxnum/minnum return QNaN if one input is SNaN instead of X
* Quiet SNaN inputs when propagating them, e.g. maximum(x, SNaN) = QNaN
* Update comments to mark when SNaN propagation is being ignored
Add several improvements to DAGCombine patterns for fmin/fmax:
* Fix incorrect results due to minimumnum not being marked as IsMin - e.g. nnan minimumnum(x, +inf) returned +inf instead of x
* Fix incorrect results checking maximumnum for vecreduce patterns
* Make maxnum/minnum return QNaN if one input is SNaN instead of X
* Quiet SNaN inputs when propagating them, e.g. maximum(x, SNaN) = QNaN
* Update comments to mark when SNaN propagation is being ignored
Add several improvements to DAGCombine patterns for fmin/fmax: