[SelectionDAGBuilder] Remove NoNaNsFPMath uses #169904
Conversation
Replaced by checking fast-math flags or nofpclass.
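In IR terms, the change trades the global option for two per-instruction sources of NaN information. A minimal sketch (hypothetical function names, not taken from the patch) of the two forms:

; 1. A fast-math flag on the compare itself:
define i1 @via_fmf(float %x, float %y) {
  %c = fcmp nnan olt float %x, %y
  ret i1 %c
}

; 2. nofpclass(nan) on the operands, recoverable via value tracking:
define i1 @via_nofpclass(float nofpclass(nan) %x, float nofpclass(nan) %y) {
  %c = fcmp olt float %x, %y
  ret i1 %c
}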
  %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
  %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
- %vc = fcmp one <vscale x 8 x bfloat> %va, %splat
+ %vc = fcmp nnan one <vscale x 8 x bfloat> %va, %splat
Value tracking currently can't handle scalable vectors for insertelement, even though vscale > 0 is always true.
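For contrast, a hedged fixed-width analogue (a hypothetical test, not from the patch): here value tracking can look through the splat, so isKnownNeverNaN on the compare operand would succeed without an explicit nnan flag; the scalable variant above is what currently fails.

; Fixed-width version of the splat idiom, visible to value tracking.
define <8 x i1> @fixed_splat_cmp(<8 x float> %va, float nofpclass(nan) %b) {
  %head = insertelement <8 x float> poison, float %b, i32 0
  %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer
  %vc = fcmp one <8 x float> %va, %splat
  ret <8 x i1> %vc
}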
At least the splat case should be handled in computeKnownFPClass/computeKnownBits. I'll post a patch.
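A rough sketch of why the splat case is tractable, assuming the in-tree getSplatValue helper (which looks through the insertelement + shufflevector idiom, for scalable vectors too); this is a guess at the shape of a fix, not the actual #170325 patch:

// Hypothetical sketch; splattedScalar is an illustrative name.
#include "llvm/Analysis/VectorUtils.h" // getSplatValue
#include "llvm/IR/Value.h"
using namespace llvm;

// If V is a splat (a constant splat or the insertelement+shufflevector
// idiom, fixed or scalable), return the splatted scalar so a caller like
// computeKnownFPClass could recurse into it; otherwise return nullptr.
static const Value *splattedScalar(const Value *V) {
  return getSplatValue(V);
}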
Done in #170325
      return true;

  for (int I = 0; I != 2; ++I)
    if (!isKnownNeverNaN(Inst->getOperand(I),
It is a bit weird to use a recursive ValueTracking query in the backend. I'd expect it to get canonicalized in InstCombine before codegen. https://godbolt.org/z/r13WvTE3b
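To illustrate the expectation (presumably what the godbolt link demonstrates; hedged, since it depends on InstCombine's current flag inference): the middle end would materialize nnan on the fcmp so the backend only has to read N->getFlags().

; Input: NaN-freedom is visible only through the parameter attributes.
define i1 @before(float nofpclass(nan) %a, float nofpclass(nan) %b) {
  %c = fcmp olt float %a, %b
  ret i1 %c
}

; Expected after InstCombine: nnan attached to the fcmp itself, so no
; recursive value-tracking query is needed while building the DAG.
define i1 @after(float nofpclass(nan) %a, float nofpclass(nan) %b) {
  %c = fcmp nnan olt float %a, %b
  ret i1 %c
}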
  // FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
- if (!DAG.getTarget().Options.NoNaNsFPMath ||
-     !DAG.getTarget().Options.NoSignedZerosFPMath)
+ if ((!DAG.getTarget().Options.NoNaNsFPMath && !N->getFlags().hasNoNaNs()) ||
Can you explain why NoNaNsFPMath is still used here?
This PR should modify as few test cases as possible; the remaining uses will be handled in the backend parts.
@llvm/pr-subscribers-llvm-selectiondag

Author: None (paperchalice)

Changes

Replaced by checking fast-math flags or value tracking results.

Patch is 401.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169904.diff

31 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0f3a207cc6414..58757ffc88976 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6567,21 +6567,23 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
}
// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
-static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
- ISD::CondCode CC, unsigned OrAndOpcode,
- SelectionDAG &DAG,
- bool isFMAXNUMFMINNUM_IEEE,
- bool isFMAXNUMFMINNUM) {
+static unsigned
+getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
+ SDNodeFlags LHSSetCCFlags, SDNodeFlags RHSSetCCFlags,
+ ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG,
+ bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) {
// The optimization cannot be applied for all the predicates because
// of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
// NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
// applied at all if one of the operands is a signaling NaN.
+ bool SetCCNoNaNs = LHSSetCCFlags.hasNoNaNs() && RHSSetCCFlags.hasNoNaNs();
+
// It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
// are non NaN values.
if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
- return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+ return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMINNUM_IEEE
: ISD::DELETED_NODE;
@@ -6589,7 +6591,7 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
- return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+ return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMAXNUM_IEEE
: ISD::DELETED_NODE;
@@ -6638,6 +6640,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
!LHS->hasOneUse() || !RHS->hasOneUse())
return SDValue();
+ SDNodeFlags LHSSetCCFlags = LHS->getFlags();
+ SDNodeFlags RHSSetCCFlags = RHS->getFlags();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
LogicOp, LHS.getNode(), RHS.getNode());
@@ -6729,11 +6733,14 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
else
NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
} else if (OpVT.isFloatingPoint())
- NewOpcode =
- getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
- DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+ NewOpcode = getMinMaxOpcodeForFP(
+ Operand1, Operand2, LHSSetCCFlags, RHSSetCCFlags, CC,
+ LogicOp->getOpcode(), DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
if (NewOpcode != ISD::DELETED_NODE) {
+ // Propagate fast-math flags from setcc.
+ SelectionDAG::FlagInserter FlagInserter(DAG, LHS->getFlags() &
+ RHS->getFlags());
SDValue MinMaxValue =
DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 53d73ad618bd1..9f9f69be6d8ce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2446,6 +2446,26 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
return true;
}
+static bool AreFCmpOperandsNonNaN(const Instruction *Inst,
+ const SelectionDAG &DAG) {
+ assert(
+ (isa<FCmpInst>(Inst) || isa<ConstrainedFPCmpIntrinsic>(Inst) ||
+ (isa<VPIntrinsic>(Inst) &&
+ dyn_cast<VPIntrinsic>(Inst)->getIntrinsicID() == Intrinsic::vp_fcmp)) &&
+ "Not fcmp instruction or its intrinsic variants!");
+
+ if (const auto *FPOp = dyn_cast<FPMathOperator>(Inst))
+ if (FPOp->hasNoNaNs())
+ return true;
+
+ for (int I = 0; I != 2; ++I)
+ if (!isKnownNeverNaN(Inst->getOperand(I),
+ SimplifyQuery(DAG.getDataLayout(), Inst)))
+ return false;
+
+ return true;
+}
+
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
@@ -2479,7 +2499,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
- if (TM.Options.NoNaNsFPMath)
+ if (AreFCmpOperandsNonNaN(FC, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
}
@@ -3754,7 +3774,7 @@ void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) {
ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
- if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
+ if (AreFCmpOperandsNonNaN(&I, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
SDNodeFlags Flags;
@@ -8496,7 +8516,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
- if (TM.Options.NoNaNsFPMath)
+ if (AreFCmpOperandsNonNaN(FPCmp, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
@@ -8779,11 +8799,8 @@ void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
CmpInst::Predicate CondCode = VPIntrin.getPredicate();
bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
if (IsFP) {
- // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
- // flags, but calls that don't return floating-point types can't be
- // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
Condition = getFCmpCondCode(CondCode);
- if (TM.Options.NoNaNsFPMath)
+ if (AreFCmpOperandsNonNaN(&VPIntrin, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = getICmpCondCode(CondCode);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3aef0bd31debe..47ccdc5bf3e50 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -14411,6 +14411,7 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
}
bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ SDNodeFlags UserFlags,
unsigned MaxDepth) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::FCANONICALIZE)
@@ -14610,7 +14611,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
// FIXME: denormalsEnabledForType is broken for dynamic
return denormalsEnabledForType(DAG, Op.getValueType()) &&
- DAG.isKnownNeverSNaN(Op);
+ (UserFlags.hasNoNaNs() || DAG.isKnownNeverSNaN(Op));
}
bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 74e58f4272e10..13b4facc12b18 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -555,7 +555,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
Register N1) const override;
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
- unsigned MaxDepth = 5) const;
+ SDNodeFlags UserFlags = {}, unsigned MaxDepth = 5) const;
bool isCanonicalized(Register Reg, const MachineFunction &MF,
unsigned MaxDepth = 5) const;
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 526250a04e001..800d9f2f12262 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -992,11 +992,13 @@ def MFMALdScaleXForm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
}]>;
-def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
+def fcanonicalize_canonicalized
+ : PatFrag<(ops node:$op), (fcanonicalize node:$op), [{
const SITargetLowering &Lowering =
*static_cast<const SITargetLowering *>(getTargetLowering());
- return Lowering.isCanonicalized(*CurDAG, Op);
+ return Lowering.isCanonicalized(*CurDAG, Op->getOperand(0), N->getFlags());
}]> {
+ // FIXME: This predicate for GlobalISel is dead code.
let GISelPredicateCode = [{
const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
MF.getSubtarget().getTargetLowering());
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c5f5b7d53cfb1..83259ee0e47d8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3485,10 +3485,7 @@ def : GCNPat<
// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
let AddedComplexity = 8 in {
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
- def : GCNPat<
- (fcanonicalize (vt is_canonicalized:$src)),
- (COPY vt:$src)
- >;
+ def : GCNPat<(fcanonicalize_canonicalized vt:$src), (COPY vt:$src)>;
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1b0bf6823e390..83bf275b4270d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48011,6 +48011,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
if (Opcode) {
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N->getFlags());
if (IsStrict) {
SDValue Ret = DAG.getNode(Opcode == X86ISD::FMIN ? X86ISD::STRICT_FMIN
: X86ISD::STRICT_FMAX,
@@ -55532,8 +55534,9 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
// FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
- if (!DAG.getTarget().Options.NoNaNsFPMath ||
- !DAG.getTarget().Options.NoSignedZerosFPMath)
+ if ((!DAG.getTarget().Options.NoNaNsFPMath && !N->getFlags().hasNoNaNs()) ||
+ (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ !N->getFlags().hasNoSignedZeros()))
return SDValue();
// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
diff --git a/llvm/test/CodeGen/AArch64/arm64-constrained-fcmp-no-nans-opt.ll b/llvm/test/CodeGen/AArch64/arm64-constrained-fcmp-no-nans-opt.ll
index 968acb2565b4e..2ddaf0ecf7619 100644
--- a/llvm/test/CodeGen/AArch64/arm64-constrained-fcmp-no-nans-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-constrained-fcmp-no-nans-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 -enable-no-nans-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
@@ -7,7 +7,7 @@ declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, met
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ueq(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ueq(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -16,7 +16,7 @@ define i1 @f32_constrained_fcmp_ueq(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_une(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_une(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -25,7 +25,7 @@ define i1 @f32_constrained_fcmp_une(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ugt(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ugt(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -34,7 +34,7 @@ define i1 @f32_constrained_fcmp_ugt(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, ge
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_uge(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_uge(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -43,7 +43,7 @@ define i1 @f32_constrained_fcmp_uge(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ult(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ult(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -52,7 +52,7 @@ define i1 @f32_constrained_fcmp_ult(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ule(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ule(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -61,7 +61,7 @@ define i1 @f32_constrained_fcmp_ule(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ueq(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ueq(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -70,7 +70,7 @@ define i1 @f64_constrained_fcmp_ueq(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_une(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_une(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -79,7 +79,7 @@ define i1 @f64_constrained_fcmp_une(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ugt(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ugt(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -88,7 +88,7 @@ define i1 @f64_constrained_fcmp_ugt(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, ge
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_uge(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_uge(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -97,7 +97,7 @@ define i1 @f64_constrained_fcmp_uge(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ult(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ult(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -106,7 +106,7 @@ define i1 @f64_constrained_fcmp_ult(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ule(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ule(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict")
ret i1 %cmp
}
diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd-nnan.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd-nnan.ll
new file mode 100644
index 0000000000000..440fd2ba7f8f7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd-nnan.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK
+
+define <1 x float> @dup_v1i32_oeq(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_oeq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmeq s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan oeq float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
+define <1 x float> @dup_v1i32_ogt(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_ogt:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan ogt float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
+define <1 x float> @dup_v1i32_oge(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_oge:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmge s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan oge float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
+define <1 x float> @dup_v1i32_olt(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_olt:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan olt float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
+define <1 x float> @dup_v1i32_ole(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_ole:
+; CHECK: ...
[truncated]
Referenced by commit …nownFPClass (#170325), which addresses llvm/llvm-project#169904 (comment).