27 changes: 17 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6567,29 +6567,31 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
}

// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
-static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
-                                     ISD::CondCode CC, unsigned OrAndOpcode,
-                                     SelectionDAG &DAG,
-                                     bool isFMAXNUMFMINNUM_IEEE,
-                                     bool isFMAXNUMFMINNUM) {
+static unsigned
+getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
+                     SDNodeFlags LHSSetCCFlags, SDNodeFlags RHSSetCCFlags,
+                     ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG,
+                     bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) {
// The optimization cannot be applied for all the predicates because
// of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
// NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
// applied at all if one of the operands is a signaling NaN.

bool SetCCNoNaNs = LHSSetCCFlags.hasNoNaNs() && RHSSetCCFlags.hasNoNaNs();

// It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
// are non NaN values.
if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
-    return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+    return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMINNUM_IEEE
: ISD::DELETED_NODE;
}

if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
-    return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+    return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMAXNUM_IEEE
: ISD::DELETED_NODE;
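
For context, the combine this predicate guards rewrites a logic op of two compares against a common operand into a single min/max feeding one compare. A minimal IR sketch of the or+olt case (names are illustrative and assume FMINNUM_IEEE is legal for f32); with nnan on both compares, SetCCNoNaNs holds and the operands no longer need to be provably non-NaN:

  define i1 @or_of_olt(float %a, float %b, float %c) {
    %cmp1 = fcmp nnan olt float %a, %c
    %cmp2 = fcmp nnan olt float %b, %c
    ; or(a < c, b < c) == min(a, b) < c when neither a nor b is NaN,
    ; so this lowers to setcc(fminnum_ieee(a, b), c, setolt).
    %r = or i1 %cmp1, %cmp2
    ret i1 %r
  }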
@@ -6638,6 +6640,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
!LHS->hasOneUse() || !RHS->hasOneUse())
return SDValue();

SDNodeFlags LHSSetCCFlags = LHS->getFlags();
SDNodeFlags RHSSetCCFlags = RHS->getFlags();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
LogicOp, LHS.getNode(), RHS.getNode());
@@ -6729,11 +6733,14 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
else
NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
} else if (OpVT.isFloatingPoint())
-    NewOpcode =
-        getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
-                             DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+    NewOpcode = getMinMaxOpcodeForFP(
+        Operand1, Operand2, LHSSetCCFlags, RHSSetCCFlags, CC,
+        LogicOp->getOpcode(), DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);

if (NewOpcode != ISD::DELETED_NODE) {
// Propagate fast-math flags from setcc.
SelectionDAG::FlagInserter FlagInserter(DAG, LHS->getFlags() &
RHS->getFlags());
SDValue MinMaxValue =
DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
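
Intersecting the flags (LHS->getFlags() & RHS->getFlags()) is the conservative choice: the new min/max and setcc replace both compares, so they may only carry flags that held on each of them. A small sketch, with assumed flag placement:

  %cmp1 = fcmp nnan ninf olt float %a, %c
  %cmp2 = fcmp nnan olt float %b, %c
  %r = or i1 %cmp1, %cmp2
  ; Only nnan is common to both setcc nodes, so the combined
  ; fminnum_ieee/setcc keeps nnan and drops ninf.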
31 changes: 24 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2446,6 +2446,26 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
return true;
}

static bool AreFCmpOperandsNonNaN(const Instruction *Inst,
const SelectionDAG &DAG) {
assert(
(isa<FCmpInst>(Inst) || isa<ConstrainedFPCmpIntrinsic>(Inst) ||
(isa<VPIntrinsic>(Inst) &&
dyn_cast<VPIntrinsic>(Inst)->getIntrinsicID() == Intrinsic::vp_fcmp)) &&
"Not fcmp instruction or its intrinsic variants!");

if (const auto *FPOp = dyn_cast<FPMathOperator>(Inst))
if (FPOp->hasNoNaNs())
return true;

for (int I = 0; I != 2; ++I)
if (!isKnownNeverNaN(Inst->getOperand(I),
                         SimplifyQuery(DAG.getDataLayout(), Inst)))
      return false;

  return true;
}

Review comment (Member): It is a bit weird to use a recursive ValueTracking query in the backend. I'd expect it to get canonicalized in InstCombine before codegen. https://godbolt.org/z/r13WvTE3b
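
The helper lets per-operand NaN facts (fast-math flags on the compare, or attributes such as nofpclass(nan) that isKnownNeverNaN understands) relax the condition code, instead of relying only on the global -enable-no-nans-fp-math option. A small assumed example, not taken from the patch's tests:

  ; Both operands are non-NaN by nofpclass(nan), so the unordered
  ; ult predicate can be lowered with the ordered lt condition code.
  define i1 @ult_no_nans(float nofpclass(nan) %a, float nofpclass(nan) %b) {
    %cmp = fcmp ult float %a, %b
    ret i1 %cmp
  }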

/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
@@ -2479,7 +2499,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
-      if (TM.Options.NoNaNsFPMath)
+      if (AreFCmpOperandsNonNaN(FC, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
}

@@ -3754,7 +3774,7 @@ void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) {

ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
-  if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
+  if (AreFCmpOperandsNonNaN(&I, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);

SDNodeFlags Flags;
@@ -8496,7 +8516,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
-    if (TM.Options.NoNaNsFPMath)
+    if (AreFCmpOperandsNonNaN(FPCmp, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
@@ -8779,11 +8799,8 @@ void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
CmpInst::Predicate CondCode = VPIntrin.getPredicate();
bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
if (IsFP) {
-    // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
-    // flags, but calls that don't return floating-point types can't be
-    // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
    Condition = getFCmpCondCode(CondCode);
-    if (TM.Options.NoNaNsFPMath)
+    if (AreFCmpOperandsNonNaN(&VPIntrin, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = getICmpCondCode(CondCode);
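
This case is why the FIXME above could be dropped: vp.fcmp returns a mask rather than a floating-point value, so it is not an FPMathOperator and can never carry nnan itself; operand-based reasoning is the only way to relax its predicate. An assumed example:

  declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)

  define <4 x i1> @vp_ult(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b, <4 x i1> %m, i32 %evl) {
    ; Non-NaN operands let ult lower as the ordered lt condition code.
    %c = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"ult", <4 x i1> %m, i32 %evl)
    ret <4 x i1> %c
  }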
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -14411,6 +14411,7 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
}

bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
SDNodeFlags UserFlags,
unsigned MaxDepth) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::FCANONICALIZE)
@@ -14610,7 +14611,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,

// FIXME: denormalsEnabledForType is broken for dynamic
return denormalsEnabledForType(DAG, Op.getValueType()) &&
-         DAG.isKnownNeverSNaN(Op);
+         (UserFlags.hasNoNaNs() || DAG.isKnownNeverSNaN(Op));
}
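
The reasoning, as I read the change: if the use site itself is nnan, a NaN input is poison for that use anyway, so whether a signaling NaN would have been quieted no longer matters and the isKnownNeverSNaN query can be skipped. An assumed IR shape that benefits:

  declare float @llvm.canonicalize.f32(float)

  ; nnan on the use lets isCanonicalized ignore the SNaN case,
  ; leaving only the denormal-mode check.
  %c = call nnan float @llvm.canonicalize.f32(float %x)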

bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF,
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -555,7 +555,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
Register N1) const override;

bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
-                       unsigned MaxDepth = 5) const;
+                       SDNodeFlags UserFlags = {}, unsigned MaxDepth = 5) const;
bool isCanonicalized(Register Reg, const MachineFunction &MF,
unsigned MaxDepth = 5) const;
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -992,11 +992,13 @@ def MFMALdScaleXForm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
}]>;

-def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
+def fcanonicalize_canonicalized
+    : PatFrag<(ops node:$op), (fcanonicalize node:$op), [{
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());
-  return Lowering.isCanonicalized(*CurDAG, Op);
+  return Lowering.isCanonicalized(*CurDAG, Op->getOperand(0), N->getFlags());
}]> {
// FIXME: This predicate for GlobalISel is dead code.
let GISelPredicateCode = [{
const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
MF.getSubtarget().getTargetLowering());
5 changes: 1 addition & 4 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3485,10 +3485,7 @@ def : GCNPat<
// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
let AddedComplexity = 8 in {
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
-def : GCNPat<
-  (fcanonicalize (vt is_canonicalized:$src)),
-  (COPY vt:$src)
->;
+def : GCNPat<(fcanonicalize_canonicalized vt:$src), (COPY vt:$src)>;
}
}
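
The PatLeaf-to-PatFrag switch above is what makes this work: the old is_canonicalized leaf matched the operand, so the predicate only saw Op; the fragment matches the whole (fcanonicalize $op), so N is the canonicalize node and its flags are available. An assumed end-to-end example:

  declare float @llvm.canonicalize.f32(float)

  define float @canon_of_add(float %a, float %b) {
    %x = fadd float %a, %b               ; fadd already produces a canonical result
    %y = call float @llvm.canonicalize.f32(float %x)
    ; isCanonicalized(%x) holds, so the pattern selects a plain COPY.
    ret float %y
  }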

7 changes: 5 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48011,6 +48011,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}

if (Opcode) {
// Propagate fast-math-flags.
SelectionDAG::FlagInserter FlagsInserter(DAG, N->getFlags());
if (IsStrict) {
SDValue Ret = DAG.getNode(Opcode == X86ISD::FMIN ? X86ISD::STRICT_FMIN
: X86ISD::STRICT_FMAX,
@@ -55532,8 +55534,9 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);

// FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
-  if (!DAG.getTarget().Options.NoNaNsFPMath ||
-      !DAG.getTarget().Options.NoSignedZerosFPMath)
+  if ((!DAG.getTarget().Options.NoNaNsFPMath && !N->getFlags().hasNoNaNs()) ||
+      (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+       !N->getFlags().hasNoSignedZeros()))
    return SDValue();

Review comment (Member): Can you explain why NoNaNsFPMath is still used here?
Reply (Contributor Author): This PR should modify as few test cases as possible; the remaining cases will be handled in the backend-specific parts.

// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
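
Together with the FlagInserter added in combineSelect above, per-node fast-math flags now reach X86ISD::FMIN/FMAX, so the commutativity check no longer depends solely on the global options. An assumed IR pattern that produces such a flagged node (whether the commute then fires still depends on the surrounding combines):

  define float @min(float %a, float %b) {
    %cmp = fcmp nnan nsz olt float %a, %b
    ; Lowers to X86ISD::FMIN carrying nnan and nsz.
    %r = select nnan nsz i1 %cmp, float %a, float %b
    ret float %r
  }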
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AArch64/arm64-constrained-fcmp-no-nans-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 -enable-no-nans-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s

declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
@@ -7,7 +7,7 @@ declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, met
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ueq(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ueq(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -16,7 +16,7 @@ define i1 @f32_constrained_fcmp_ueq(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_une(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_une(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -25,7 +25,7 @@ define i1 @f32_constrained_fcmp_une(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ugt(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ugt(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -34,7 +34,7 @@ define i1 @f32_constrained_fcmp_ugt(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, ge
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_uge(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_uge(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -43,7 +43,7 @@ define i1 @f32_constrained_fcmp_uge(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ult(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ult(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -52,7 +52,7 @@ define i1 @f32_constrained_fcmp_ult(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ule(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ule(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -61,7 +61,7 @@ define i1 @f32_constrained_fcmp_ule(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ueq(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ueq(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -70,7 +70,7 @@ define i1 @f64_constrained_fcmp_ueq(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_une(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_une(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -79,7 +79,7 @@ define i1 @f64_constrained_fcmp_une(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ugt(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ugt(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -88,7 +88,7 @@ define i1 @f64_constrained_fcmp_ugt(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, ge
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_uge(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_uge(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -97,7 +97,7 @@ define i1 @f64_constrained_fcmp_uge(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ult(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ult(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -106,7 +106,7 @@ define i1 @f64_constrained_fcmp_ult(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ule(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ule(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict")
ret i1 %cmp
}