27 changes: 17 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6567,29 +6567,31 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
}

// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
-static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
-                                     ISD::CondCode CC, unsigned OrAndOpcode,
-                                     SelectionDAG &DAG,
-                                     bool isFMAXNUMFMINNUM_IEEE,
-                                     bool isFMAXNUMFMINNUM) {
+static unsigned
+getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
+                     SDNodeFlags LHSSetCCFlags, SDNodeFlags RHSSetCCFlags,
+                     ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG,
+                     bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) {
// The optimization cannot be applied for all the predicates because
// of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
// NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
// applied at all if one of the operands is a signaling NaN.

bool SetCCNoNaNs = LHSSetCCFlags.hasNoNaNs() && RHSSetCCFlags.hasNoNaNs();

// It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
// are non NaN values.
if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
-    return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+    return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMINNUM_IEEE
: ISD::DELETED_NODE;
}

if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
-    return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+    return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
isFMAXNUMFMINNUM_IEEE
? ISD::FMAXNUM_IEEE
: ISD::DELETED_NODE;
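
For context, the combine this predicate guards rewrites a logic op of two compares against a common operand into a single min/max feeding one compare. A minimal IR sketch of the or+olt case (names are illustrative and assume FMINNUM_IEEE is legal for f32); with nnan on both compares, SetCCNoNaNs holds and the operands no longer need to be provably non-NaN:

  define i1 @or_of_olt(float %a, float %b, float %c) {
    %cmp1 = fcmp nnan olt float %a, %c
    %cmp2 = fcmp nnan olt float %b, %c
    ; or(a < c, b < c) == min(a, b) < c when neither a nor b is NaN,
    ; so this lowers to setcc(fminnum_ieee(a, b), c, setolt).
    %r = or i1 %cmp1, %cmp2
    ret i1 %r
  }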
@@ -6638,6 +6640,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
!LHS->hasOneUse() || !RHS->hasOneUse())
return SDValue();

SDNodeFlags LHSSetCCFlags = LHS->getFlags();
SDNodeFlags RHSSetCCFlags = RHS->getFlags();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
LogicOp, LHS.getNode(), RHS.getNode());
@@ -6729,11 +6733,14 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
else
NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
} else if (OpVT.isFloatingPoint())
-    NewOpcode =
-        getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
-                             DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+    NewOpcode = getMinMaxOpcodeForFP(
+        Operand1, Operand2, LHSSetCCFlags, RHSSetCCFlags, CC,
+        LogicOp->getOpcode(), DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);

if (NewOpcode != ISD::DELETED_NODE) {
// Propagate fast-math flags from setcc.
SelectionDAG::FlagInserter FlagInserter(DAG, LHS->getFlags() &
RHS->getFlags());
SDValue MinMaxValue =
DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
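
Intersecting the flags (LHS->getFlags() & RHS->getFlags()) is the conservative choice: the new min/max and setcc replace both compares, so they may only carry flags that held on each of them. A small sketch, with assumed flag placement:

  %cmp1 = fcmp nnan ninf olt float %a, %c
  %cmp2 = fcmp nnan olt float %b, %c
  %r = or i1 %cmp1, %cmp2
  ; Only nnan is common to both setcc nodes, so the combined
  ; fminnum_ieee/setcc keeps nnan and drops ninf.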
31 changes: 24 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2446,6 +2446,26 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
return true;
}

static bool AreFCmpOperandsNonNaN(const Instruction *Inst,
const SelectionDAG &DAG) {
assert(
(isa<FCmpInst>(Inst) || isa<ConstrainedFPCmpIntrinsic>(Inst) ||
(isa<VPIntrinsic>(Inst) &&
dyn_cast<VPIntrinsic>(Inst)->getIntrinsicID() == Intrinsic::vp_fcmp)) &&
"Not fcmp instruction or its intrinsic variants!");

if (const auto *FPOp = dyn_cast<FPMathOperator>(Inst))
if (FPOp->hasNoNaNs())
return true;

for (int I = 0; I != 2; ++I)
if (!isKnownNeverNaN(Inst->getOperand(I),
                         SimplifyQuery(DAG.getDataLayout(), Inst)))
      return false;

  return true;
}

Review comment (Member): It is a bit weird to use a recursive ValueTracking query in the backend. I'd expect it to get canonicalized in InstCombine before codegen. https://godbolt.org/z/r13WvTE3b
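
The helper lets per-operand NaN facts (fast-math flags on the compare, or attributes such as nofpclass(nan) that isKnownNeverNaN understands) relax the condition code, instead of relying only on the global -enable-no-nans-fp-math option. A small assumed example, not taken from the patch's tests:

  ; Both operands are non-NaN by nofpclass(nan), so the unordered
  ; ult predicate can be lowered with the ordered lt condition code.
  define i1 @ult_no_nans(float nofpclass(nan) %a, float nofpclass(nan) %b) {
    %cmp = fcmp ult float %a, %b
    ret i1 %cmp
  }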

/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
@@ -2479,7 +2499,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
-      if (TM.Options.NoNaNsFPMath)
+      if (AreFCmpOperandsNonNaN(FC, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
}

@@ -3754,7 +3774,7 @@ void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) {

ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
-  if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
+  if (AreFCmpOperandsNonNaN(&I, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);

SDNodeFlags Flags;
@@ -8496,7 +8516,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
-    if (TM.Options.NoNaNsFPMath)
+    if (AreFCmpOperandsNonNaN(FPCmp, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
@@ -8779,11 +8799,8 @@ void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
CmpInst::Predicate CondCode = VPIntrin.getPredicate();
bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
if (IsFP) {
-    // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
-    // flags, but calls that don't return floating-point types can't be
-    // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
    Condition = getFCmpCondCode(CondCode);
-    if (TM.Options.NoNaNsFPMath)
+    if (AreFCmpOperandsNonNaN(&VPIntrin, DAG))
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = getICmpCondCode(CondCode);
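
This case is why the FIXME above could be dropped: vp.fcmp returns a mask rather than a floating-point value, so it is not an FPMathOperator and can never carry nnan itself; operand-based reasoning is the only way to relax its predicate. An assumed example:

  declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)

  define <4 x i1> @vp_ult(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b, <4 x i1> %m, i32 %evl) {
    ; Non-NaN operands let ult lower as the ordered lt condition code.
    %c = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"ult", <4 x i1> %m, i32 %evl)
    ret <4 x i1> %c
  }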
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -14411,6 +14411,7 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
}

bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
SDNodeFlags UserFlags,
unsigned MaxDepth) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::FCANONICALIZE)
@@ -14610,7 +14611,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,

// FIXME: denormalsEnabledForType is broken for dynamic
return denormalsEnabledForType(DAG, Op.getValueType()) &&
-         DAG.isKnownNeverSNaN(Op);
+         (UserFlags.hasNoNaNs() || DAG.isKnownNeverSNaN(Op));
}
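
The reasoning, as I read the change: if the use site itself is nnan, a NaN input is poison for that use anyway, so whether a signaling NaN would have been quieted no longer matters and the isKnownNeverSNaN query can be skipped. An assumed IR shape that benefits:

  declare float @llvm.canonicalize.f32(float)

  ; nnan on the use lets isCanonicalized ignore the SNaN case,
  ; leaving only the denormal-mode check.
  %c = call nnan float @llvm.canonicalize.f32(float %x)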

bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF,
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -555,7 +555,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
Register N1) const override;

bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
-                       unsigned MaxDepth = 5) const;
+                       SDNodeFlags UserFlags = {}, unsigned MaxDepth = 5) const;
bool isCanonicalized(Register Reg, const MachineFunction &MF,
unsigned MaxDepth = 5) const;
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -992,11 +992,13 @@ def MFMALdScaleXForm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
}]>;

-def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
+def fcanonicalize_canonicalized
+    : PatFrag<(ops node:$op), (fcanonicalize node:$op), [{
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());
-  return Lowering.isCanonicalized(*CurDAG, Op);
+  return Lowering.isCanonicalized(*CurDAG, Op->getOperand(0), N->getFlags());
}]> {
// FIXME: This predicate for GlobalISel is dead code.
let GISelPredicateCode = [{
const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
MF.getSubtarget().getTargetLowering());
5 changes: 1 addition & 4 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3485,10 +3485,7 @@ def : GCNPat<
// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
let AddedComplexity = 8 in {
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
-def : GCNPat<
-  (fcanonicalize (vt is_canonicalized:$src)),
-  (COPY vt:$src)
->;
+def : GCNPat<(fcanonicalize_canonicalized vt:$src), (COPY vt:$src)>;
}
}
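
The PatLeaf-to-PatFrag switch above is what makes this work: the old is_canonicalized leaf matched the operand, so the predicate only saw Op; the fragment matches the whole (fcanonicalize $op), so N is the canonicalize node and its flags are available. An assumed end-to-end example:

  declare float @llvm.canonicalize.f32(float)

  define float @canon_of_add(float %a, float %b) {
    %x = fadd float %a, %b               ; fadd already produces a canonical result
    %y = call float @llvm.canonicalize.f32(float %x)
    ; isCanonicalized(%x) holds, so the pattern selects a plain COPY.
    ret float %y
  }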

7 changes: 5 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48011,6 +48011,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}

if (Opcode) {
// Propagate fast-math-flags.
SelectionDAG::FlagInserter FlagsInserter(DAG, N->getFlags());
if (IsStrict) {
SDValue Ret = DAG.getNode(Opcode == X86ISD::FMIN ? X86ISD::STRICT_FMIN
: X86ISD::STRICT_FMAX,
@@ -55532,8 +55534,9 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);

// FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
-  if (!DAG.getTarget().Options.NoNaNsFPMath ||
-      !DAG.getTarget().Options.NoSignedZerosFPMath)
+  if ((!DAG.getTarget().Options.NoNaNsFPMath && !N->getFlags().hasNoNaNs()) ||
+      (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+       !N->getFlags().hasNoSignedZeros()))
    return SDValue();

Review comment (Member): Can you explain why NoNaNsFPMath is still used here?
Reply (Contributor Author): This PR should modify as few test cases as possible; the remaining cases will be handled in the backend-specific parts.

// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
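
Together with the FlagInserter added in combineSelect above, per-node fast-math flags now reach X86ISD::FMIN/FMAX, so the commutativity check no longer depends solely on the global options. An assumed IR pattern that produces such a flagged node (whether the commute then fires still depends on the surrounding combines):

  define float @min(float %a, float %b) {
    %cmp = fcmp nnan nsz olt float %a, %b
    ; Lowers to X86ISD::FMIN carrying nnan and nsz.
    %r = select nnan nsz i1 %cmp, float %a, float %b
    ret float %r
  }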
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AArch64/arm64-constrained-fcmp-no-nans-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 -enable-no-nans-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s

declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
@@ -7,7 +7,7 @@ declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, met
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ueq(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ueq(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -16,7 +16,7 @@ define i1 @f32_constrained_fcmp_ueq(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_une(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_une(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -25,7 +25,7 @@ define i1 @f32_constrained_fcmp_une(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ugt(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ugt(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -34,7 +34,7 @@ define i1 @f32_constrained_fcmp_ugt(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, ge
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_uge(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_uge(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -43,7 +43,7 @@ define i1 @f32_constrained_fcmp_uge(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ult(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ult(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -52,7 +52,7 @@ define i1 @f32_constrained_fcmp_ult(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp s0, s1
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ret
-define i1 @f32_constrained_fcmp_ule(float %a, float %b) nounwind ssp strictfp {
+define i1 @f32_constrained_fcmp_ule(float nofpclass(nan) %a, float nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -61,7 +61,7 @@ define i1 @f32_constrained_fcmp_ule(float %a, float %b) nounwind ssp strictfp {
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ueq(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ueq(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -70,7 +70,7 @@ define i1 @f64_constrained_fcmp_ueq(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_une(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_une(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -79,7 +79,7 @@ define i1 @f64_constrained_fcmp_une(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ugt(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ugt(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -88,7 +88,7 @@ define i1 @f64_constrained_fcmp_ugt(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, ge
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_uge(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_uge(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -97,7 +97,7 @@ define i1 @f64_constrained_fcmp_uge(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ult(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ult(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict")
ret i1 %cmp
}
@@ -106,7 +106,7 @@ define i1 @f64_constrained_fcmp_ult(double %a, double %b) nounwind ssp strictfp
; CHECK: fcmp d0, d1
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ret
-define i1 @f64_constrained_fcmp_ule(double %a, double %b) nounwind ssp strictfp {
+define i1 @f64_constrained_fcmp_ule(double nofpclass(nan) %a, double nofpclass(nan) %b) nounwind ssp strictfp {
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict")
ret i1 %cmp
}