Skip to content

Conversation

@paperchalice
Copy link
Contributor

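Add fast-math flag support for calls to fcmp-related intrinsics (`llvm.vp.fcmp`, `llvm.experimental.constrained.fcmp`, and `llvm.experimental.constrained.fcmps`), because these intrinsics need the `nnan` flag during the CodeGen phase.
Currently, the only constraint is that the callee must be a `Function`.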

@paperchalice paperchalice marked this pull request as ready for review October 14, 2025 01:45
@llvmbot
Copy link
Member

llvmbot commented Oct 14, 2025

@llvm/pr-subscribers-llvm-ir

Author: None (paperchalice)

Changes

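Add fast-math flag support for calls to fcmp-related intrinsics (`llvm.vp.fcmp`, `llvm.experimental.constrained.fcmp`, and `llvm.experimental.constrained.fcmps`), because these intrinsics need the `nnan` flag during the CodeGen phase.
Currently, the only constraint is that the callee must be a `Function`.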


Full diff: https://github.com/llvm/llvm-project/pull/163195.diff

7 Files Affected:

  • (modified) llvm/docs/LangRef.rst (+5-3)
  • (modified) llvm/include/llvm/AsmParser/LLParser.h (+5)
  • (modified) llvm/include/llvm/IR/Operator.h (+5-1)
  • (modified) llvm/lib/AsmParser/LLParser.cpp (+23-4)
  • (modified) llvm/lib/IR/Operator.cpp (+14)
  • (modified) llvm/test/Assembler/fast-math-flags.ll (+2)
  • (modified) llvm/test/Bitcode/compatibility.ll (+6-2)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8b6c25c58d61e..dfd52101d5a22 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -4065,9 +4065,11 @@ Fast-Math Flags
 LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`, :ref:`fptrunc <i_fptrunc>`,
-:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
-:ref:`call <i_call>` instructions that return floating-point types may use the
-following flags to enable otherwise unsafe floating-point transformations.
+:ref:`fpext <i_fpext>`), :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
+:ref:`call <i_call>` instructions that return floating-point types, and
+:ref:`call <i_call>` instructions that call fcmp-related intrinsics
+may use the following flags to enable otherwise unsafe floating-point
+transformations.
 
 ``fast``
    This flag is a shorthand for specifying all fast-math flags at once, and
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index c01de4a289a69..ace034e574e7f 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -39,6 +39,7 @@ namespace llvm {
   class Value;
   class BasicBlock;
   class Instruction;
+  class CallInst;
   class Constant;
   class GlobalValue;
   class Comdat;
@@ -177,6 +178,10 @@ namespace llvm {
     // Map of module ID to path.
     std::map<unsigned, StringRef> ModuleIdMap;
 
+    // FIXME: These intrinsic calls only become FPMathOperators after
+    // validateEndOfModule, so their fast-math flags are stashed until then.
+    std::map<CallInst *, FastMathFlags> FCmpIntrinsicFMFs;
+
     /// Only the llvm-as tool may set this to false to bypass
     /// UpgradeDebuginfo so it can generate broken bitcode.
     bool UpgradeDebugInfo;
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 10816c0e62c29..7594938554e85 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -346,6 +346,9 @@ class FPMathOperator : public Operator {
            isComposedOfHomogeneousFloatingPointTypes(Ty);
   }
 
+  /// Returns true if \p Inst is a call to a vp or constrained fcmp intrinsic.
+  static bool isFCmpIntrinsic(const Instruction &Inst);
+
   static bool classof(const Value *V) {
     unsigned Opcode;
     if (auto *I = dyn_cast<Instruction>(V))
@@ -372,7 +375,8 @@ class FPMathOperator : public Operator {
     case Instruction::PHI:
     case Instruction::Select:
     case Instruction::Call: {
-      return isSupportedFloatingPointType(V->getType());
+      return isSupportedFloatingPointType(V->getType()) ||
+             isFCmpIntrinsic(*dyn_cast<Instruction>(V));
     }
     default:
       return false;
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 380b19296a3c4..de560f35783f2 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -403,6 +403,11 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
                  "use of undefined value '@" +
                      Twine(ForwardRefValIDs.begin()->first) + "'");
 
+  // Fix fast-math flags for fcmp intrinsic calls.
+  for (auto [CI, FMF] : FCmpIntrinsicFMFs)
+    CI->setFastMathFlags(FMF);
+  FCmpIntrinsicFMFs.clear();
+
   if (AllowIncompleteIR && !ForwardRefMDNodes.empty())
     dropUnknownMetadataReferences();
 
@@ -8446,12 +8451,26 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
   CI->setTailCallKind(TCK);
   CI->setCallingConv(CC);
   if (FMF.any()) {
+    bool IsFCmpIntrinsic = false;
     if (!isa<FPMathOperator>(CI)) {
-      CI->deleteValue();
-      return error(CallLoc, "fast-math-flags specified for call without "
-                            "floating-point scalar or vector return type");
+      auto FCmpName = StringRef(CalleeID.StrVal);
+      if (FCmpName.starts_with("llvm.") &&
+          CalleeID.Kind == ValID::t_GlobalName) {
+        unsigned IID = Intrinsic::lookupIntrinsicID(FCmpName);
+        IsFCmpIntrinsic = IID == Intrinsic::vp_fcmp ||
+                          IID == Intrinsic::experimental_constrained_fcmp ||
+                          IID == Intrinsic::experimental_constrained_fcmps;
+      }
+      if (!IsFCmpIntrinsic) {
+        CI->deleteValue();
+        return error(CallLoc, "fast-math-flags specified for call without "
+                              "floating-point scalar or vector return type "
+                              "or callee is not fcmp intrinsic");
+      }
+      FCmpIntrinsicFMFs[CI] = FMF;
     }
-    CI->setFastMathFlags(FMF);
+    if (!IsFCmpIntrinsic)
+      CI->setFastMathFlags(FMF);
   }
 
   if (CalleeID.Kind == ValID::t_GlobalName &&
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index 39e5463cb6fc3..dcd21ad4d5f08 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -67,6 +67,20 @@ bool Operator::hasPoisonGeneratingAnnotations() const {
                I->hasPoisonGeneratingMetadata());
 }
 
+bool FPMathOperator::isFCmpIntrinsic(const Instruction &Inst) {
+  const auto *I = dyn_cast<CallInst>(&Inst);
+  if (!I)
+    return false;
+
+  unsigned IID = Intrinsic::not_intrinsic;
+  if (Function *F = I->getCalledFunction())
+    IID = F->getIntrinsicID();
+
+  return IID == Intrinsic::vp_fcmp ||
+         IID == Intrinsic::experimental_constrained_fcmp ||
+         IID == Intrinsic::experimental_constrained_fcmps;
+}
+
 Type *GEPOperator::getSourceElementType() const {
   if (auto *I = dyn_cast<GetElementPtrInst>(this))
     return I->getSourceElementType();
diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll
index 9c08e9da1d19e..a8bf299e64f75 100644
--- a/llvm/test/Assembler/fast-math-flags.ll
+++ b/llvm/test/Assembler/fast-math-flags.ll
@@ -108,6 +108,8 @@ entry:
   %h_vec = fptrunc nnan <3 x float> %vec to <3 x half>
 ; CHECK: %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
   %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> <i1 false, i1 true, i1 false>, i32 3)
+  %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> <i1 false, i1 true, i1 false>, i32 3)
 ; CHECK:  ret float %f
   ret float %f
 }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index e21786e5ee330..0ad9e4f8eff5c 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1157,6 +1157,9 @@ define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
   %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf_v4f64()
   ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf_v4f64()
 
+  %call.fcmp.intrinsic = call nnan i1 @llvm.experimental.constrained.fcmp.f32(float %f, float %f, metadata !"oeq", metadata !"fpexcept.ignore")
+  ; CHECK: %call.fcmp.intrinsic = call nnan i1 @llvm.experimental.constrained.fcmp.f32(float %f, float %f, metadata !"oeq", metadata !"fpexcept.ignore")
+
   ret void
 }
 
@@ -1718,7 +1721,7 @@ exit:
   ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
 
   call void @f.nobuiltin() builtin
-  ; CHECK: call void @f.nobuiltin() #55
+  ; CHECK: call void @f.nobuiltin() #56
 
   call fastcc noalias ptr @f.noalias() noinline
   ; CHECK: call fastcc noalias ptr @f.noalias() #12
@@ -2288,7 +2291,8 @@ define float @nofpclass_callsites(float %arg, { float } %arg1) {
 ; CHECK: attributes #52 = { sanitize_realtime }
 ; CHECK: attributes #53 = { sanitize_realtime_blocking }
 ; CHECK: attributes #54 = { sanitize_alloc_token }
-; CHECK: attributes #55 = { builtin }
+; CHECK: attributes #55 = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #56 = { builtin }
 
 ;; Metadata
 

@paperchalice
Copy link
Contributor Author

Ping?

@arsenm
Copy link
Contributor

arsenm commented Nov 4, 2025

We should probably be going the other way and removing fast-math flags from the fcmp instruction.

@paperchalice
Copy link
Contributor Author

I will try to investigate it 🤔

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants