[IR] Add fast-math support for fcmp intrinsics

paperchalice · paperchalice · commit a0065c17980e · 2025-10-13T21:36:03.000+08:00
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
@@ -4065,9 +4065,11 @@ Fast-Math Flags
 LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`, :ref:`fptrunc <i_fptrunc>`,
-:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
-:ref:`call <i_call>` instructions that return floating-point types may use the
-following flags to enable otherwise unsafe floating-point transformations.
+:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`,
+:ref:`call <i_call>` instructions that return floating-point types, or
+:ref:`call <i_call>` instructions that call fcmp-related intrinsics
+may use the following flags to enable otherwise unsafe floating-point
+transformations.
 
 ``fast``
    This flag is a shorthand for specifying all fast-math flags at once, and
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
@@ -39,6 +39,7 @@ namespace llvm {
   class Value;
   class BasicBlock;
   class Instruction;
+  class CallInst;
   class Constant;
   class GlobalValue;
   class Comdat;
@@ -177,6 +178,10 @@ namespace llvm {
     // Map of module ID to path.
     std::map<unsigned, StringRef> ModuleIdMap;
 
+    // FIXME: These intrinsic calls only become FPMathOperators after
+    // validateEndOfModule, so their fast-math flags are deferred until then.
+    std::map<CallInst *, FastMathFlags> FCmpIntrinsicFMFs;
+
     /// Only the llvm-as tool may set this to false to bypass
     /// UpgradeDebuginfo so it can generate broken bitcode.
     bool UpgradeDebugInfo;
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
@@ -346,6 +346,9 @@ class FPMathOperator : public Operator {
            isComposedOfHomogeneousFloatingPointTypes(Ty);
   }
 
+  /// Returns true if \p Inst is a call to an fcmp intrinsic.
+  static bool isFCmpIntrinsic(const Instruction &Inst);
+
   static bool classof(const Value *V) {
     unsigned Opcode;
     if (auto *I = dyn_cast<Instruction>(V))
@@ -372,7 +375,8 @@ class FPMathOperator : public Operator {
     case Instruction::PHI:
     case Instruction::Select:
     case Instruction::Call: {
-      return isSupportedFloatingPointType(V->getType());
+      return isSupportedFloatingPointType(V->getType()) ||
+             isFCmpIntrinsic(*dyn_cast<Instruction>(V));
     }
     default:
       return false;
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
@@ -403,6 +403,9 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
                  "use of undefined value '@" +
                      Twine(ForwardRefValIDs.begin()->first) + "'");
 
+  for (auto [CI, FMF] : FCmpIntrinsicFMFs)
+    CI->setFastMathFlags(FMF);
+
   if (AllowIncompleteIR && !ForwardRefMDNodes.empty())
     dropUnknownMetadataReferences();
 
@@ -8446,12 +8449,26 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
   CI->setTailCallKind(TCK);
   CI->setCallingConv(CC);
   if (FMF.any()) {
+    bool IsFCmpIntrinsic = false;
     if (!isa<FPMathOperator>(CI)) {
-      CI->deleteValue();
-      return error(CallLoc, "fast-math-flags specified for call without "
-                            "floating-point scalar or vector return type");
+      auto FCmpName = StringRef(CalleeID.StrVal);
+      if (FCmpName.starts_with("llvm.") &&
+          CalleeID.Kind == ValID::t_GlobalName) {
+        unsigned IID = Intrinsic::lookupIntrinsicID(FCmpName);
+        IsFCmpIntrinsic = IID == Intrinsic::vp_fcmp ||
+                          IID == Intrinsic::experimental_constrained_fcmp ||
+                          IID == Intrinsic::experimental_constrained_fcmps;
+      }
+      if (!IsFCmpIntrinsic) {
+        CI->deleteValue();
+        return error(CallLoc, "fast-math-flags specified for call without "
+                              "floating-point scalar or vector return type "
+                              "or callee is not fcmp intrinsic");
+      }
+      FCmpIntrinsicFMFs[CI] = FMF;
     }
-    CI->setFastMathFlags(FMF);
+    if (!IsFCmpIntrinsic)
+      CI->setFastMathFlags(FMF);
   }
 
   if (CalleeID.Kind == ValID::t_GlobalName &&
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
@@ -67,6 +67,20 @@ bool Operator::hasPoisonGeneratingAnnotations() const {
                I->hasPoisonGeneratingMetadata());
 }
 
+bool FPMathOperator::isFCmpIntrinsic(const Instruction &Inst) {
+  const auto *I = dyn_cast<CallInst>(&Inst);
+  if (!I) // Only call instructions can qualify.
+    return false;
+  // IID stays not_intrinsic when getCalledFunction() yields no Function.
+  unsigned IID = Intrinsic::not_intrinsic;
+  if (Function *F = I->getCalledFunction())
+    IID = F->getIntrinsicID();
+  // Accept exactly the three fcmp-style intrinsic IDs listed below.
+  return IID == Intrinsic::vp_fcmp ||
+         IID == Intrinsic::experimental_constrained_fcmp ||
+         IID == Intrinsic::experimental_constrained_fcmps;
+}
+
 Type *GEPOperator::getSourceElementType() const {
   if (auto *I = dyn_cast<GetElementPtrInst>(this))
     return I->getSourceElementType();
diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll
@@ -108,6 +108,8 @@ entry:
   %h_vec = fptrunc nnan <3 x float> %vec to <3 x half>
 ; CHECK: %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
   %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> <i1 false, i1 true, i1 false>, i32 3)
+  %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> <i1 false, i1 true, i1 false>, i32 3)
 ; CHECK:  ret float %f
   ret float %f
 }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
@@ -1157,6 +1157,9 @@ define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
   %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf_v4f64()
   ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf_v4f64()
 
+  %call.fcmp.intrinsic = call nnan i1 @llvm.experimental.constrained.fcmp.f32(float %f, float %f, metadata !"oeq", metadata !"fpexcept.ignore")
+  ; CHECK: %call.fcmp.intrinsic = call nnan i1 @llvm.experimental.constrained.fcmp.f32(float %f, float %f, metadata !"oeq", metadata !"fpexcept.ignore")
+
   ret void
 }
 
@@ -1718,7 +1721,7 @@ exit:
   ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
 
   call void @f.nobuiltin() builtin
-  ; CHECK: call void @f.nobuiltin() #55
+  ; CHECK: call void @f.nobuiltin() #56
 
   call fastcc noalias ptr @f.noalias() noinline
   ; CHECK: call fastcc noalias ptr @f.noalias() #12
@@ -2288,7 +2291,8 @@ define float @nofpclass_callsites(float %arg, { float } %arg1) {
 ; CHECK: attributes #52 = { sanitize_realtime }
 ; CHECK: attributes #53 = { sanitize_realtime_blocking }
 ; CHECK: attributes #54 = { sanitize_alloc_token }
-; CHECK: attributes #55 = { builtin }
+; CHECK: attributes #55 = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #56 = { builtin }
 
 ;; Metadata
 

Original file line number	Diff line number	Diff line change
`@@ -108,6 +108,8 @@ entry:`
`108`	`108`	`%h_vec = fptrunc nnan <3 x float> %vec to <3 x half>`
`109`	`109`	`; CHECK: %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>`
`110`	`110`	`%h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>`
	`111`	`+; CHECK: %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> <i1 false, i1 true, i1 false>, i32 3)`
	`112`	`+ %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> <i1 false, i1 true, i1 false>, i32 3)`
`111`	`113`	`; CHECK: ret float %f`
`112`	`114`	`ret float %f`
`113`	`115`	`}`