diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8b6c25c58d61e..dfd52101d5a22 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4065,9 +4065,11 @@ Fast-Math Flags LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`, :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`, :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`, :ref:`fptrunc <i_fptrunc>`, -:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or -:ref:`call <i_call>` instructions that return floating-point types may use the -following flags to enable otherwise unsafe floating-point transformations. +:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, +:ref:`call <i_call>` instructions that return floating-point types, or +:ref:`call <i_call>` instructions that call fcmp-related intrinsics +may use the following flags to enable otherwise unsafe floating-point +transformations. ``fast`` This flag is a shorthand for specifying all fast-math flags at once, and diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index c01de4a289a69..ace034e574e7f 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -39,6 +39,7 @@ namespace llvm { class Value; class BasicBlock; class Instruction; + class CallInst; class Constant; class GlobalValue; class Comdat; @@ -177,6 +178,10 @@ namespace llvm { // Map of module ID to path. std::map ModuleIdMap; + // Deferred fast-math flags for fcmp intrinsic calls. + // FIXME: These calls are FPMathOperators only after validateEndOfModule. + std::map FCmpIntrinsicFMFs; + /// Only the llvm-as tool may set this to false to bypass /// UpgradeDebuginfo so it can generate broken bitcode. bool UpgradeDebugInfo; diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 10816c0e62c29..7594938554e85 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -346,6 +346,9 @@ class FPMathOperator : public Operator { isComposedOfHomogeneousFloatingPointTypes(Ty); } + /// Returns true if \p Inst calls a vp or constrained fcmp intrinsic.
+ static bool isFCmpIntrinsic(const Instruction &Inst); + static bool classof(const Value *V) { unsigned Opcode; if (auto *I = dyn_cast(V)) @@ -372,7 +375,8 @@ class FPMathOperator : public Operator { case Instruction::PHI: case Instruction::Select: case Instruction::Call: { - return isSupportedFloatingPointType(V->getType()); + return isSupportedFloatingPointType(V->getType()) || + isFCmpIntrinsic(*cast<Instruction>(V)); } default: return false; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 380b19296a3c4..de560f35783f2 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -403,6 +403,11 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { "use of undefined value '@" + Twine(ForwardRefValIDs.begin()->first) + "'"); + // Fix fast-math flags for fcmp intrinsic calls. + for (auto [CI, FMF] : FCmpIntrinsicFMFs) + CI->setFastMathFlags(FMF); + FCmpIntrinsicFMFs.clear(); + if (AllowIncompleteIR && !ForwardRefMDNodes.empty()) dropUnknownMetadataReferences(); @@ -8446,12 +8451,26 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS, CI->setTailCallKind(TCK); CI->setCallingConv(CC); if (FMF.any()) { + bool IsFCmpIntrinsic = false; if (!isa(CI)) { - CI->deleteValue(); - return error(CallLoc, "fast-math-flags specified for call without " - "floating-point scalar or vector return type"); + auto FCmpName = StringRef(CalleeID.StrVal); + if (FCmpName.starts_with("llvm.") && + CalleeID.Kind == ValID::t_GlobalName) { + unsigned IID = Intrinsic::lookupIntrinsicID(FCmpName); + IsFCmpIntrinsic = IID == Intrinsic::vp_fcmp || + IID == Intrinsic::experimental_constrained_fcmp || + IID == Intrinsic::experimental_constrained_fcmps; + } + if (!IsFCmpIntrinsic) { + CI->deleteValue(); + return error(CallLoc, "fast-math-flags specified for call without " + "floating-point scalar or vector return type " + "or callee is not fcmp intrinsic"); + } + FCmpIntrinsicFMFs[CI] = FMF; } - CI->setFastMathFlags(FMF); + 
if (!IsFCmpIntrinsic) + CI->setFastMathFlags(FMF); } if (CalleeID.Kind == ValID::t_GlobalName && diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 39e5463cb6fc3..dcd21ad4d5f08 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -67,6 +67,20 @@ bool Operator::hasPoisonGeneratingAnnotations() const { I->hasPoisonGeneratingMetadata()); } +bool FPMathOperator::isFCmpIntrinsic(const Instruction &Inst) { + const auto *I = dyn_cast(&Inst); + if (!I) + return false; + + unsigned IID = Intrinsic::not_intrinsic; + if (Function *F = I->getCalledFunction()) + IID = F->getIntrinsicID(); + + return IID == Intrinsic::vp_fcmp || + IID == Intrinsic::experimental_constrained_fcmp || + IID == Intrinsic::experimental_constrained_fcmps; +} + Type *GEPOperator::getSourceElementType() const { if (auto *I = dyn_cast(this)) return I->getSourceElementType(); diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll index 9c08e9da1d19e..a8bf299e64f75 100644 --- a/llvm/test/Assembler/fast-math-flags.ll +++ b/llvm/test/Assembler/fast-math-flags.ll @@ -108,6 +108,8 @@ entry: %h_vec = fptrunc nnan <3 x float> %vec to <3 x half> ; CHECK: %h_scalable = fptrunc nnan %scalable to %h_scalable = fptrunc nnan %scalable to +; CHECK: %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> , i32 3) + %i_fcmp_result = call nnan <3 x i1> @llvm.vp.fcmp.v3f32(<3 x float> %vec, <3 x float> %vec, metadata !"oeq", <3 x i1> , i32 3) ; CHECK: ret float %f ret float %f } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index e21786e5ee330..0ad9e4f8eff5c 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1157,6 +1157,9 @@ define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf_v4f64() ; CHECK: %call.nnan.ninf = tail call 
nnan ninf fastcc <4 x double> @fmf_v4f64() + %call.fcmp.intrinsic = call nnan i1 @llvm.experimental.constrained.fcmp.f32(float %f, float %f, metadata !"oeq", metadata !"fpexcept.ignore") + ; CHECK: %call.fcmp.intrinsic = call nnan i1 @llvm.experimental.constrained.fcmp.f32(float %f, float %f, metadata !"oeq", metadata !"fpexcept.ignore") + ret void } @@ -1718,7 +1721,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #55 + ; CHECK: call void @f.nobuiltin() #56 call fastcc noalias ptr @f.noalias() noinline ; CHECK: call fastcc noalias ptr @f.noalias() #12 @@ -2288,7 +2291,8 @@ define float @nofpclass_callsites(float %arg, { float } %arg1) { ; CHECK: attributes #52 = { sanitize_realtime } ; CHECK: attributes #53 = { sanitize_realtime_blocking } ; CHECK: attributes #54 = { sanitize_alloc_token } -; CHECK: attributes #55 = { builtin } +; CHECK: attributes #55 = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) } +; CHECK: attributes #56 = { builtin } ;; Metadata