diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 6962b6022e854..0a28edefa1e65 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -164,10 +164,10 @@ class CodeGenOptions : public CodeGenOptionsBase { std::string FloatABI; /// The floating-point denormal mode to use. - llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::Invalid; + llvm::DenormalMode FPDenormalMode; /// The floating-point subnormal mode to use, for float. - llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::Invalid; + llvm::DenormalMode FP32DenormalMode; /// The float precision limit to use, if non-empty. std::string LimitFloatPrecision; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 53e00c14c0cae..09f145844641c 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -617,7 +617,7 @@ class ToolChain { Action::OffloadKind DeviceOffloadKind, const llvm::fltSemantics *FPType = nullptr) const { // FIXME: This should be IEEE when default handling is fixed. - return llvm::DenormalMode::Invalid; + return llvm::DenormalMode::getInvalid(); } }; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 0aaf6813442a5..a34d3d8b43538 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -247,7 +247,7 @@ void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, if (!hasFP32Denormals) TargetOpts.Features.push_back( (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && - CGOpts.FP32DenormalMode == llvm::DenormalMode::IEEE + CGOpts.FP32DenormalMode.Output == llvm::DenormalMode::IEEE ? '+' : '-') + Twine("fp32-denormals")) .str()); // Always do not flush fp64 or fp16 denorms. diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 9ed2ccd54487c..cdd3ca474edf6 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1749,14 +1749,14 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute("null-pointer-is-valid", "true"); // TODO: Omit attribute when the default is IEEE. - if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::Invalid) + if (CodeGenOpts.FPDenormalMode.isValid()) FuncAttrs.addAttribute("denormal-fp-math", - llvm::denormalModeName(CodeGenOpts.FPDenormalMode)); - - if (CodeGenOpts.FP32DenormalMode != llvm::DenormalMode::Invalid) + CodeGenOpts.FPDenormalMode.str()); + if (CodeGenOpts.FP32DenormalMode.isValid()) { FuncAttrs.addAttribute( "denormal-fp-math-f32", - llvm::denormalModeName(CodeGenOpts.FP32DenormalMode)); + CodeGenOpts.FP32DenormalMode.str()); + } FuncAttrs.addAttribute("no-trapping-math", llvm::toStringRef(CodeGenOpts.NoTrappingMath)); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 6a43b6bba6279..4e730025ac4f7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -587,7 +587,7 @@ void CodeGenModule::Release() { // floating point values to 0. (This corresponds to its "__CUDA_FTZ" // property.) getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", - CodeGenOpts.FP32DenormalMode != + CodeGenOpts.FP32DenormalMode.Output != llvm::DenormalMode::IEEE); } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 68091b6597f32..06e4686ac2b98 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -108,14 +108,14 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( const llvm::fltSemantics *FPType) const { // Denormals should always be enabled for f16 and f64. if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) - return llvm::DenormalMode::IEEE; + return llvm::DenormalMode::getIEEE(); if (DeviceOffloadKind == Action::OFK_Cuda) { if (FPType && FPType == &llvm::APFloat::IEEEsingle() && DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, false)) - return llvm::DenormalMode::PreserveSign; + return llvm::DenormalMode::getPreserveSign(); } const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); @@ -134,7 +134,8 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || !DefaultDenormsAreZeroForTarget; // Outputs are flushed to zero, preserving sign - return DAZ ? llvm::DenormalMode::PreserveSign : llvm::DenormalMode::IEEE; + return DAZ ? llvm::DenormalMode::getPreserveSign() : + llvm::DenormalMode::getIEEE(); } void AMDGPUToolChain::addClangTargetOptions( diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index aa599b02e44ac..6f092ca274c0d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2641,7 +2641,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_fdenormal_fp_math_EQ: DenormalFPMath = llvm::parseDenormalFPAttribute(A->getValue()); - if (DenormalFPMath == llvm::DenormalMode::Invalid) { + if (!DenormalFPMath.isValid()) { D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } @@ -2649,7 +2649,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_fdenormal_fp_math_f32_EQ: DenormalFP32Math = llvm::parseDenormalFPAttribute(A->getValue()); - if (DenormalFP32Math == llvm::DenormalMode::Invalid) { + if (!DenormalFP32Math.isValid()) { D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } @@ -2768,7 +2768,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && SignedZeros && TrappingMath && RoundingFPMath && - DenormalFPMath != llvm::DenormalMode::IEEE && + DenormalFPMath != llvm::DenormalMode::getIEEE() && FPContract.empty()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; @@ -2816,14 +2816,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, CmdArgs.push_back("-fno-trapping-math"); // TODO: Omit flag for the default IEEE instead - if (DenormalFPMath != llvm::DenormalMode::Invalid) { - CmdArgs.push_back(Args.MakeArgString( - "-fdenormal-fp-math=" + llvm::denormalModeName(DenormalFPMath))); - } - - if (DenormalFP32Math != llvm::DenormalMode::Invalid) { - CmdArgs.push_back(Args.MakeArgString( - "-fdenormal-fp-math-f32=" + llvm::denormalModeName(DenormalFP32Math))); + if (DenormalFPMath.isValid()) { + llvm::SmallString<64> DenormFlag; + llvm::raw_svector_ostream ArgStr(DenormFlag); + ArgStr << "-fdenormal-fp-math=" << DenormalFPMath; + CmdArgs.push_back(Args.MakeArgString(ArgStr.str())); + } + + if (DenormalFP32Math.isValid()) { + llvm::SmallString<64> DenormFlag; + llvm::raw_svector_ostream ArgStr(DenormFlag); + ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math; + CmdArgs.push_back(Args.MakeArgString(ArgStr.str())); } if (!FPContract.empty()) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 438d5e10f649b..d6050925cd9e3 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -711,11 +711,11 @@ llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType( DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, false)) - return llvm::DenormalMode::PreserveSign; + return llvm::DenormalMode::getPreserveSign(); } assert(DeviceOffloadKind != Action::OFK_Host); - return llvm::DenormalMode::IEEE; + return llvm::DenormalMode::getIEEE(); } bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9f51c84783274..319f0d58ec4a5 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1286,14 +1286,14 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) { StringRef Val = A->getValue(); Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val); - if (Opts.FPDenormalMode == llvm::DenormalMode::Invalid) + if (!Opts.FPDenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_f32_EQ)) { StringRef Val = A->getValue(); Opts.FP32DenormalMode = llvm::parseDenormalFPAttribute(Val); - if (Opts.FP32DenormalMode == llvm::DenormalMode::Invalid) + if (!Opts.FP32DenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } diff --git a/clang/test/CodeGen/denormalfpmode.c b/clang/test/CodeGen/denormalfpmode.c index b0013daefbf83..3b9ad0d7273ba 100644 --- a/clang/test/CodeGen/denormalfpmode.c +++ b/clang/test/CodeGen/denormalfpmode.c @@ -3,9 +3,9 @@ // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PZ // CHECK-LABEL: main -// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee"{{.*}} -// CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign"{{.*}} -// CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero"{{.*}} +// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee,ieee"{{.*}} +// CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign,preserve-sign"{{.*}} +// CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero,positive-zero"{{.*}} int main() { return 0; diff --git a/clang/test/CodeGenCUDA/flush-denormals.cu b/clang/test/CodeGenCUDA/flush-denormals.cu index 850c283680bc4..a372f3faaf58d 100644 --- a/clang/test/CodeGenCUDA/flush-denormals.cu +++ b/clang/test/CodeGenCUDA/flush-denormals.cu @@ -39,8 +39,8 @@ // CHECK-LABEL: define void @foo() #0 extern "C" __device__ void foo() {} -// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign" -// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee" +// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" +// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee" // FIXME: This should be removed diff --git a/clang/test/CodeGenCUDA/propagate-metadata.cu b/clang/test/CodeGenCUDA/propagate-metadata.cu index 242e0d1c5b987..45f9319f013f8 100644 --- a/clang/test/CodeGenCUDA/propagate-metadata.cu +++ b/clang/test/CodeGenCUDA/propagate-metadata.cu @@ -61,8 +61,8 @@ __global__ void kernel() { lib_fn(); } // FTZ-NOT: "denormal-fp-math" -// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign" -// NOFTZ-SAME: "denormal-fp-math-f32"="ieee" +// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign" +// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee" // CHECK-SAME: "no-trapping-math"="true" diff --git a/clang/test/Driver/cl-denorms-are-zero.cl b/clang/test/Driver/cl-denorms-are-zero.cl index 23a5f783b84a4..7774c0d60da8f 100644 --- a/clang/test/Driver/cl-denorms-are-zero.cl +++ b/clang/test/Driver/cl-denorms-are-zero.cl @@ -14,7 +14,7 @@ // RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s -// AMDGCN-FLUSH: "-fdenormal-fp-math-f32=preserve-sign" +// AMDGCN-FLUSH: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign" // This should be omitted and default to ieee // AMDGCN-DENORM-NOT: "-fdenormal-fp-math-f32" diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu index f0ab22573a395..c032732054141 100644 --- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -9,5 +9,5 @@ // CPUFTZ-NOT: -fdenormal-fp-math -// FTZ: "-fdenormal-fp-math-f32=preserve-sign" -// NOFTZ: "-fdenormal-fp-math=ieee" +// FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign" +// NOFTZ: "-fdenormal-fp-math=ieee,ieee" diff --git a/clang/test/Driver/denormal-fp-math.c b/clang/test/Driver/denormal-fp-math.c index 5914c0b6e24d0..af18517a740ab 100644 --- a/clang/test/Driver/denormal-fp-math.c +++ b/clang/test/Driver/denormal-fp-math.c @@ -3,10 +3,16 @@ // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=positive-zero -v 2>&1 | FileCheck -check-prefix=CHECK-PZ %s // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -fno-fast-math -v 2>&1 | FileCheck -check-prefix=CHECK-NO-UNSAFE %s // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -fno-unsafe-math-optimizations -v 2>&1 | FileCheck -check-prefix=CHECK-NO-UNSAFE %s -// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID %s +// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID0 %s +// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID1 %s +// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s +// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s -// CHECK-IEEE: -fdenormal-fp-math=ieee -// CHECK-PS: "-fdenormal-fp-math=preserve-sign" -// CHECK-PZ: "-fdenormal-fp-math=positive-zero" +// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee +// CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign" +// CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero" // CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee" -// CHECK-INVALID: error: invalid value 'foo' in '-fdenormal-fp-math=foo' +// CHECK-INVALID0: error: invalid value 'foo' in '-fdenormal-fp-math=foo' +// CHECK-INVALID1: error: invalid value 'ieee,foo' in '-fdenormal-fp-math=ieee,foo' +// CHECK-INVALID2: error: invalid value 'foo,ieee' in '-fdenormal-fp-math=foo,ieee' +// CHECK-INVALID3: error: invalid value 'foo,foo' in '-fdenormal-fp-math=foo,foo' diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index fffecbe1f5a8f..6eebef20c0ea6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1820,12 +1820,21 @@ example: not introduce any new floating-point instructions that may trap. ``"denormal-fp-math"`` - This indicates the denormal (subnormal) handling that may be assumed - for the default floating-point environment. This may be one of - ``"ieee"``, ``"preserve-sign"``, or ``"positive-zero"``. If this - is attribute is not specified, the default is ``"ieee"``. If the - mode is ``"preserve-sign"``, or ``"positive-zero"``, denormal - outputs may be flushed to zero by standard floating point + This indicates the denormal (subnormal) handling that may be + assumed for the default floating-point environment. This is a + comma separated pair. The elements may be one of ``"ieee"``, + ``"preserve-sign"``, or ``"positive-zero"``. The first entry + indicates the flushing mode for the result of floating point + operations. The second indicates the handling of denormal inputs + to floating point instructions. For compatability with older + bitcode, if the second value is omitted, both input and output + modes will assume the same mode. + + If this is attribute is not specified, the default is + ``"ieee,ieee"``. + + If the output mode is ``"preserve-sign"``, or ``"positive-zero"``, + denormal outputs may be flushed to zero by standard floating-point operations. It is not mandated that flushing to zero occurs, but if a denormal output is flushed to zero, it must respect the sign mode. Not all targets support all modes. While this indicates the @@ -1834,6 +1843,12 @@ example: consistent. User or platform code is expected to set the floating point mode appropriately before function entry. + If the input mode is ``"preserve-sign"``, or ``"positive-zero"``, a + floating-point operation must treat any input denormal value as + zero. In some situations, if an instruction does not respect this + mode, the input may need to be converted to 0 as if by + ``@llvm.canonicalize`` during lowering for correctness. + ``"denormal-fp-math-f32"`` Same as ``"denormal-fp-math"``, but only controls the behavior of the 32-bit float type (or vectors of 32-bit floats). If both are @@ -15593,9 +15608,9 @@ Each of these intrinsics corresponds to a normal floating-point operation. The data arguments and the return value are the same as the corresponding FP operation. -The rounding mode argument is a metadata string specifying what -assumptions, if any, the optimizer can make when transforming constant -values. Some constrained FP intrinsics omit this argument. If required +The rounding mode argument is a metadata string specifying what +assumptions, if any, the optimizer can make when transforming constant +values. Some constrained FP intrinsics omit this argument. If required by the intrinsic, this argument must be one of the following strings: :: @@ -15911,7 +15926,7 @@ Syntax: Overview: """"""""" -The '``llvm.experimental.constrained.fptoui``' intrinsic converts a +The '``llvm.experimental.constrained.fptoui``' intrinsic converts a floating-point ``value`` to its unsigned integer equivalent of type ``ty2``. Arguments: @@ -15944,7 +15959,7 @@ Syntax: Overview: """"""""" -The '``llvm.experimental.constrained.fptosi``' intrinsic converts +The '``llvm.experimental.constrained.fptosi``' intrinsic converts :ref:`floating-point ` ``value`` to type ``ty2``. Arguments: @@ -15952,7 +15967,7 @@ Arguments: The first argument to the '``llvm.experimental.constrained.fptosi``' intrinsic must be :ref:`floating point ` or :ref:`vector -` of floating point values. +` of floating point values. The second argument specifies the exception behavior as described above. @@ -16061,7 +16076,7 @@ intrinsic must be :ref:`floating point ` or :ref:`vector ` of floating point values. This argument must be larger in size than the result. -The second and third arguments specify the rounding mode and exception +The second and third arguments specify the rounding mode and exception behavior as described above. Semantics: @@ -16085,7 +16100,7 @@ Syntax: Overview: """"""""" -The '``llvm.experimental.constrained.fpext``' intrinsic extends a +The '``llvm.experimental.constrained.fpext``' intrinsic extends a floating-point ``value`` to a larger floating-point value. Arguments: @@ -17102,7 +17117,7 @@ Syntax: declare @llvm.experimental.constrained.llround( , metadata ) - + Overview: """"""""" diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h index 670b2368da9fc..4120c354b7d94 100644 --- a/llvm/include/llvm/ADT/FloatingPointMode.h +++ b/llvm/include/llvm/ADT/FloatingPointMode.h @@ -14,28 +14,97 @@ #define LLVM_FLOATINGPOINTMODE_H #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { -/// Represent handled modes for denormal (aka subnormal) modes in the floating -/// point environment. -enum class DenormalMode { - Invalid = -1, +/// Represent ssubnormal handling kind for floating point instruction inputs and +/// outputs. +struct DenormalMode { + /// Represent handled modes for denormal (aka subnormal) modes in the floating + /// point environment. + enum DenormalModeKind : char { + Invalid = -1, - /// IEEE-754 denormal numbers preserved. - IEEE, + /// IEEE-754 denormal numbers preserved. + IEEE, - /// The sign of a flushed-to-zero number is preserved in the sign of 0 - PreserveSign, + /// The sign of a flushed-to-zero number is preserved in the sign of 0 + PreserveSign, - /// Denormals are flushed to positive zero. - PositiveZero + /// Denormals are flushed to positive zero. + PositiveZero + }; + + /// Denormal flushing mode for floating point instruction results in the + /// default floating point environment. + DenormalModeKind Output = DenormalModeKind::Invalid; + + /// Denormal treatment kind for floating point instruction inputs in the + /// default floating-point environment. If this is not DenormalModeKind::IEEE, + /// floating-point instructions implicitly treat the input value as 0. + DenormalModeKind Input = DenormalModeKind::Invalid; + + DenormalMode() = default; + DenormalMode(DenormalModeKind Out, DenormalModeKind In) : + Output(Out), Input(In) {} + + + static DenormalMode getInvalid() { + return DenormalMode(DenormalModeKind::Invalid, DenormalModeKind::Invalid); + } + + static DenormalMode getIEEE() { + return DenormalMode(DenormalModeKind::IEEE, DenormalModeKind::IEEE); + } + + static DenormalMode getPreserveSign() { + return DenormalMode(DenormalModeKind::PreserveSign, + DenormalModeKind::PreserveSign); + } + + static DenormalMode getPositiveZero() { + return DenormalMode(DenormalModeKind::PositiveZero, + DenormalModeKind::PositiveZero); + } + + bool operator==(DenormalMode Other) const { + return Output == Other.Output && Input == Other.Input; + } + + bool operator!=(DenormalMode Other) const { + return !(*this == Other); + } + + bool isSimple() const { + return Input == Output; + } + + bool isValid() const { + return Output != DenormalModeKind::Invalid && + Input != DenormalModeKind::Invalid; + } + + inline void print(raw_ostream &OS) const; + + inline std::string str() const { + std::string storage; + raw_string_ostream OS(storage); + print(OS); + return OS.str(); + } }; +inline raw_ostream& operator<<(raw_ostream &OS, DenormalMode Mode) { + Mode.print(OS); + return OS; +} + /// Parse the expected names from the denormal-fp-math attribute. -inline DenormalMode parseDenormalFPAttribute(StringRef Str) { +inline DenormalMode::DenormalModeKind +parseDenormalFPAttributeComponent(StringRef Str) { // Assume ieee on unspecified attribute. - return StringSwitch(Str) + return StringSwitch(Str) .Cases("", "ieee", DenormalMode::IEEE) .Case("preserve-sign", DenormalMode::PreserveSign) .Case("positive-zero", DenormalMode::PositiveZero) @@ -44,7 +113,7 @@ inline DenormalMode parseDenormalFPAttribute(StringRef Str) { /// Return the name used for the denormal handling mode used by the the /// expected names from the denormal-fp-math attribute. -inline StringRef denormalModeName(DenormalMode Mode) { +inline StringRef denormalModeKindName(DenormalMode::DenormalModeKind Mode) { switch (Mode) { case DenormalMode::IEEE: return "ieee"; @@ -57,6 +126,26 @@ inline StringRef denormalModeName(DenormalMode Mode) { } } +/// Returns the denormal mode to use for inputs and outputs. +inline DenormalMode parseDenormalFPAttribute(StringRef Str) { + StringRef OutputStr, InputStr; + std::tie(OutputStr, InputStr) = Str.split(','); + + DenormalMode Mode; + Mode.Output = parseDenormalFPAttributeComponent(OutputStr); + + // Maintain compatability with old form of the attribute which only specified + // one component. + Mode.Input = InputStr.empty() ? Mode.Output : + parseDenormalFPAttributeComponent(InputStr); + + return Mode; +} + +void DenormalMode::print(raw_ostream &OS) const { + OS << denormalModeKindName(Output) << ',' << denormalModeKindName(Input); +} + } #endif // LLVM_FLOATINGPOINTMODE_H diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index e59e1fb77a78d..10b6aa90370dc 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -290,7 +290,7 @@ DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const // target by default. StringRef Val = Attr.getValueAsString(); if (Val.empty()) - return DenormalMode::Invalid; + return DenormalMode::getInvalid(); return parseDenormalFPAttribute(Val); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 134f3e17d2b77..f8b90ae6274ae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6605,9 +6605,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) return SDValue(); - // Check if all the bytes of the combined value we are looking at are stored - // to the same base address. Collect bytes offsets from Base address into - // ByteOffsets. + // Check if all the bytes of the combined value we are looking at are stored + // to the same base address. Collect bytes offsets from Base address into + // ByteOffsets. SDValue CombinedValue; SmallVector ByteOffsets(Width, INT64_MAX); int64_t FirstOffset = INT64_MAX; @@ -6627,15 +6627,15 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { Value.getOpcode() == ISD::SRA) { ConstantSDNode *ShiftOffset = dyn_cast(Value.getOperand(1)); - // Trying to match the following pattern. The shift offset must be + // Trying to match the following pattern. The shift offset must be // a constant and a multiple of 8. It is the byte offset in "y". - // + // // x = srl y, offset - // i8 z = trunc x + // i8 z = trunc x // store z, ... if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8)) return SDValue(); - + Offset = ShiftOffset->getSExtValue()/8; Value = Value.getOperand(0); } @@ -6680,7 +6680,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { assert(FirstOffset != INT64_MAX && "First byte offset must be set"); assert(FirstStore && "First store must be set"); - // Check if the bytes of the combined value we are looking at match with + // Check if the bytes of the combined value we are looking at match with // either big or little endian value store. Optional IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); if (!IsBigEndian.hasValue()) @@ -8619,7 +8619,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2, Flags); } // Otherwise see if we can optimize to a better pattern. @@ -10490,7 +10490,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LoadSDNode *LN0 = cast(N0); // Reducing the width of a volatile load is illegal. For atomics, we may be - // able to reduce the width provided we never widen again. (see D66309) + // able to reduce the width provided we never widen again. (see D66309) if (!LN0->isSimple() || !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); @@ -20820,7 +20820,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; DenormalMode DenormMode = DAG.getDenormalMode(VT); - if (DenormMode == DenormalMode::IEEE) { + if (DenormMode.Input == DenormalMode::IEEE) { + // This is specifically a check for the handling of denormal inputs, + // not the result. + // fabs(X) < SmallestNormal ? 0.0 : Est const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 1503550dad418..f1b66094ff764 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -123,7 +123,7 @@ bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { return FtzEnabled; } - return MF.getDenormalMode(APFloat::IEEEsingle()) == + return MF.getDenormalMode(APFloat::IEEEsingle()).Output == DenormalMode::PreserveSign; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 64dca01e1b214..91471956991bf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1706,7 +1706,8 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) { StringRef Attr = II->getFunction() ->getFnAttribute("denormal-fp-math-f32") .getValueAsString(); - bool FtzEnabled = parseDenormalFPAttribute(Attr) != DenormalMode::IEEE; + DenormalMode Mode = parseDenormalFPAttribute(Attr); + bool FtzEnabled = Mode.Output != DenormalMode::IEEE; if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn)) return nullptr; diff --git a/llvm/unittests/ADT/FloatingPointMode.cpp b/llvm/unittests/ADT/FloatingPointMode.cpp index c0d59823db6cb..5b819f3495afc 100644 --- a/llvm/unittests/ADT/FloatingPointMode.cpp +++ b/llvm/unittests/ADT/FloatingPointMode.cpp @@ -13,21 +13,122 @@ using namespace llvm; namespace { -TEST(FloatingPointModeTest, ParseDenormalFPAttribute) { - EXPECT_EQ(DenormalMode::IEEE, parseDenormalFPAttribute("ieee")); - EXPECT_EQ(DenormalMode::IEEE, parseDenormalFPAttribute("")); +TEST(FloatingPointModeTest, ParseDenormalFPAttributeComponent) { + EXPECT_EQ(DenormalMode::IEEE, parseDenormalFPAttributeComponent("ieee")); + EXPECT_EQ(DenormalMode::IEEE, parseDenormalFPAttributeComponent("")); EXPECT_EQ(DenormalMode::PreserveSign, - parseDenormalFPAttribute("preserve-sign")); + parseDenormalFPAttributeComponent("preserve-sign")); EXPECT_EQ(DenormalMode::PositiveZero, - parseDenormalFPAttribute("positive-zero")); - EXPECT_EQ(DenormalMode::Invalid, parseDenormalFPAttribute("foo")); + parseDenormalFPAttributeComponent("positive-zero")); + EXPECT_EQ(DenormalMode::Invalid, parseDenormalFPAttributeComponent("foo")); } TEST(FloatingPointModeTest, DenormalAttributeName) { - EXPECT_EQ("ieee", denormalModeName(DenormalMode::IEEE)); - EXPECT_EQ("preserve-sign", denormalModeName(DenormalMode::PreserveSign)); - EXPECT_EQ("positive-zero", denormalModeName(DenormalMode::PositiveZero)); - EXPECT_EQ("", denormalModeName(DenormalMode::Invalid)); + EXPECT_EQ("ieee", denormalModeKindName(DenormalMode::IEEE)); + EXPECT_EQ("preserve-sign", denormalModeKindName(DenormalMode::PreserveSign)); + EXPECT_EQ("positive-zero", denormalModeKindName(DenormalMode::PositiveZero)); + EXPECT_EQ("", denormalModeKindName(DenormalMode::Invalid)); +} + +TEST(FloatingPointModeTest, ParseDenormalFPAttribute) { + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), + parseDenormalFPAttribute("ieee")); + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), + parseDenormalFPAttribute("ieee,ieee")); + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), + parseDenormalFPAttribute("ieee,")); + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), + parseDenormalFPAttribute("")); + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), + parseDenormalFPAttribute(",")); + + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::PreserveSign), + parseDenormalFPAttribute("preserve-sign")); + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::PreserveSign), + parseDenormalFPAttribute("preserve-sign,")); + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::PreserveSign), + parseDenormalFPAttribute("preserve-sign,preserve-sign")); + + EXPECT_EQ(DenormalMode(DenormalMode::PositiveZero, DenormalMode::PositiveZero), + parseDenormalFPAttribute("positive-zero")); + EXPECT_EQ(DenormalMode(DenormalMode::PositiveZero, DenormalMode::PositiveZero), + parseDenormalFPAttribute("positive-zero,positive-zero")); + + + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::PositiveZero), + parseDenormalFPAttribute("ieee,positive-zero")); + EXPECT_EQ(DenormalMode(DenormalMode::PositiveZero, DenormalMode::IEEE), + parseDenormalFPAttribute("positive-zero,ieee")); + + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::IEEE), + parseDenormalFPAttribute("preserve-sign,ieee")); + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::PreserveSign), + parseDenormalFPAttribute("ieee,preserve-sign")); + + + EXPECT_EQ(DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid), + parseDenormalFPAttribute("foo")); + EXPECT_EQ(DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid), + parseDenormalFPAttribute("foo,foo")); + EXPECT_EQ(DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid), + parseDenormalFPAttribute("foo,bar")); +} + +TEST(FloatingPointModeTest, RenderDenormalFPAttribute) { + EXPECT_EQ(DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid), + parseDenormalFPAttribute("foo")); + + EXPECT_EQ("ieee,ieee", + DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE).str()); + EXPECT_EQ(",", + DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid).str()); + + EXPECT_EQ( + "preserve-sign,preserve-sign", + DenormalMode(DenormalMode::PreserveSign, DenormalMode::PreserveSign).str()); + + EXPECT_EQ( + "positive-zero,positive-zero", + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PositiveZero).str()); + + EXPECT_EQ( + "ieee,preserve-sign", + DenormalMode(DenormalMode::IEEE, DenormalMode::PreserveSign).str()); + + EXPECT_EQ( + "preserve-sign,ieee", + DenormalMode(DenormalMode::PreserveSign, DenormalMode::IEEE).str()); + + EXPECT_EQ( + "preserve-sign,positive-zero", + DenormalMode(DenormalMode::PreserveSign, DenormalMode::PositiveZero).str()); +} + +TEST(FloatingPointModeTest, DenormalModeIsSimple) { + EXPECT_TRUE(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE).isSimple()); + EXPECT_FALSE(DenormalMode(DenormalMode::IEEE, + DenormalMode::Invalid).isSimple()); + EXPECT_FALSE(DenormalMode(DenormalMode::PreserveSign, + DenormalMode::PositiveZero).isSimple()); +} + +TEST(FloatingPointModeTest, DenormalModeIsValid) { + EXPECT_TRUE(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE).isValid()); + EXPECT_FALSE(DenormalMode(DenormalMode::IEEE, DenormalMode::Invalid).isValid()); + EXPECT_FALSE(DenormalMode(DenormalMode::Invalid, DenormalMode::IEEE).isValid()); + EXPECT_FALSE(DenormalMode(DenormalMode::Invalid, + DenormalMode::Invalid).isValid()); +} + +TEST(FloatingPointModeTest, DenormalModeConstructor) { + EXPECT_EQ(DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid), + DenormalMode::getInvalid()); + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), + DenormalMode::getIEEE()); + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::PreserveSign), + DenormalMode::getPreserveSign()); + EXPECT_EQ(DenormalMode(DenormalMode::PositiveZero, DenormalMode::PositiveZero), + DenormalMode::getPositiveZero()); } }