Skip to content

Commit

Permalink
clang: Treat ieee mode as the default for denormal-fp-math
Browse files Browse the repository at this point in the history
The IR hasn't switched the default yet, so explicitly add the ieee
attributes.

I'm still not really sure how the target default denormal mode should
interact with -fno-unsafe-math-optimizations. The target may have
selected the default mode to be non-IEEE based on the flags or based
on its true behavior, but we don't know which is the case. Since the
only users of a non-IEEE mode without a flag still support IEEE mode,
just reset to IEEE.
  • Loading branch information
arsenm committed Mar 5, 2020
1 parent c94a413 commit c64ca93
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 30 deletions.
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/CodeGenOptions.h
Expand Up @@ -164,10 +164,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
std::string FloatABI;

/// The floating-point denormal mode to use.
llvm::DenormalMode FPDenormalMode;
llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE();

/// The floating-point subnormal mode to use, for float.
llvm::DenormalMode FP32DenormalMode;
/// The floating-point denormal mode to use, for float.
llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE();

/// The float precision limit to use, if non-empty.
std::string LimitFloatPrecision;
Expand Down
3 changes: 1 addition & 2 deletions clang/include/clang/Driver/ToolChain.h
Expand Up @@ -623,8 +623,7 @@ class ToolChain {
const llvm::opt::ArgList &DriverArgs,
Action::OffloadKind DeviceOffloadKind,
const llvm::fltSemantics *FPType = nullptr) const {
// FIXME: This should be IEEE when default handling is fixed.
return llvm::DenormalMode::getInvalid();
return llvm::DenormalMode::getIEEE();
}
};

Expand Down
19 changes: 14 additions & 5 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -2548,8 +2548,13 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
ReciprocalMath = false;
SignedZeros = true;
// -fno_fast_math restores default denormal and fpcontract handling
DenormalFPMath = DefaultDenormalFPMath;
FPContract = "";
DenormalFPMath = DefaultDenormalFPMath;

// FIXME: The target may have picked a non-IEEE default mode here based on
// -cl-denorms-are-zero. Should the target consider -fp-model interaction?
DenormalFP32Math = DefaultDenormalFP32Math;

StringRef Val = A->getValue();
if (OFastEnabled && !Val.equals("fast")) {
// Only -ffp-model=fast is compatible with OFast, ignore.
Expand Down Expand Up @@ -2726,7 +2731,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
FPExceptionBehavior = "strict";
// -fno_unsafe_math_optimizations restores default denormal handling
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = DefaultDenormalFP32Math;

// The target may have opted to flush just f32 by default, so force IEEE.
DenormalFP32Math = llvm::DenormalMode::getIEEE();
break;

case options::OPT_Ofast:
Expand Down Expand Up @@ -2767,11 +2774,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
if (StrictFPModel) {
// If -ffp-model=strict has been specified on command line but
// subsequent options conflict then emit warning diagnostic.
// TODO: How should this interact with DenormalFP32Math?
if (HonorINFs && HonorNaNs &&
!AssociativeMath && !ReciprocalMath &&
SignedZeros && TrappingMath && RoundingFPMath &&
(FPContract.equals("off") || FPContract.empty()))
(FPContract.equals("off") || FPContract.empty()) &&
DenormalFPMath == llvm::DenormalMode::getIEEE() &&
DenormalFP32Math == llvm::DenormalMode::getIEEE())
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
else {
Expand Down Expand Up @@ -2825,7 +2833,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}

if (DenormalFP32Math.isValid()) {
// Add f32 specific denormal mode flag if it's different.
if (DenormalFP32Math != DenormalFPMath) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
Expand Down
9 changes: 2 additions & 7 deletions clang/test/CodeGenCUDA/flush-denormals.cu
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -fcuda-is-device \
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
// RUN: FileCheck -check-prefix=DEFAULT %s
// RUN: FileCheck -check-prefix=NOFTZ %s

// RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
Expand All @@ -10,10 +10,9 @@
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
// RUN: FileCheck -check-prefix=FTZ %s

// FIXME: Unspecified should default to ieee
// RUN: %clang_cc1 -fcuda-is-device -x hip \
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
// RUN: FileCheck -check-prefix=AMDFTZ %s
// RUN: FileCheck -check-prefix=AMDNOFTZ %s

// RUN: %clang_cc1 -fcuda-is-device -x hip \
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \
Expand Down Expand Up @@ -42,10 +41,6 @@ extern "C" __device__ void foo() {}
// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee"


// FIXME: This should be removed
// DEFAULT-NOT: "denormal-fp-math-f32"

// AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals
// AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenCUDA/propagate-metadata.cu
Expand Up @@ -15,7 +15,7 @@
// RUN: %s -o %t.bc -triple nvptx-unknown-unknown

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
// RUN: -fno-trapping-math -fcuda-is-device -fdenormal-fp-math-f32=ieee -triple nvptx-unknown-unknown \
// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
Expand Down Expand Up @@ -60,8 +60,7 @@ __global__ void kernel() { lib_fn(); }
// CHECK-SAME: convergent
// CHECK-SAME: norecurse

// FTZ-NOT: "denormal-fp-math"

// FTZ: "denormal-fp-math"="ieee,ieee"
// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"

Expand All @@ -76,7 +75,8 @@ __global__ void kernel() { lib_fn(); }
// CHECK-SAME: convergent
// CHECK-NOT: norecurse

// FTZ-NOT: "denormal-fp-math"
// FTZ-SAME: "denormal-fp-math"="ieee,ieee"
// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee"

// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
Expand Down
14 changes: 7 additions & 7 deletions clang/test/CodeGenOpenCL/amdgpu-features.cl
Expand Up @@ -14,13 +14,13 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s

// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,-fp32-denormals"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime,-fp32-denormals"
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
// GFX700: "target-features"="+ci-insts,+flat-address-space,+fp64-fp16-denormals,-fp32-denormals"
// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
Expand Down
6 changes: 5 additions & 1 deletion clang/test/Driver/cuda-flush-denormals-to-zero.cu
Expand Up @@ -9,5 +9,9 @@

// CPUFTZ-NOT: -fdenormal-fp-math

// FTZ-NOT: -fdenormal-fp-math-f32=
// FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
// NOFTZ: "-fdenormal-fp-math=ieee,ieee"

// The default of ieee is omitted
// NOFTZ-NOT: "-fdenormal-fp-math"
// NOFTZ-NOT: "-fdenormal-fp-math-f32"
3 changes: 2 additions & 1 deletion clang/test/Driver/denormal-fp-math.c
Expand Up @@ -8,7 +8,8 @@
// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s
// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s

// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
// TODO: ieee is the implied default, and the flag is not passed.
// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee"
// CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign"
// CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero"
// CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee"
Expand Down
4 changes: 4 additions & 0 deletions clang/test/Driver/fp-model.c
Expand Up @@ -63,6 +63,10 @@
// RUN: | FileCheck --check-prefix=WARNf %s
// WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option]

// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \
// RUN: | FileCheck --check-prefix=WARN10 %s
// WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option]

// RUN: %clang -### -c %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-NOROUND %s
// CHECK-NOROUND: "-cc1"
Expand Down

0 comments on commit c64ca93

Please sign in to comment.