Skip to content

Commit

Permalink
Reapply "clang: Treat ieee mode as the default for denormal-fp-math"
Browse files Browse the repository at this point in the history
This reverts commit 737394c.

The fp-model test was failing on platforms that enable denormal flushing
based on -ffast-math. This needs to reset to IEEE, not the default in
these cases.

Change-Id: Ibbad32f66d0d0b89b9c1173a3a96fb1a570ddd89
  • Loading branch information
arsenm committed Mar 6, 2020
1 parent 33b696b commit 00b2a9d
Show file tree
Hide file tree
Showing 9 changed files with 44 additions and 33 deletions.
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/CodeGenOptions.h
Expand Up @@ -164,10 +164,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
std::string FloatABI;

/// The floating-point denormal mode to use.
llvm::DenormalMode FPDenormalMode;
llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE();

/// The floating-point subnormal mode to use, for float.
llvm::DenormalMode FP32DenormalMode;
/// The floating-point denormal mode to use, for float.
llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE();

/// The float precision limit to use, if non-empty.
std::string LimitFloatPrecision;
Expand Down
3 changes: 1 addition & 2 deletions clang/include/clang/Driver/ToolChain.h
Expand Up @@ -623,8 +623,7 @@ class ToolChain {
const llvm::opt::ArgList &DriverArgs,
Action::OffloadKind DeviceOffloadKind,
const llvm::fltSemantics *FPType = nullptr) const {
// FIXME: This should be IEEE when default handling is fixed.
return llvm::DenormalMode::getInvalid();
return llvm::DenormalMode::getIEEE();
}
};

Expand Down
24 changes: 16 additions & 8 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -2548,8 +2548,13 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
ReciprocalMath = false;
SignedZeros = true;
// -fno_fast_math restores default denormal and fpcontract handling
DenormalFPMath = DefaultDenormalFPMath;
FPContract = "";
DenormalFPMath = llvm::DenormalMode::getIEEE();

// FIXME: The target may have picked a non-IEEE default mode here based on
// -cl-denorms-are-zero. Should the target consider -fp-model interaction?
DenormalFP32Math = llvm::DenormalMode::getIEEE();

StringRef Val = A->getValue();
if (OFastEnabled && !Val.equals("fast")) {
// Only -ffp-model=fast is compatible with OFast, ignore.
Expand Down Expand Up @@ -2724,9 +2729,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
SignedZeros = true;
TrappingMath = true;
FPExceptionBehavior = "strict";
// -fno_unsafe_math_optimizations restores default denormal handling
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = DefaultDenormalFP32Math;

// The target may have opted to flush by default, so force IEEE.
DenormalFPMath = llvm::DenormalMode::getIEEE();
DenormalFP32Math = llvm::DenormalMode::getIEEE();
break;

case options::OPT_Ofast:
Expand Down Expand Up @@ -2760,18 +2766,19 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
RoundingFPMath = false;
// -fno_fast_math restores default denormal and fpcontract handling
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = DefaultDenormalFP32Math;
DenormalFP32Math = llvm::DenormalMode::getIEEE();
FPContract = "";
break;
}
if (StrictFPModel) {
// If -ffp-model=strict has been specified on command line but
// subsequent options conflict then emit warning diagnostic.
// TODO: How should this interact with DenormalFP32Math?
if (HonorINFs && HonorNaNs &&
!AssociativeMath && !ReciprocalMath &&
SignedZeros && TrappingMath && RoundingFPMath &&
(FPContract.equals("off") || FPContract.empty()))
(FPContract.equals("off") || FPContract.empty()) &&
DenormalFPMath == llvm::DenormalMode::getIEEE() &&
DenormalFP32Math == llvm::DenormalMode::getIEEE())
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
else {
Expand Down Expand Up @@ -2825,7 +2832,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}

if (DenormalFP32Math.isValid()) {
// Add f32 specific denormal mode flag if it's different.
if (DenormalFP32Math != DenormalFPMath) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
Expand Down
9 changes: 2 additions & 7 deletions clang/test/CodeGenCUDA/flush-denormals.cu
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -fcuda-is-device \
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
// RUN: FileCheck -check-prefix=DEFAULT %s
// RUN: FileCheck -check-prefix=NOFTZ %s

// RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
Expand All @@ -10,10 +10,9 @@
// RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
// RUN: FileCheck -check-prefix=FTZ %s

// FIXME: Unspecified should default to ieee
// RUN: %clang_cc1 -fcuda-is-device -x hip \
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
// RUN: FileCheck -check-prefix=AMDFTZ %s
// RUN: FileCheck -check-prefix=AMDNOFTZ %s

// RUN: %clang_cc1 -fcuda-is-device -x hip \
// RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \
Expand Down Expand Up @@ -42,10 +41,6 @@ extern "C" __device__ void foo() {}
// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee"


// FIXME: This should be removed
// DEFAULT-NOT: "denormal-fp-math-f32"

// AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals
// AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenCUDA/propagate-metadata.cu
Expand Up @@ -15,7 +15,7 @@
// RUN: %s -o %t.bc -triple nvptx-unknown-unknown

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
// RUN: -fno-trapping-math -fcuda-is-device -fdenormal-fp-math-f32=ieee -triple nvptx-unknown-unknown \
// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
Expand Down Expand Up @@ -60,8 +60,7 @@ __global__ void kernel() { lib_fn(); }
// CHECK-SAME: convergent
// CHECK-SAME: norecurse

// FTZ-NOT: "denormal-fp-math"

// FTZ: "denormal-fp-math"="ieee,ieee"
// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"

Expand All @@ -76,7 +75,8 @@ __global__ void kernel() { lib_fn(); }
// CHECK-SAME: convergent
// CHECK-NOT: norecurse

// FTZ-NOT: "denormal-fp-math"
// FTZ-SAME: "denormal-fp-math"="ieee,ieee"
// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee"

// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
Expand Down
14 changes: 7 additions & 7 deletions clang/test/CodeGenOpenCL/amdgpu-features.cl
Expand Up @@ -14,13 +14,13 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s

// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,-fp32-denormals"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime,-fp32-denormals"
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
// GFX700: "target-features"="+ci-insts,+flat-address-space,+fp64-fp16-denormals,-fp32-denormals"
// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
Expand Down
6 changes: 5 additions & 1 deletion clang/test/Driver/cuda-flush-denormals-to-zero.cu
Expand Up @@ -9,5 +9,9 @@

// CPUFTZ-NOT: -fdenormal-fp-math

// FTZ-NOT: -fdenormal-fp-math-f32=
// FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
// NOFTZ: "-fdenormal-fp-math=ieee,ieee"

// The default of ieee is omitted
// NOFTZ-NOT: "-fdenormal-fp-math"
// NOFTZ-NOT: "-fdenormal-fp-math-f32"
3 changes: 2 additions & 1 deletion clang/test/Driver/denormal-fp-math.c
Expand Up @@ -8,7 +8,8 @@
// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s
// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s

// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
// TODO: ieee is the implied default, and the flag is not passed.
// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee"
// CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign"
// CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero"
// CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee"
Expand Down
4 changes: 4 additions & 0 deletions clang/test/Driver/fp-model.c
Expand Up @@ -63,6 +63,10 @@
// RUN: | FileCheck --check-prefix=WARNf %s
// WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option]

// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \
// RUN: | FileCheck --check-prefix=WARN10 %s
// WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option]

// RUN: %clang -### -c %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-NOROUND %s
// CHECK-NOROUND: "-cc1"
Expand Down

0 comments on commit 00b2a9d

Please sign in to comment.