Skip to content

Commit

Permalink
[AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-ar…
Browse files Browse the repository at this point in the history
…e-zero is set

Differential Revision: https://reviews.llvm.org/D31482

llvm-svn: 300306
  • Loading branch information
kzhuravl committed Apr 14, 2017
1 parent c9a4fc0 commit e668b1c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
9 changes: 6 additions & 3 deletions clang/lib/Basic/Targets.cpp
Expand Up @@ -2112,9 +2112,12 @@ class AMDGPUTargetInfo final : public TargetInfo {
bool hasFP64:1;
bool hasFMAF:1;
bool hasLDEXPF:1;
bool hasFullSpeedFP32Denorms:1;
const AddrSpace AS;

/// Reports whether \p GPUName, once parsed by parseAMDGCNName, compares
/// greater than or equal to GK_GFX9.
static bool hasFullSpeedFMAF32(StringRef GPUName) {
  const auto ParsedKind = parseAMDGCNName(GPUName);
  const bool IsAtLeastGFX9 = ParsedKind >= GK_GFX9;
  return IsAtLeastGFX9;
}

/// True exactly when the architecture of \p TT is llvm::Triple::amdgcn.
static bool isAMDGCN(const llvm::Triple &TT) {
  const auto Arch = TT.getArch();
  return llvm::Triple::amdgcn == Arch;
}
Expand All @@ -2130,7 +2133,6 @@ class AMDGPUTargetInfo final : public TargetInfo {
hasFP64(false),
hasFMAF(false),
hasLDEXPF(false),
hasFullSpeedFP32Denorms(false),
AS(isGenericZero(Triple)){
if (getTriple().getArch() == llvm::Triple::amdgcn) {
hasFP64 = true;
Expand Down Expand Up @@ -2200,7 +2202,8 @@ class AMDGPUTargetInfo final : public TargetInfo {
hasFP64Denormals = true;
}
if (!hasFP32Denormals)
TargetOpts.Features.push_back((Twine(hasFullSpeedFP32Denorms &&
TargetOpts.Features.push_back(
(Twine(hasFullSpeedFMAF32(TargetOpts.CPU) &&
!CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
// Never flush fp64 or fp16 denorms.
if (!hasFP64Denormals && hasFP64)
Expand Down
13 changes: 13 additions & 0 deletions clang/test/CodeGenOpenCL/gfx9-fp32-denorms.cl
@@ -0,0 +1,13 @@
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature +fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_ON %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature -fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_OFF %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -cl-denorms-are-zero %s | FileCheck --check-prefix=OPT_DENORMS_ARE_ZERO %s

// DEFAULT: +fp32-denormals
// FEATURE_FP32_DENORMALS_ON: +fp32-denormals
// FEATURE_FP32_DENORMALS_OFF: -fp32-denormals
// OPT_DENORMS_ARE_ZERO: -fp32-denormals

// Intentionally empty kernel: it only gives the compiler something to emit,
// so the FileCheck patterns above have LLVM IR output to match against.
kernel void gfx9_fp32_denorms() {}

0 comments on commit e668b1c

Please sign in to comment.