Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 120 additions & 103 deletions llvm/docs/AMDGPUUsage.rst

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,18 @@ enum : uint8_t {
// [GFX6-GFX9]: declares a COMPUTE_PGM_RSRC1 bit field whose enumerator name is
// prefixed with COMPUTE_PGM_RSRC1_GFX6_GFX9_. SHIFT and WIDTH are forwarded
// unchanged to AMDHSA_BITS_ENUM_ENTRY.
#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX11]: same as above, with the COMPUTE_PGM_RSRC1_GFX6_GFX11_ prefix.
// Used for fields such as ENABLE_DX10_CLAMP and ENABLE_IEEE_MODE whose bit
// positions are given a different field name under the GFX12+ macro below.
#define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
// GFX9+: declares a field with the COMPUTE_PGM_RSRC1_GFX9_PLUS_ prefix.
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX10+: declares a field with the COMPUTE_PGM_RSRC1_GFX10_PLUS_ prefix.
#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX12+: declares a field with the COMPUTE_PGM_RSRC1_GFX12_PLUS_ prefix.
// Used for ENABLE_WG_RR_EN (bit 21) and DISABLE_PERF (bit 23), which occupy
// the same bit positions as the GFX6-GFX11 ENABLE_DX10_CLAMP and
// ENABLE_IEEE_MODE fields respectively.
#define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
Expand All @@ -103,9 +109,11 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
COMPUTE_PGM_RSRC1(PRIV, 20, 1),
COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_DX10_CLAMP, 21, 1),
COMPUTE_PGM_RSRC1_GFX12_PLUS(ENABLE_WG_RR_EN, 21, 1),
COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1_GFX12_PLUS(DISABLE_PERF, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));

assert(isUInt<32>(PI.ScratchSize));
assert(isUInt<32>(PI.getComputePGMRSrc1()));
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));

KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
Expand All @@ -435,7 +435,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
Align MaxKernArgAlign;
KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);

KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);

Expand Down Expand Up @@ -974,7 +974,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);

OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1(STM));

OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc2());
Expand Down Expand Up @@ -1038,7 +1038,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,

MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
if (MD->getPALMajorVersion() < 3) {
MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM));
if (AMDGPU::isCompute(CC)) {
MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
} else {
Expand Down Expand Up @@ -1116,10 +1116,11 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
StringRef FnName = MF.getFunction().getName();
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());

// Set optional info
Expand Down Expand Up @@ -1155,7 +1156,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);

Out.compute_pgm_resource_registers =
CurrentProgramInfo.getComputePGMRSrc1() |
CurrentProgramInfo.getComputePGMRSrc1(STM) |
(CurrentProgramInfo.getComputePGMRSrc2() << 32);
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2199,7 +2199,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
SIModeRegisterDefaults Mode(F);
SIModeRegisterDefaults Mode(F, *Impl.ST);
Impl.HasFP32DenormalFlush =
Mode.FP32Denormals == DenormalMode::getPreserveSign();
return Impl.run(F);
Expand All @@ -2216,7 +2216,7 @@ PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
SIModeRegisterDefaults Mode(F);
SIModeRegisterDefaults Mode(F, *Impl.ST);
Impl.HasFP32DenormalFlush =
Mode.FP32Denormals == DenormalMode::getPreserveSign();
PreservedAnalyses PA = PreservedAnalyses::none();
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
Mode = SIModeRegisterDefaults(MF.getFunction());
Mode = SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
return SelectionDAGISel::runOnMachineFunction(MF);
}

Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1499,13 +1499,13 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
MachineFunction &MF = PFS.MF;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
return true;

if (MFI->Occupancy == 0) {
// Fixup the subtarget dependent default value.
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
}

Expand Down Expand Up @@ -1659,8 +1659,10 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->ArgInfo.WorkItemIDZ, 0, 0)))
return true;

MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
if (ST.hasIEEEMode())
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
if (ST.hasDX10ClampMode())
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;

// FIXME: Move proper support for denormal-fp-math into base MachineFunction
MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
SIModeRegisterDefaults Mode(F);
SIModeRegisterDefaults Mode(F, *ST);
HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign();
HasFP64FP16Denormals =
Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
Expand Down Expand Up @@ -1163,8 +1163,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,

// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
SIModeRegisterDefaults CallerMode(*Caller);
SIModeRegisterDefaults CalleeMode(*Callee);
SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
if (!CallerMode.isInlineCompatible(CalleeMode))
return false;

Expand Down
30 changes: 27 additions & 3 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5342,11 +5342,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
Val, ValRange);
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
Expand Down Expand Up @@ -5409,6 +5415,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
Val, ValRange);
} else if (ID == ".amdhsa_round_robin_scheduling") {
if (IVersion.Major < 12)
return Error(IDRange.Start, "directive requires gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
ValRange);
} else {
return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
}
Expand Down Expand Up @@ -5562,6 +5574,18 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
Lex();

if (ID == "enable_dx10_clamp") {
if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
isGFX12Plus())
return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
}

if (ID == "enable_ieee_mode") {
if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
isGFX12Plus())
return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
}

if (ID == "enable_wavefront_size32") {
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1868,12 +1868,16 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
return MCDisassembler::Fail;

PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
if (!isGFX12Plus())
PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
return MCDisassembler::Fail;

PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
if (!isGFX12Plus())
PRINT_DIRECTIVE(".amdhsa_ieee_mode",
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
return MCDisassembler::Fail;
Expand All @@ -1899,6 +1903,11 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
}

if (isGFX12Plus())
PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

return MCDisassembler::Success;
}

Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,15 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if CSUB atomics support a no-return form.
bool hasAtomicCSubNoRtnInsts() const { return HasAtomicCSubNoRtnInsts; }

/// \returns true when the subtarget generation is older than GFX12, i.e. when
/// the DX10_CLAMP kernel descriptor mode bit exists on the target.
bool hasDX10ClampMode() const {
  const bool IsBeforeGFX12 = getGeneration() < GFX12;
  return IsBeforeGFX12;
}

/// \returns true when the subtarget generation is older than GFX12, i.e. when
/// the IEEE kernel descriptor mode bit exists on the target.
bool hasIEEEMode() const {
  const bool IsBeforeGFX12 = getGeneration() < GFX12;
  return IsBeforeGFX12;
}

/// \returns true when the subtarget generation is GFX12 or newer, i.e. when
/// the WG_RR_MODE kernel descriptor mode bit exists on the target.
bool hasRrWGMode() const {
  const bool IsGFX12OrNewer = getGeneration() >= GFX12;
  return IsGFX12OrNewer;
}

/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
if (IVersion.Major < 12) {
PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
}
if (IVersion.Major >= 9)
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
Expand All @@ -478,6 +478,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
if (IVersion.Major >= 12)
PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,9 @@ enum Register_Flag : uint8_t {
#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
#define C_00B848_DX10_CLAMP 0xFFDFFFFF
// RR_WG_MODE uses the same bit position (21) and clear mask as DX10_CLAMP
// above, so the two sets of accessors alias each other on the raw register
// value. NOTE(review): presumably RR_WG_MODE is only meaningful on GFX12+,
// where the DX10_CLAMP bit is not present -- confirm against the subtarget
// checks before using either accessor unconditionally.
#define S_00B848_RR_WG_MODE(x) (((x) & 0x1) << 21)
#define G_00B848_RR_WG_MODE(x) (((x) >> 21) & 0x1)
#define C_00B848_RR_WG_MODE 0xFFDFFFFF
#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
#define C_00B848_DEBUG_MODE 0xFFBFFFFF
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const GCNTargetMachine &getTM(const GCNSubtarget *STI) {

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget *STI)
: AMDGPUMachineFunction(F, *STI), Mode(F), GWSResourcePSV(getTM(STI)),
: AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
Expand Down
22 changes: 14 additions & 8 deletions llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,26 @@
//===----------------------------------------------------------------------===//

#include "SIModeRegisterDefaults.h"
#include "GCNSubtarget.h"

using namespace llvm;

SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
const GCNSubtarget &ST) {
*this = getDefaultForCallingConv(F.getCallingConv());

StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
if (!IEEEAttr.empty())
IEEE = IEEEAttr == "true";
if (ST.hasIEEEMode()) {
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
if (!IEEEAttr.empty())
IEEE = IEEEAttr == "true";
}

StringRef DX10ClampAttr =
F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
if (ST.hasDX10ClampMode()) {
StringRef DX10ClampAttr =
F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
}

StringRef DenormF32Attr =
F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

namespace llvm {

class GCNSubtarget;

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
/// Floating point opcodes that support exception flag gathering quiet and
Expand All @@ -40,7 +42,7 @@ struct SIModeRegisterDefaults {
FP32Denormals(DenormalMode::getIEEE()),
FP64FP16Denormals(DenormalMode::getIEEE()) {}

SIModeRegisterDefaults(const Function &F);
SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);

static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
SIModeRegisterDefaults Mode;
Expand Down
Loading