Skip to content

Commit

Permalink
[AMDGPU] Add pal metadata 3.0 support to callable pal funcs (#67104)
Browse files Browse the repository at this point in the history
PAL Metadata 3.0 introduces an explicit structure in metadata for the
programmable registers written out by the compiler backend.
The previous approach used opaque registers which can change between different
architectures and required encoding the bitfield information in the backend,
which may change between versions.

This change is an extension of the previously added support - which only handled
entry functions. This adds support for all functions.

The change also includes some re-factoring to separate common code.
  • Loading branch information
dstutt committed Feb 6, 2024
1 parent d4c5aca commit d6c7253
Show file tree
Hide file tree
Showing 4 changed files with 347 additions and 22 deletions.
54 changes: 32 additions & 22 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,27 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
}

// Writes the PAL Metadata 3.0+ hardware-stage entries that are shared between
// the metadata emitters in this file.
//
// For any calling convention \p CC this sets .wgp_mode and .mem_ordered, plus
// .ieee_mode when the subtarget reports IEEE mode support. For compute calling
// conventions it additionally sets .trap_present, .excp_en and .lds_size.
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
                                  const SIProgramInfo &CurrentProgramInfo,
                                  CallingConv::ID CC, const GCNSubtarget &ST) {
  // .ieee_mode is only emitted for subtargets that have an IEEE mode.
  if (ST.hasIEEEMode()) {
    const bool IEEEMode = static_cast<bool>(CurrentProgramInfo.IEEEMode);
    MD->setHwStage(CC, ".ieee_mode", IEEEMode);
  }

  const bool WgpMode = static_cast<bool>(CurrentProgramInfo.WgpMode);
  MD->setHwStage(CC, ".wgp_mode", WgpMode);

  const bool MemOrdered = static_cast<bool>(CurrentProgramInfo.MemOrdered);
  MD->setHwStage(CC, ".mem_ordered", MemOrdered);

  // Everything below applies to compute calling conventions only.
  if (!AMDGPU::isCompute(CC))
    return;

  const bool TrapPresent =
      static_cast<bool>(CurrentProgramInfo.TrapHandlerEnable);
  MD->setHwStage(CC, ".trap_present", TrapPresent);
  MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);

  // LdsSize is scaled by the dword granularity and the dword width to produce
  // the value stored under .lds_size.
  const unsigned LdsSizeInBytes =
      static_cast<unsigned>(CurrentProgramInfo.LdsSize *
                            getLdsDwGranularity(ST) * sizeof(uint32_t));
  MD->setHwStage(CC, ".lds_size", LdsSizeInBytes);
}

// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
// metadata items into the PALMD::Metadata, combining with any provided by the
Expand Down Expand Up @@ -1056,24 +1077,8 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
}
} else {
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);

if (AMDGPU::isCompute(CC)) {
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
MD->setHwStage(CC, ".trap_present",
(bool)CurrentProgramInfo.TrapHandlerEnable);

// EXCPEnMSB?
const unsigned LdsDwGranularity = 128;
MD->setHwStage(CC, ".lds_size",
(unsigned)(CurrentProgramInfo.LdsSize * LdsDwGranularity *
sizeof(uint32_t)));
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
} else {
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
}
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
}

// ScratchSize is in bytes, 16 aligned.
Expand Down Expand Up @@ -1127,10 +1132,15 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());
if (MD->getPALMajorVersion() < 3) {
// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
MD->setRsrc2(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getComputePGMRSrc2());
} else {
EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
}

// Set optional info
MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2958,6 +2958,11 @@ bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
return hasAny64BitVGPROperands(OpDesc);
}

// Returns the LDS block size in dwords. The value does not currently depend on
// the subtarget, so ST is accepted but not consulted.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  constexpr unsigned LdsDwGranularity = 128;
  return LdsDwGranularity;
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1439,6 +1439,11 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID);
/// \returns true if the intrinsic is uniform
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns the LDS block size in terms of dwords.
/// This is used to calculate the LDS size encoded for PAL metadata 3.0+, which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
Expand Down

0 comments on commit d6c7253

Please sign in to comment.