Skip to content

Commit

Permalink
AMDGPU: Add target id and code object v4 support
Browse files Browse the repository at this point in the history
  - Add target id support (https://clang.llvm.org/docs/ClangOffloadBundler.html#target-id)
  - Add code object v4 support (https://llvm.org/docs/AMDGPUUsage.html#elf-code-object)
    - Add kernarg_size to kernel descriptor
    - Change trap handler ABI to no longer move queue pointer into s[0:1]
  - Clean up ELF definitions
    - Add V2, V3, V4 suffixes to make a clear distinction for code object version
    - Consolidate note names

Differential Revision: https://reviews.llvm.org/D95638
  • Loading branch information
kzhuravl committed Mar 24, 2021
1 parent eb4ad0e commit f4ace63
Show file tree
Hide file tree
Showing 97 changed files with 3,764 additions and 705 deletions.
2 changes: 1 addition & 1 deletion lld/test/ELF/amdgpu-abi-version.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# REQUIRES: amdgpu
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s -o %t.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o
# RUN: ld.lld -shared %t.o -o %t.so
# RUN: llvm-readobj --file-headers %t.so | FileCheck %s

Expand Down
2 changes: 1 addition & 1 deletion lld/test/ELF/lto/amdgcn-oses.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D

; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
; AMDHSA: ABIVersion: 1
; AMDHSA: ABIVersion: 2

; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
Expand Down
63 changes: 54 additions & 9 deletions llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ enum {
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2
};

#define ELF_RELOC(name, value) name = value,
Expand Down Expand Up @@ -742,10 +743,51 @@ enum : unsigned {

// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
EF_AMDGPU_XNACK = 0x100,
// Indicates if the "sram-ecc" target feature is enabled for all code
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
// Indicates if the trap handler is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,

// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
// Indicates if the "sramecc" target feature is enabled for all code
// contained in the object.
EF_AMDGPU_SRAM_ECC = 0x200,
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,

// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
// XNACK is not supported.
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
// XNACK is any/default/unspecified.
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
// XNACK is off.
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
// XNACK is on.
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,

// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
// SRAMECC is not supported.
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
// SRAMECC is any/default/unspecified.
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
// SRAMECC is off.
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
};

// ELF Relocation types for AMDGPU
Expand Down Expand Up @@ -1547,15 +1589,18 @@ enum {
SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
};

// AMD specific notes. (Code Object V2)
// AMD vendor specific notes. (Code Object V2)
enum {
// Note types with values between 0 and 9 (inclusive) are reserved.
NT_AMD_AMDGPU_HSA_METADATA = 10,
NT_AMD_AMDGPU_ISA = 11,
NT_AMD_AMDGPU_PAL_METADATA = 12
NT_AMD_HSA_CODE_OBJECT_VERSION = 1,
NT_AMD_HSA_HSAIL = 2,
NT_AMD_HSA_ISA_VERSION = 3,
// Note types with values between 4 and 9 (inclusive) are reserved.
NT_AMD_HSA_METADATA = 10,
NT_AMD_HSA_ISA_NAME = 11,
NT_AMD_PAL_METADATA = 12
};

// AMDGPU specific notes. (Code Object V3)
// AMDGPU vendor specific notes. (Code Object V3)
enum {
// Note types with values between 0 and 31 (inclusive) are reserved.
NT_AMDGPU_METADATA = 32
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,9 @@ class MCTargetAsmParser : public MCAsmParserExtension {
return nullptr;
}

// For any initialization at the beginning of parsing.
// AsmParser::Run and MasmParser::Run invoke this hook immediately before
// they enter their statement-parsing loop. The default implementation does
// nothing; a target parser may override it.
virtual void onBeginOfFile() {}

// For any checks or cleanups at the end of parsing.
// The default implementation does nothing; a target parser may override it.
// NOTE(review): the call site is not visible here -- presumably the generic
// parsers invoke it once all input has been consumed; confirm.
virtual void onEndOfFile() {}
};
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/MC/MCSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class MCSubtargetInfo {
const unsigned *OperandCycles; // Itinerary operand cycles
const unsigned *ForwardingPaths;
FeatureBitset FeatureBits; // Feature bits for current CPU + FS
std::string FeatureString; // Feature string

public:
MCSubtargetInfo(const MCSubtargetInfo &) = default;
Expand All @@ -112,6 +113,8 @@ class MCSubtargetInfo {
FeatureBits = FeatureBits_;
}

/// Returns the feature string stored by InitMCProcessorInfo() or
/// setDefaultFeatures(). The result is built from the FeatureString member
/// itself, not from a copy of it.
StringRef getFeatureString() const {
  return StringRef(FeatureString);
}

/// Reports whether the bit indexed by \p Feature is set in FeatureBits
/// (the feature bits for the current CPU + FS).
bool hasFeature(unsigned Feature) const {
  const bool IsEnabled = FeatureBits[Feature];
  return IsEnabled;
}
Expand Down
18 changes: 14 additions & 4 deletions llvm/include/llvm/Support/AMDGPUMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,20 @@ namespace AMDGPU {
//===----------------------------------------------------------------------===//
namespace HSAMD {

/// HSA metadata major version.
constexpr uint32_t VersionMajor = 1;
/// HSA metadata minor version.
constexpr uint32_t VersionMinor = 0;
/// HSA metadata major version for code object V2.
constexpr uint32_t VersionMajorV2 = 1;
/// HSA metadata minor version for code object V2.
constexpr uint32_t VersionMinorV2 = 0;

/// HSA metadata major version for code object V3.
constexpr uint32_t VersionMajorV3 = 1;
/// HSA metadata minor version for code object V3.
constexpr uint32_t VersionMinorV3 = 0;

/// HSA metadata major version for code object V4.
constexpr uint32_t VersionMajorV4 = 1;
/// HSA metadata minor version for code object V4.
constexpr uint32_t VersionMinorV4 = 1;

/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
Expand Down
9 changes: 7 additions & 2 deletions llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ enum : int32_t {
struct kernel_descriptor_t {
uint32_t group_segment_fixed_size;
uint32_t private_segment_fixed_size;
uint8_t reserved0[8];
uint32_t kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
Expand All @@ -178,7 +179,8 @@ struct kernel_descriptor_t {
enum : uint32_t {
GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
RESERVED0_OFFSET = 8,
KERNARG_SIZE_OFFSET = 8,
RESERVED0_OFFSET = 12,
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
RESERVED1_OFFSET = 24,
COMPUTE_PGM_RSRC3_OFFSET = 44,
Expand All @@ -197,6 +199,9 @@ static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for private_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
KERNARG_SIZE_OFFSET,
"invalid offset for kernarg_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
"invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/MC/MCParser/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
(void)InsertResult;
}

getTargetParser().onBeginOfFile();

// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/MC/MCParser/MasmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,8 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
(void)InsertResult;
}

getTargetParser().onBeginOfFile();

// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof) ||
SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/MC/MCSubtargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ static FeatureBitset getFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS,
void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
StringRef FS) {
FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
FeatureString = std::string(FS);

if (!TuneCPU.empty())
CPUSchedModel = &getSchedModelForCPU(TuneCPU);
else
Expand All @@ -217,6 +219,7 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
/// Recomputes FeatureBits from \p CPU, \p TuneCPU and \p FS, then records
/// \p FS as the current feature string.
void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU,
                                         StringRef FS) {
  FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
  // Keep this after the getFeatures() call: if FS happens to view
  // FeatureString's own buffer, overwriting the member first would leave FS
  // dangling while it is still being read.
  FeatureString = FS.str();
}

MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef TC,
Expand Down
35 changes: 30 additions & 5 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
// AMD specific notes. (Code Object V2)
ECase(NT_AMD_AMDGPU_HSA_METADATA);
ECase(NT_AMD_AMDGPU_ISA);
ECase(NT_AMD_AMDGPU_PAL_METADATA);
ECase(NT_AMD_HSA_METADATA);
ECase(NT_AMD_HSA_ISA_NAME);
ECase(NT_AMD_PAL_METADATA);
// AMDGPU specific notes. (Code Object V3)
ECase(NT_AMDGPU_METADATA);
#undef ECase
Expand Down Expand Up @@ -546,8 +546,33 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
LLVM_FALLTHROUGH;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
BCase(EF_AMDGPU_FEATURE_XNACK_V3);
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_OFF_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ON_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ON_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
break;
}
break;
default:
break;
Expand Down
Loading

0 comments on commit f4ace63

Please sign in to comment.