Skip to content

Commit

Permalink
AMDGPU: Add sram-ecc feature
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D53222

llvm-svn: 346177
  • Loading branch information
kzhuravl committed Nov 5, 2018
1 parent 8b37f1d commit 108927b
Show file tree
Hide file tree
Showing 14 changed files with 182 additions and 44 deletions.
51 changes: 33 additions & 18 deletions llvm/docs/AMDGPUUsage.rst
Expand Up @@ -207,6 +207,8 @@ names from both the *Processor* and *Alternative Processor* can be used.
names.
``gfx906`` ``amdgcn`` dGPU - xnack *TBA*
[off]
sram-ecc
[on]
.. TODO
Add product
names.
Expand Down Expand Up @@ -246,24 +248,26 @@ For example:
.. table:: AMDGPU Target Features
:name: amdgpu-target-feature-table

============== ==================================================
Target Feature Description
============== ==================================================
-m[no-]xnack Enable/disable generating code that has
memory clauses that are compatible with
having XNACK replay enabled.

This is used for demand paging and page
migration. If XNACK replay is enabled in
the device, then if a page fault occurs
the code may execute incorrectly if the
``xnack`` feature is not enabled. Executing
code that has the feature enabled on a
device that does not have XNACK replay
enabled will execute correctly, but may
be less performant than code with the
feature disabled.
============== ==================================================
=============== ==================================================
Target Feature Description
=============== ==================================================
-m[no-]xnack Enable/disable generating code that has
memory clauses that are compatible with
having XNACK replay enabled.

This is used for demand paging and page
migration. If XNACK replay is enabled in
the device, then if a page fault occurs
the code may execute incorrectly if the
``xnack`` feature is not enabled. Executing
code that has the feature enabled on a
device that does not have XNACK replay
enabled will execute correctly, but may
be less performant than code with the
feature disabled.
-m[no-]sram-ecc Enable/disable generating code that assumes SRAM
ECC is enabled/disabled.
=============== ==================================================

.. _amdgpu-address-spaces:

Expand Down Expand Up @@ -549,6 +553,17 @@ The AMDGPU backend uses the following ELF header:
be 0.
See
:ref:`amdgpu-target-features`.
``EF_AMDGPU_SRAM_ECC`` 0x00000200 Indicates if the ``sram-ecc``
target feature is
enabled for all code
contained in the code object.
If the processor
does not support the
``sram-ecc`` target
feature then must
be 0.
See
:ref:`amdgpu-target-features`.
================================= ========== =============================

.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values
Expand Down
7 changes: 5 additions & 2 deletions llvm/include/llvm/BinaryFormat/ELF.h
Expand Up @@ -711,9 +711,12 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,

// Indicates if the xnack target feature is enabled for all code contained in
// the object.
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
EF_AMDGPU_XNACK = 0x100,
// Indicates if the "sram-ecc" target feature is enabled for all code
// contained in the object.
EF_AMDGPU_SRAM_ECC = 0x200,
};

// ELF Relocation types for AMDGPU
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Expand Up @@ -404,6 +404,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;
case ELF::EM_X86_64:
break;
Expand Down
29 changes: 12 additions & 17 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Expand Up @@ -266,13 +266,10 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
"Has deep learning instructions"
>;

def FeatureD16PreservesUnusedBits : SubtargetFeature<
"d16-preserves-unused-bits",
"D16PreservesUnusedBits",
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
"EnableSRAMECC",
"true",
"If present, then instructions defined by HasD16LoadStore predicate preserve "
"unused bits. Otherwise instructions defined by HasD16LoadStore predicate "
"zero unused bits."
"Enable SRAM ECC"
>;

//===------------------------------------------------------------===//
Expand Down Expand Up @@ -524,35 +521,32 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureD16PreservesUnusedBits]>;
FeatureLDSBankCount32]>;

def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
FeatureD16PreservesUnusedBits]>;
FeatureXNACK]>;

def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
FeatureD16PreservesUnusedBits]>;
FeatureFmaMixInsts]>;

def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
[FeatureGFX9,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts]>;
FeatureDLInsts,
FeatureSRAMECC]>;

def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
FeatureD16PreservesUnusedBits]>;
FeatureXNACK]>;

//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
Expand Down Expand Up @@ -684,8 +678,9 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;

def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
// Selection predicate: holds when the subtarget has D16 load/store
// instructions and SRAM ECC is not enabled. The assembler-side form of the
// same condition requires FeatureGFX9Insts with FeatureSRAMECC absent.
def D16PreservesUnusedBits :
Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;

def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Expand Up @@ -198,7 +198,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDPP(false),
HasR128A16(false),
HasDLInsts(false),
D16PreservesUnusedBits(false),
EnableSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Expand Up @@ -353,7 +353,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
bool HasDPP;
bool HasR128A16;
bool HasDLInsts;
bool D16PreservesUnusedBits;
bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
Expand Down Expand Up @@ -679,8 +679,8 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return HasDLInsts;
}

bool d16PreservesUnusedBits() const {
return D16PreservesUnusedBits;
/// \returns the value of the EnableSRAMECC member of this subtarget.
bool isSRAMECCEnabled() const {
return EnableSRAMECC;
}

// Scratch is allocated in 256 dword per wave blocks for the entire
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Expand Up @@ -347,6 +347,10 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
if (AMDGPU::hasXNACK(STI))
EFlags |= ELF::EF_AMDGPU_XNACK;

EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
if (AMDGPU::hasSRAMECC(STI))
EFlags |= ELF::EF_AMDGPU_SRAM_ECC;

MCA.setELFHeaderEFlags(EFlags);
}

Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Expand Up @@ -152,6 +152,8 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {

if (hasXNACK(*STI))
Stream << "+xnack";
if (hasSRAMECC(*STI))
Stream << "+sram-ecc";

Stream.flush();
}
Expand Down Expand Up @@ -593,6 +595,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

/// \returns true if the AMDGPU::FeatureSRAMECC bit is set in the feature
/// bits of \p STI.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  const auto &Features = STI.getFeatureBits();
  return Features[AMDGPU::FeatureSRAMECC];
}

/// \returns true if the AMDGPU::FeatureMIMG_R128 bit is set in the feature
/// bits of \p STI.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  const auto &Features = STI.getFeatureBits();
  return Features[AMDGPU::FeatureMIMG_R128];
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Expand Up @@ -342,6 +342,7 @@ inline bool isKernel(CallingConv::ID CC) {
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

Expand Down
16 changes: 14 additions & 2 deletions llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
Expand Up @@ -34,6 +34,12 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+code-object-v3,-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,-sram-ecc < %s | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX906 %s

; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
Expand All @@ -48,10 +54,16 @@
; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"

; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902
; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"

; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sram-ecc"
; NO-SRAM-ECC-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"

; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sram-ecc"
; XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"

define amdgpu_kernel void @directive_amdgcn_target() {
ret void
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
Expand Up @@ -86,6 +86,7 @@
; GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
; ALL: ]

Expand Down
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/AMDGPU/elf-header-flags-sram-ecc.ll
@@ -0,0 +1,38 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX902 %s

; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc,+xnack < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s

; NO-SRAM-ECC-GFX902: Flags [
; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
; NO-SRAM-ECC-GFX902-NEXT: ]

; SRAM-ECC-GFX902: Flags [
; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
; SRAM-ECC-GFX902-NEXT: ]

; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; NO-SRAM-ECC-GFX906-NEXT: ]

; SRAM-ECC-GFX906: Flags [
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX906-NEXT: ]

; SRAM-ECC-XNACK-GFX906: Flags [
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100)
; SRAM-ECC-XNACK-GFX906-NEXT: ]

; Kernel whose body is only a return. The RUN lines of this test pipe the
; object emitted by llc into llvm-readobj -file-headers and check the flags.
define amdgpu_kernel void @elf_header() {
ret void
}
61 changes: 61 additions & 0 deletions llvm/test/Object/AMDGPU/elf-header-flags-sram-ecc.yaml
@@ -0,0 +1,61 @@
# RUN: yaml2obj -docnum=1 %s > %t.o.1
# RUN: llvm-readobj -s -file-headers %t.o.1 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-NONE %s
# RUN: obj2yaml %t.o.1 | FileCheck --check-prefixes=YAML-SRAM-ECC-NONE %s
# RUN: yaml2obj -docnum=2 %s > %t.o.2
# RUN: llvm-readobj -s -file-headers %t.o.2 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-GFX900 %s
# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-SRAM-ECC-GFX900 %s
# RUN: yaml2obj -docnum=3 %s > %t.o.3
# RUN: llvm-readobj -s -file-headers %t.o.3 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-XNACK-GFX900 %s
# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s

# ELF-SRAM-ECC-NONE: Flags [
# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-NONE-NEXT: ]

# ELF-SRAM-ECC-GFX900: Flags [
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: ]

# ELF-SRAM-ECC-XNACK-GFX900: Flags [
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ]

# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]

# Doc1
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_SRAM_ECC ]
...

# Doc2
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
...

# Doc3
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
...
3 changes: 2 additions & 1 deletion llvm/tools/llvm-readobj/ELFDumper.cpp
Expand Up @@ -1355,7 +1355,8 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK)
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};

static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
Expand Down

0 comments on commit 108927b

Please sign in to comment.