89 changes: 44 additions & 45 deletions llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
Original file line number Diff line number Diff line change
@@ -1,60 +1,59 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx908 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX908 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1011 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1011 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1012 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1012 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1030 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1030 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1031 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1031 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1032 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1032 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1033 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1033 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI600 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI601 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI602 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI702 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI705 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s

; HSA: .hsa_code_object_version 2,1
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU"
; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600"
; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601"
; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602"
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU"
; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU"
; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU"
; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU"
; HSA-CI705: .hsa_code_object_isa 7,0,5,"AMD","AMDGPU"
; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU"
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
; HSA-VI805: .hsa_code_object_isa 8,0,5,"AMD","AMDGPU"
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU"
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU"
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
; HSA-GFX908: .hsa_code_object_isa 9,0,8,"AMD","AMDGPU"
; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
; HSA-GFX1011: .hsa_code_object_isa 10,1,1,"AMD","AMDGPU"
; HSA-GFX1012: .hsa_code_object_isa 10,1,2,"AMD","AMDGPU"
; HSA-GFX1030: .hsa_code_object_isa 10,3,0,"AMD","AMDGPU"
; HSA-GFX1031: .hsa_code_object_isa 10,3,1,"AMD","AMDGPU"
; HSA-GFX1032: .hsa_code_object_isa 10,3,2,"AMD","AMDGPU"
; HSA-GFX1033: .hsa_code_object_isa 10,3,3,"AMD","AMDGPU"
; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU"
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AMDGPU/hsa.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s

; The SHT_NOTE section contains the output from the .hsa_code_object_*
; directives.
Expand Down Expand Up @@ -49,12 +49,10 @@
; HSA: enable_sgpr_kernarg_segment_ptr = 1

; PRE-GFX10: enable_wavefront_size32 = 0
; GFX10-W32: enable_wavefront_size32 = 1
; GFX10-W64: enable_wavefront_size32 = 0
; GFX10-W32: .amdhsa_wavefront_size32 1
; GFX10-W64: .amdhsa_wavefront_size32 0

; PRE-GFX10: wavefront_size = 6
; GFX10-W32: wavefront_size = 5
; GFX10-W64: wavefront_size = 6

; HSA: call_convention = -1
; HSA: .end_amd_kernel_code_t
Expand All @@ -66,7 +64,7 @@
; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
; Make sure we generate flat store for HSA
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX10: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
; GFX10: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off

; HSA: .Lfunc_end0:
; HSA: .size simple, .Lfunc_end0-simple
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/AMDGPU/kernarg-size.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s

; Intrinsic declarations. Only @llvm.trap is called by the kernel in this test;
; @llvm.debugtrap is declared but has no visible use.
; NOTE(review): attribute groups #0 and #1 are referenced here, but no
; "attributes #N = ..." definition is visible in this file - confirm intended.
declare void @llvm.trap() #0
declare void @llvm.debugtrap() #1

; HSA: .amdhsa_kernel trap
; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
; HSA-NEXT: .amdhsa_kernarg_size 8
; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .end_amdhsa_kernel

; Kernel taking a single global (addrspace(1)) pointer argument. It performs a
; volatile store of 1 through %arg0 and then calls @llvm.trap.
; NOTE(review): %arg0 is marked readonly yet is stored through - confirm this is
; intentional for the test.
define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
store volatile i32 1, i32 addrspace(1)* %arg0
call void @llvm.trap()
unreachable
; The instructions below follow the 'unreachable' terminator, so they start a
; new, unlabeled block that nothing branches to.
store volatile i32 2, i32 addrspace(1)* %arg0
ret void
}
63 changes: 36 additions & 27 deletions llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s

; FIXME: align on alloca seems to be ignored for private_segment_alignment

Expand All @@ -19,30 +19,6 @@
; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000


; GCNHSA: .amd_kernel_code_t

; GCNHSA: enable_sgpr_private_segment_wave_byte_offset = 1
; GCNHSA: user_sgpr_count = 8
; GCNHSA: enable_sgpr_workgroup_id_x = 1
; GCNHSA: enable_sgpr_workgroup_id_y = 0
; GCNHSA: enable_sgpr_workgroup_id_z = 0
; GCNHSA: enable_sgpr_workgroup_info = 0
; GCNHSA: enable_vgpr_workitem_id = 0

; GCNHSA: enable_sgpr_private_segment_buffer = 1
; GCNHSA: enable_sgpr_dispatch_ptr = 0
; GCNHSA: enable_sgpr_queue_ptr = 0
; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1
; GCNHSA: enable_sgpr_dispatch_id = 0
; GCNHSA: enable_sgpr_flat_scratch_init = 1
; GCNHSA: enable_sgpr_private_segment_size = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0
; GCNHSA: workitem_private_segment_byte_size = 32772
; GCNHSA: private_segment_alignment = 4
; GCNHSA: .end_amd_kernel_code_t

; GFX10HSA: s_add_u32 [[FLAT_SCR_LO:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10HSA-DAG: s_addc_u32 [[FLAT_SCR_HI:s[0-9]+]], s{{[0-9]+}}, 0
; GFX10HSA-DAG: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), [[FLAT_SCR_LO]]
Expand All @@ -51,6 +27,39 @@
; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen

; GCNHSA: .amdhsa_kernel large_alloca_compute_shader
; GCNHSA: .amdhsa_group_segment_fixed_size 0
; GCNHSA: .amdhsa_private_segment_fixed_size 32772
; GCNHSA: .amdhsa_user_sgpr_private_segment_buffer 1
; GCNHSA: .amdhsa_user_sgpr_dispatch_ptr 0
; GCNHSA: .amdhsa_user_sgpr_queue_ptr 0
; GCNHSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCNHSA: .amdhsa_user_sgpr_dispatch_id 0
; GCNHSA: .amdhsa_user_sgpr_flat_scratch_init 1
; GCNHSA: .amdhsa_user_sgpr_private_segment_size 0
; GCNHSA: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_x 1
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_y 0
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_z 0
; GCNHSA: .amdhsa_system_sgpr_workgroup_info 0
; GCNHSA: .amdhsa_system_vgpr_workitem_id 0
; GCNHSA: .amdhsa_next_free_vgpr 3
; GCNHSA: .amdhsa_next_free_sgpr 10
; GCNHSA: .amdhsa_float_round_mode_32 0
; GCNHSA: .amdhsa_float_round_mode_16_64 0
; GCNHSA: .amdhsa_float_denorm_mode_32 3
; GCNHSA: .amdhsa_float_denorm_mode_16_64 3
; GCNHSA: .amdhsa_dx10_clamp 1
; GCNHSA: .amdhsa_ieee_mode 1
; GCNHSA: .amdhsa_exception_fp_ieee_invalid_op 0
; GCNHSA: .amdhsa_exception_fp_denorm_src 0
; GCNHSA: .amdhsa_exception_fp_ieee_div_zero 0
; GCNHSA: .amdhsa_exception_fp_ieee_overflow 0
; GCNHSA: .amdhsa_exception_fp_ieee_underflow 0
; GCNHSA: .amdhsa_exception_fp_ieee_inexact 0
; GCNHSA: .amdhsa_exception_int_div_zero 0
; GCNHSA: .end_amdhsa_kernel

; Scratch size = alloca size + emergency stack slot, align {{.*}}, addrspace(5)
; ALL: ; ScratchSize: 32772
define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
Expand Down
23 changes: 14 additions & 9 deletions llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GFX8 %s
; RUN: FileCheck -check-prefix=ERR %s < %t

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GFX9 %s
; RUN: FileCheck -check-prefix=ERR %s < %t

; Internal float global in addrspace(3) (local memory); its address is used by
; @func_use_lds_global_constexpr_cast in this test.
@lds = internal addrspace(3) global float undef, align 4
Expand All @@ -25,7 +25,6 @@ define void @func_use_lds_global() {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -35,12 +34,18 @@ define void @func_use_lds_global() {

; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
; Non-kernel function whose only use of @lds is through a constant-expression
; cast of its address.
define void @func_use_lds_global_constexpr_cast() {
; GCN-LABEL: func_use_lds_global_constexpr_cast:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[0:1], s[6:7]
; GCN-NEXT: s_trap 2
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX8-LABEL: func_use_lds_global_constexpr_cast:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX8-NEXT: s_trap 2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_use_lds_global_constexpr_cast:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: s_setpc_b64 s[30:31]
; Store the address of @lds, converted to i32 by a constant ptrtoint
; expression, through an undef global (addrspace(1)) pointer.
store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
ret void
}
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s

define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_lshr_v2i16:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; TODO: Some of these tests fail with OS == amdhsa due to unreasonable register
; allocation differences.
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}s_mulk_i32_k0:
; SI: s_load_dword [[VAL:s[0-9]+]]
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s

; Make sure the correct set of targets is marked with
; FeatureDoesNotSupportSRAMECC, and that +sramecc is ignored on targets that
; never support it.

; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:
Expand Down
13 changes: 11 additions & 2 deletions llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.

Expand All @@ -20,6 +20,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; VI-NEXT: .amdhsa_kernel max_alignment_128
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
Expand Down Expand Up @@ -67,6 +68,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
Expand All @@ -83,6 +85,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
Expand Down Expand Up @@ -121,6 +124,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-NEXT: .amdhsa_kernel stackrealign_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 8
; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
Expand Down Expand Up @@ -168,6 +172,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
Expand All @@ -184,6 +189,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
Expand Down Expand Up @@ -222,6 +228,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; VI-NEXT: .amdhsa_kernel alignstack_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
Expand Down Expand Up @@ -269,6 +276,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
Expand All @@ -285,6 +293,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x12C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Empty function with no attribute group attached, so it does not request any
; explicit xnack setting; it returns immediately.
define void @func0() {
entry:
ret void
}

; Empty function with no attribute group attached, so it does not request any
; explicit xnack setting; it returns immediately.
define void @func1() {
entry:
ret void
}

; Empty function with no attribute group attached, so it does not request any
; explicit xnack setting; it returns immediately.
define void @func2() {
entry:
ret void
}
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]

; Empty function with no attribute group attached; it returns immediately.
define void @func0() {
entry:
ret void
}

; Empty function with no attribute group attached; it returns immediately.
define void @func1() {
entry:
ret void
}

; Empty function with no attribute group attached; it returns immediately.
define void @func2() {
entry:
ret void
}
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Module body: all three functions use attribute group #0, which turns xnack
; off explicitly. The checks above expect a target ID ending in "xnack-" and
; the EF_AMDGPU_FEATURE_XNACK_OFF_V4 header flag.
define void @func0() #0 {
entry:
ret void
}

define void @func1() #0 {
entry:
ret void
}

define void @func2() #0 {
entry:
ret void
}

; Explicit xnack-off setting shared by every function in this module.
attributes #0 = { "target-features"="-xnack" }
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Module body: all three functions use attribute group #0, which turns xnack
; on explicitly. The checks above expect a target ID ending in "xnack+" and
; the EF_AMDGPU_FEATURE_XNACK_ON_V4 header flag.
define void @func0() #0 {
entry:
ret void
}

define void @func1() #0 {
entry:
ret void
}

define void @func2() #0 {
entry:
ret void
}

; Explicit xnack-on setting shared by every function in this module.
attributes #0 = { "target-features"="+xnack" }
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Module body: a mix of settings where only the middle function (@func1)
; requests xnack off; @func0 and @func2 carry no "target-features" attribute.
; The checks above still expect "xnack-" / EF_AMDGPU_FEATURE_XNACK_OFF_V4 for
; the module as a whole.
define void @func0() {
entry:
ret void
}

; The only function with an explicit xnack setting.
define void @func1() #0 {
entry:
ret void
}

define void @func2() {
entry:
ret void
}

; Explicit xnack-off setting, used by @func1 only.
attributes #0 = { "target-features"="-xnack" }
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Module body: a mix of settings where only the first function (@func0)
; requests xnack off; @func1 and @func2 carry no "target-features" attribute.
; The checks above still expect "xnack-" / EF_AMDGPU_FEATURE_XNACK_OFF_V4 for
; the module as a whole.
; The only function with an explicit xnack setting.
define void @func0() #0 {
entry:
ret void
}

define void @func1() {
entry:
ret void
}

define void @func2() {
entry:
ret void
}

; Explicit xnack-off setting, used by @func0 only.
attributes #0 = { "target-features"="-xnack" }
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Module body: a mix of settings where only the middle function (@func1)
; requests xnack on; @func0 and @func2 carry no "target-features" attribute.
; The checks above still expect "xnack+" / EF_AMDGPU_FEATURE_XNACK_ON_V4 for
; the module as a whole.
define void @func0() {
entry:
ret void
}

; The only function with an explicit xnack setting.
define void @func1() #0 {
entry:
ret void
}

define void @func2() {
entry:
ret void
}

; Explicit xnack-on setting, used by @func1 only.
attributes #0 = { "target-features"="+xnack" }
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Module body: a mix of settings where only the first function (@func0)
; requests xnack on; @func1 and @func2 carry no "target-features" attribute.
; The checks above still expect "xnack+" / EF_AMDGPU_FEATURE_XNACK_ON_V4 for
; the module as a whole.
; The only function with an explicit xnack setting.
define void @func0() #0 {
entry:
ret void
}

define void @func1() {
entry:
ret void
}

define void @func2() {
entry:
ret void
}

; Explicit xnack-on setting, used by @func0 only.
attributes #0 = { "target-features"="+xnack" }
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefixes=ERR %s

; ERR: error: xnack setting of 'func2' function does not match module xnack setting

; Module body for the negative test: the functions disagree on xnack.
; @func0 leaves it unspecified, @func1 turns it off and @func2 turns it on.
; The RUN line expects llc to fail, and the expected diagnostic names @func2.
define void @func0() {
entry:
ret void
}

; Explicit xnack off (attribute group #0).
define void @func1() #0 {
entry:
ret void
}

; Explicit xnack on (attribute group #1), conflicting with @func1; this is the
; function named in the expected error message.
define void @func2() #1 {
entry:
ret void
}

; #0 disables xnack, #1 enables it.
attributes #0 = { "target-features"="-xnack" }
attributes #1 = { "target-features"="+xnack" }
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x12C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Single empty function with no "target-features" attribute. The checks above
; expect a target ID with no xnack suffix and the
; EF_AMDGPU_FEATURE_XNACK_ANY_V4 header flag.
define void @func0() {
entry:
ret void
}
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]

; Single empty function with no "target-features" attribute. The RUN lines
; build it for gfx700, and the expected header flags contain only the
; EF_AMDGPU_MACH_AMDGCN_GFX700 value (no xnack feature flag).
define void @func0() {
entry:
ret void
}
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Single empty function that turns xnack off through attribute group #0. The
; checks above expect a target ID ending in "xnack-" and the
; EF_AMDGPU_FEATURE_XNACK_OFF_V4 header flag.
define void @func0() #0 {
entry:
ret void
}

; Explicit xnack-off setting for @func0.
attributes #0 = { "target-features"="-xnack" }
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s

; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1

; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]

; Single empty function that turns xnack on through attribute group #0. The
; checks above expect a target ID ending in "xnack+" and the
; EF_AMDGPU_FEATURE_XNACK_ON_V4 header flag.
define void @func0() #0 {
entry:
ret void
}

; Explicit xnack-on setting for @func0.
attributes #0 = { "target-features"="+xnack" }
1,181 changes: 1,181 additions & 0 deletions llvm/test/CodeGen/AMDGPU/trap-abis.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/MC/AMDGPU/hsa-diag-v3.s
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA
// RUN: not llvm-mc -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,NONGFX10,AMDHSA,ALL

.text

// GCN-LABEL: warning: test_target
// GFX8-NOT: error:
// GFX10: error: target must match options
// NONAMDHSA: error: unknown directive
// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack
// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810
.warning "test_target"
.amdgcn_target "amdgcn-amd-amdhsa--gfx803+xnack"
.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"

// GCN-LABEL: warning: test_amdhsa_kernel_no_name
// GCN: error: unknown directive
Expand Down
16 changes: 9 additions & 7 deletions llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s

Expand Down Expand Up @@ -28,7 +28,7 @@
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
Expand Down Expand Up @@ -80,6 +80,7 @@ special_sgpr:
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
Expand All @@ -98,7 +99,7 @@ special_sgpr:
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
Expand All @@ -121,6 +122,7 @@ special_sgpr:
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
Expand All @@ -139,7 +141,7 @@ special_sgpr:
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
Expand Down Expand Up @@ -169,7 +171,7 @@ special_sgpr:
.amdhsa_reserve_flat_scratch 1

.amdhsa_reserve_vcc 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1

.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
Expand All @@ -181,7 +183,7 @@ special_sgpr:
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
Expand Down
16 changes: 9 additions & 7 deletions llvm/test/MC/AMDGPU/hsa-v3.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s

Expand Down Expand Up @@ -31,7 +31,7 @@
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
Expand Down Expand Up @@ -93,6 +93,7 @@ disabled_user_sgpr:
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
Expand All @@ -110,7 +111,7 @@ disabled_user_sgpr:
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
Expand All @@ -130,6 +131,7 @@ disabled_user_sgpr:
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
Expand All @@ -147,7 +149,7 @@ disabled_user_sgpr:
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
Expand All @@ -174,7 +176,7 @@ disabled_user_sgpr:
.amdhsa_reserve_flat_scratch 1

.amdhsa_reserve_vcc 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1

.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
Expand All @@ -186,7 +188,7 @@ disabled_user_sgpr:
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
Expand Down
303 changes: 303 additions & 0 deletions llvm/test/MC/AMDGPU/hsa-v4.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s

// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64

// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310

// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd

// OBJDUMP: Contents of section .rodata
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
// disabled_user_sgpr
// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000

.text
// ASM: .text

.amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"

// Kernel entry points referenced by the .amdhsa_kernel descriptors further
// down. Each one is a lone s_endpgm placed on a 256-byte boundary
// (.p2align 8), which lines up with the symbol values 0x000, 0x100, 0x200 and
// 0x300 expected in the symbol table at the top of this file.
.p2align 8
.type minimal,@function
minimal:
s_endpgm

.p2align 8
.type complete,@function
complete:
s_endpgm

.p2align 8
.type special_sgpr,@function
special_sgpr:
s_endpgm

.p2align 8
.type disabled_user_sgpr,@function
disabled_user_sgpr:
s_endpgm

.rodata
// ASM: .rodata

// Test that only specifying required directives is allowed, and that defaulted
// values are omitted.
// Only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are given here. The
// descriptor is aligned to 64 bytes (.p2align 6), the size listed for each
// .kd object in the expected symbol table.
.p2align 6
.amdhsa_kernel minimal
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel

// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .end_amdhsa_kernel

// Test that we can specify all available directives with non-default values.
// The printed form checked after this block is expected to list the same
// directives back, one per line and in the same order as written here.
// NOTE(review): the first three words of the expected .rodata row at 0x0040
// (01000000 01000000 08000000) presumably hold the group segment size,
// private segment size and kernarg size set below -- confirm against the
// kernel descriptor layout.
.p2align 6
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
.amdhsa_user_sgpr_kernarg_segment_ptr 1
.amdhsa_user_sgpr_dispatch_id 1
.amdhsa_user_sgpr_flat_scratch_init 1
.amdhsa_user_sgpr_private_segment_size 1
.amdhsa_system_sgpr_private_segment_wavefront_offset 1
.amdhsa_system_sgpr_workgroup_id_x 0
.amdhsa_system_sgpr_workgroup_id_y 1
.amdhsa_system_sgpr_workgroup_id_z 1
.amdhsa_system_sgpr_workgroup_info 1
.amdhsa_system_vgpr_workitem_id 1
.amdhsa_next_free_vgpr 9
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
.amdhsa_ieee_mode 0
.amdhsa_fp16_overflow 1
.amdhsa_exception_fp_ieee_invalid_op 1
.amdhsa_exception_fp_denorm_src 1
.amdhsa_exception_fp_ieee_div_zero 1
.amdhsa_exception_fp_ieee_overflow 1
.amdhsa_exception_fp_ieee_underflow 1
.amdhsa_exception_fp_ieee_inexact 1
.amdhsa_exception_int_div_zero 1
.end_amdhsa_kernel

// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
// ASM-NEXT: .amdhsa_next_free_vgpr 9
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM-NEXT: .amdhsa_fp16_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
// ASM-NEXT: .amdhsa_exception_int_div_zero 1
// ASM-NEXT: .end_amdhsa_kernel

// Test that we are including special SGPR usage in the granulated count.
// The directives below are written in a different order from the printed
// form checked after this block, where .amdhsa_next_free_vgpr comes first.
.p2align 6
.amdhsa_kernel special_sgpr
// Same next_free_sgpr as "complete", but...
.amdhsa_next_free_sgpr 27
// ...on GFX9 this should require an additional 6 SGPRs, pushing us from
// 3 granules to 4
.amdhsa_reserve_flat_scratch 1

.amdhsa_reserve_vcc 0
.amdhsa_reserve_xnack_mask 1

.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
.amdhsa_ieee_mode 0
.amdhsa_next_free_vgpr 0
.end_amdhsa_kernel

// ASM: .amdhsa_kernel special_sgpr
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM: .end_amdhsa_kernel

// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
// count, i.e. this should produce the same descriptor as minimal.
// Accordingly, the expected .rodata rows for this kernel (0x00c0-0x00f0) are
// identical to the rows expected for minimal (0x0000-0x0030).
.p2align 6
.amdhsa_kernel disabled_user_sgpr
.amdhsa_user_sgpr_private_segment_buffer 0
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel

// ASM: .amdhsa_kernel disabled_user_sgpr
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .end_amdhsa_kernel

// Assembler-provided .amdgcn.* symbols, emitted as bytes so their values can
// be checked in the printed output.
.section .foo

// The three gfx_generation_* symbols are expected to read 9, 0 and 4, in line
// with the -mcpu=gfx904 given on the RUN lines.
.byte .amdgcn.gfx_generation_number
// ASM: .byte 9

.byte .amdgcn.gfx_generation_minor
// ASM: .byte 0

.byte .amdgcn.gfx_generation_stepping
// ASM: .byte 4

// Both register-tracking symbols are expected to read 0 at this point.
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0

// After an instruction that uses v7 and s10, the symbols are expected to read
// 8 and 11, i.e. one past the register index used.
v_mov_b32_e32 v7, s10

.byte .amdgcn.next_free_vgpr
// ASM: .byte 8
.byte .amdgcn.next_free_sgpr
// ASM: .byte 11

// The symbols can be reset by hand with .set ...
.set .amdgcn.next_free_vgpr, 0
.set .amdgcn.next_free_sgpr, 0

.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0

// ... and are then expected to follow the next register use again
// (v16 -> 17, s3 -> 4).
v_mov_b32_e32 v16, s3

.byte .amdgcn.next_free_vgpr
// ASM: .byte 17
.byte .amdgcn.next_free_sgpr
// ASM: .byte 4

// Metadata
// A YAML-style document between .amdgpu_metadata and .end_amdgpu_metadata
// that describes two kernels with identical field values. The checks after
// this block expect it to be printed back with each kernel's keys in sorted
// order and with amdhsa.kernels ahead of amdhsa.version, i.e. not in the
// order written here.

.amdgpu_metadata
amdhsa.version:
- 3
- 0
amdhsa.kernels:
- .name: amd_kernel_code_t_test_all
.symbol: amd_kernel_code_t_test_all@kd
.kernarg_segment_size: 8
.group_segment_fixed_size: 16
.private_segment_fixed_size: 32
.kernarg_segment_align: 64
.wavefront_size: 128
.sgpr_count: 14
.vgpr_count: 40
.max_flat_workgroup_size: 256
- .name: amd_kernel_code_t_minimal
.symbol: amd_kernel_code_t_minimal@kd
.kernarg_segment_size: 8
.group_segment_fixed_size: 16
.private_segment_fixed_size: 32
.kernarg_segment_align: 64
.wavefront_size: 128
.sgpr_count: 14
.vgpr_count: 40
.max_flat_workgroup_size: 256
.end_amdgpu_metadata

// ASM: .amdgpu_metadata
// ASM: amdhsa.kernels:
// ASM: - .group_segment_fixed_size: 16
// ASM: .kernarg_segment_align: 64
// ASM: .kernarg_segment_size: 8
// ASM: .max_flat_workgroup_size: 256
// ASM: .name: amd_kernel_code_t_test_all
// ASM: .private_segment_fixed_size: 32
// ASM: .sgpr_count: 14
// ASM: .symbol: 'amd_kernel_code_t_test_all@kd'
// ASM: .vgpr_count: 40
// ASM: .wavefront_size: 128
// ASM: - .group_segment_fixed_size: 16
// ASM: .kernarg_segment_align: 64
// ASM: .kernarg_segment_size: 8
// ASM: .max_flat_workgroup_size: 256
// ASM: .name: amd_kernel_code_t_minimal
// ASM: .private_segment_fixed_size: 32
// ASM: .sgpr_count: 14
// ASM: .symbol: 'amd_kernel_code_t_minimal@kd'
// ASM: .vgpr_count: 40
// ASM: .wavefront_size: 128
// ASM: amdhsa.version:
// ASM-NEXT: - 3
// ASM-NEXT: - 0
// ASM: .end_amdgpu_metadata
4 changes: 1 addition & 3 deletions llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts,-xnack -show-encoding %s | FileCheck --check-prefix=GFX9 %s

.hsa_code_object_isa
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
6 changes: 3 additions & 3 deletions llvm/test/MC/AMDGPU/isa-version-hsa.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s

// OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
6 changes: 3 additions & 3 deletions llvm/test/MC/AMDGPU/isa-version-pal.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s

// OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
6 changes: 3 additions & 3 deletions llvm/test/MC/AMDGPU/isa-version-unk.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s

// OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/round-trip.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %s >%t-1.s
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %t-1.s >%t-2.s
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %s >%t-1.s
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %t-1.s >%t-2.s
# RUN: diff %t-1.s %t-2.s

# Test that AMDGPU assembly round-trips when run through MC; the first
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,23 @@
# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s

# ELF-SRAM-ECC-NONE: Flags [
# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-NONE-NEXT: ]

# ELF-SRAM-ECC-GFX900: Flags [
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: ]

# ELF-SRAM-ECC-XNACK-GFX900: Flags [
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ]

# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ]

# Doc1
--- !ELF
Expand All @@ -35,7 +35,7 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_SRAM_ECC ]
Flags: [ EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...

# Doc2
Expand All @@ -46,7 +46,7 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...

# Doc3
Expand All @@ -57,5 +57,5 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...
12 changes: 6 additions & 6 deletions llvm/test/Object/AMDGPU/elf-header-flags-xnack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-XNACK-GFX801 %s

# ELF-ALL: Flags [
# ELF-XNACK-NONE: EF_AMDGPU_XNACK (0x100)
# ELF-XNACK-NONE: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-XNACK-GFX801: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-XNACK-GFX801: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
# ELF-XNACK-GFX801: EF_AMDGPU_XNACK (0x100)
# ELF-ALL: ]

# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_XNACK ]
# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ]
# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_XNACK_V3 ]
# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ]

# Doc1
--- !ELF
Expand All @@ -22,7 +22,7 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_XNACK ]
Flags: [ EF_AMDGPU_FEATURE_XNACK_V3 ]
...

# Doc2
Expand All @@ -33,5 +33,5 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ]
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ]
...
5 changes: 3 additions & 2 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@
my_kernel.kd:
.long 0x00000000 ;; group_segment_fixed_size
.long 0x00000000 ;; private_segment_fixed_size
.quad 0x00FF000000000000 ;; reserved bytes.
.long 0x00000000 ;; kernarg_segment_size.
.long 0x00000000 ;; reserved bytes.
.quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works.

;; 20 reserved bytes.
.quad 0x0000000000000000
.quad 0x00FF000000000000 ;; reserved bytes.
.quad 0x0000000000000000
.long 0x00000000

Expand Down
16 changes: 8 additions & 8 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@

; RUN: split-file %s %t.dir

; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble

; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble

; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3
; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble


Expand All @@ -34,7 +34,7 @@
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_xnack_mask 1
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_vcc 1
.end_amdhsa_kernel

Expand All @@ -44,6 +44,6 @@
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 35
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_xnack_mask 1
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_vcc 1
.end_amdhsa_kernel
12 changes: 6 additions & 6 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@

; RUN: split-file %s %t.dir

; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble

; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble

; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3
; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble

;--- 1.s
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
;; Entirely zeroed kernel descriptor (for GFX10).

; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack -filetype=obj -o %t
; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s

;; TODO:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
;; Entirely zeroed kernel descriptor (for GFX9).

; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: diff %t1 %t2

; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s

;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details).
Expand Down
393 changes: 302 additions & 91 deletions llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test

Large diffs are not rendered by default.

49 changes: 26 additions & 23 deletions llvm/test/tools/llvm-readobj/ELF/note-amd.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,27 @@

// GNU: Displaying notes found in: .note.no.desc
// GNU-NEXT: Owner Data size Description
// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// GNU-NEXT: HSA Metadata:
// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_METADATA (AMD HSA Metadata)
// GNU-NEXT: AMD HSA Metadata:
// GNU-NEXT: {{^ $}}
// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_ISA (ISA Version)
// GNU-NEXT: ISA Version:
// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// GNU-NEXT: AMD HSA ISA Name:
// GNU-NEXT: {{^ $}}
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.desc
// GNU-NEXT: Owner Data size Description
// GNU-NEXT: AMD 0x0000000a NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// GNU-NEXT: HSA Metadata:
// GNU-NEXT: AMD 0x0000000a NT_AMD_HSA_METADATA (AMD HSA Metadata)
// GNU-NEXT: AMD HSA Metadata:
// GNU-NEXT: meta_blah
// GNU-NEXT: AMD 0x00000009 NT_AMD_AMDGPU_ISA (ISA Version)
// GNU-NEXT: ISA Version:
// GNU-NEXT: AMD 0x00000009 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// GNU-NEXT: AMD HSA ISA Name:
// GNU-NEXT: isa_blah
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.other
// GNU-NEXT: Owner Data size Description
// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
// GNU-NEXT: AMD 0x00000000 NT_AMD_PAL_METADATA (AMD PAL Metadata)
// GNU-NEXT: AMD PAL Metadata:
// GNU-NEXT: {{^ $}}
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.unknown
// GNU-NEXT: Owner Data size Description
Expand All @@ -40,14 +42,14 @@
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// LLVM-NEXT: HSA Metadata:
// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata)
// LLVM-NEXT: AMD HSA Metadata:
// LLVM-NEXT: }
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version)
// LLVM-NEXT: ISA Version:
// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// LLVM-NEXT: AMD HSA ISA Name:
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
Expand All @@ -57,14 +59,14 @@
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0xA
// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// LLVM-NEXT: HSA Metadata: meta_blah
// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata)
// LLVM-NEXT: AMD HSA Metadata: meta_blah
// LLVM-NEXT: }
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x9
// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version)
// LLVM-NEXT: ISA Version: isa_blah
// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// LLVM-NEXT: AMD HSA ISA Name: isa_blah
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
Expand All @@ -74,7 +76,8 @@
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
// LLVM-NEXT: Type: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
// LLVM-NEXT: Type: NT_AMD_PAL_METADATA (AMD PAL Metadata)
// LLVM-NEXT: AMD PAL Metadata:
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
Expand All @@ -96,25 +99,25 @@
.align 4
.long 4 /* namesz */
.long 0 /* descsz */
.long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */
.long 10 /* type = NT_AMD_HSA_METADATA */
.asciz "AMD"
.long 4 /* namesz */
.long 0 /* descsz */
.long 11 /* type = NT_AMD_AMDGPU_ISA */
.long 11 /* type = NT_AMD_HSA_ISA_NAME */
.asciz "AMD"
.section ".note.desc", "a"
.align 4
.long 4 /* namesz */
.long end.meta - begin.meta /* descsz */
.long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */
.long 10 /* type = NT_AMD_HSA_METADATA */
.asciz "AMD"
begin.meta:
.asciz "meta_blah"
end.meta:
.align 4
.long 4 /* namesz */
.long end.isa - begin.isa /* descsz */
.long 11 /* type = NT_AMD_AMDGPU_ISA */
.long 11 /* type = NT_AMD_HSA_ISA_NAME */
.asciz "AMD"
begin.isa:
.asciz "isa_blah"
Expand All @@ -124,7 +127,7 @@ end.isa:
.align 4
.long 4 /* namesz */
.long 0 /* descsz */
.long 12 /* type = NT_AMD_AMDGPU_PAL_METADATA */
.long 12 /* type = NT_AMD_PAL_METADATA */
.asciz "AMD"
.section ".note.unknown", "a"
.align 4
Expand Down
195 changes: 175 additions & 20 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1430,7 +1430,7 @@ static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};

static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
Expand Down Expand Up @@ -1477,8 +1477,63 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};

// Names for the AMDGPU e_flags values printed when the ELF header's
// EI_ABIVERSION is ELFABIVERSION_AMDGPU_HSA_V4. The table lists every
// EF_AMDGPU_MACH_* value followed by the V4 XNACK and SRAMECC settings, each
// of which has ANY/OFF/ON entries. printFileHeaders() passes this table to
// printFlags() together with the EF_AMDGPU_MACH, EF_AMDGPU_FEATURE_XNACK_V4
// and EF_AMDGPU_FEATURE_SRAMECC_V4 masks.
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RS880),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV670),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV710),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV730),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV770),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CEDAR),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CYPRESS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_JUNIPER),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_REDWOOD),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_SUMO),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_BARTS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAICOS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAYMAN),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX702),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
};

static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
Expand Down Expand Up @@ -4944,15 +4999,95 @@ static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return {"", ""};
case ELF::NT_AMD_AMDGPU_HSA_METADATA:
case ELF::NT_AMD_HSA_CODE_OBJECT_VERSION: {
  // The descriptor must be exactly two 32-bit words: major, then minor.
  struct CodeObjectVersion {
    uint32_t MajorVersion;
    uint32_t MinorVersion;
  };
  if (Desc.size() != sizeof(CodeObjectVersion))
    return {"AMD HSA Code Object Version",
            "Invalid AMD HSA Code Object Version"};
  auto Version = reinterpret_cast<const CodeObjectVersion *>(Desc.data());
  std::string VersionString;
  raw_string_ostream StrOS(VersionString);
  StrOS << "[Major: " << Version->MajorVersion
        << ", Minor: " << Version->MinorVersion << "]";
  // str() flushes the stream before the string is copied into the result;
  // copying VersionString directly could miss text still held in the stream.
  return {"AMD HSA Code Object Version", StrOS.str()};
}
case ELF::NT_AMD_HSA_HSAIL: {
  // The descriptor is read through this struct and is rejected unless its
  // size equals sizeof(HSAILProperties).
  struct HSAILProperties {
    uint32_t HSAILMajorVersion;
    uint32_t HSAILMinorVersion;
    uint8_t Profile;
    uint8_t MachineModel;
    uint8_t DefaultFloatRound;
  };
  if (Desc.size() != sizeof(HSAILProperties))
    return {"AMD HSA HSAIL Properties", "Invalid AMD HSA HSAIL Properties"};
  auto Properties = reinterpret_cast<const HSAILProperties *>(Desc.data());
  std::string HSAILPropertiesString;
  raw_string_ostream StrOS(HSAILPropertiesString);
  // The one-byte fields are widened before streaming: a uint8_t is an
  // unsigned char and would otherwise be emitted as a raw character rather
  // than as a number.
  StrOS << "[HSAIL Major: " << Properties->HSAILMajorVersion
        << ", HSAIL Minor: " << Properties->HSAILMinorVersion
        << ", Profile: " << static_cast<uint32_t>(Properties->Profile)
        << ", Machine Model: "
        << static_cast<uint32_t>(Properties->MachineModel)
        << ", Default Float Round: "
        << static_cast<uint32_t>(Properties->DefaultFloatRound) << "]";
  // str() flushes the stream before the string is copied into the result.
  return {"AMD HSA HSAIL Properties", StrOS.str()};
}
case ELF::NT_AMD_HSA_ISA_VERSION: {
  // Fixed-size header; the vendor name and then the architecture name follow
  // it in the descriptor, with their byte counts given by the two size
  // fields.
  struct IsaVersion {
    uint16_t VendorNameSize;
    uint16_t ArchitectureNameSize;
    uint32_t Major;
    uint32_t Minor;
    uint32_t Stepping;
  };
  if (Desc.size() < sizeof(IsaVersion))
    return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
  auto Isa = reinterpret_cast<const IsaVersion *>(Desc.data());
  // Both names must fit inside the descriptor and be non-empty.
  if (Desc.size() < sizeof(IsaVersion) + Isa->VendorNameSize +
                        Isa->ArchitectureNameSize ||
      Isa->VendorNameSize == 0 || Isa->ArchitectureNameSize == 0)
    return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
  // The last byte of each name is not printed (presumably a NUL terminator
  // -- TODO confirm against the note producer).
  const char *Names =
      reinterpret_cast<const char *>(Desc.data()) + sizeof(IsaVersion);
  StringRef Vendor(Names, Isa->VendorNameSize - 1);
  StringRef Architecture(Names + Isa->VendorNameSize,
                         Isa->ArchitectureNameSize - 1);
  std::string IsaString;
  raw_string_ostream StrOS(IsaString);
  StrOS << "[Vendor: " << Vendor << ", Architecture: " << Architecture
        << ", Major: " << Isa->Major << ", Minor: " << Isa->Minor
        << ", Stepping: " << Isa->Stepping << "]";
  // str() flushes the stream before the string is copied into the result.
  return {"AMD HSA ISA Version", StrOS.str()};
}
case ELF::NT_AMD_HSA_METADATA: {
if (Desc.size() == 0)
return {"AMD HSA Metadata", ""};
return {
"HSA Metadata",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
case ELF::NT_AMD_AMDGPU_ISA:
"AMD HSA Metadata",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size() - 1)};
}
case ELF::NT_AMD_HSA_ISA_NAME: {
if (Desc.size() == 0)
return {"AMD HSA ISA Name", ""};
return {
"ISA Version",
"AMD HSA ISA Name",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
}
case ELF::NT_AMD_PAL_METADATA: {
  // The descriptor is read as an array of 32-bit (key, value) pairs, each
  // printed as "[key: value]" with no separator between entries. Trailing
  // bytes that do not form a whole pair are ignored.
  struct PALMetadata {
    uint32_t Key;
    uint32_t Value;
  };
  auto Entries = reinterpret_cast<const PALMetadata *>(Desc.data());
  std::string MetadataString;
  raw_string_ostream StrOS(MetadataString);
  // Step the index I, not the bound E: incrementing E would keep I < E true
  // forever for any non-empty descriptor.
  for (size_t I = 0, E = Desc.size() / sizeof(PALMetadata); I < E; ++I)
    StrOS << "[" << Entries[I].Key << ": " << Entries[I].Value << "]";
  // str() flushes the stream before the string is copied into the result.
  return {"AMD PAL Metadata", StrOS.str()};
}
}
}

struct AMDGPUNote {
Expand All @@ -4973,11 +5108,11 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
return {"", ""};

AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
std::string HSAMetadataString;
std::string MetadataString;
if (!Verifier.verify(MsgPackDoc.getRoot()))
HSAMetadataString = "Invalid AMDGPU Metadata\n";
MetadataString = "Invalid AMDGPU Metadata\n";

raw_string_ostream StrOS(HSAMetadataString);
raw_string_ostream StrOS(MetadataString);
if (MsgPackDoc.getRoot().isScalar()) {
// TODO: passing a scalar root to toYAML() asserts:
// (PolymorphicTraits<T>::getKind(Val) != NodeKind::Scalar &&
Expand Down Expand Up @@ -5106,11 +5241,13 @@ static const NoteType FreeBSDNoteTypes[] = {
};

static const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_AMDGPU_HSA_METADATA,
"NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
{ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
{ELF::NT_AMD_AMDGPU_PAL_METADATA,
"NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"},
{ELF::NT_AMD_HSA_CODE_OBJECT_VERSION,
"NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"},
{ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"},
{ELF::NT_AMD_HSA_ISA_VERSION, "NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)"},
{ELF::NT_AMD_HSA_METADATA, "NT_AMD_HSA_METADATA (AMD HSA Metadata)"},
{ELF::NT_AMD_HSA_ISA_NAME, "NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)"},
{ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"},
};

static const NoteType AMDGPUNoteTypes[] = {
Expand Down Expand Up @@ -6050,10 +6187,28 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
else if (E.e_machine == EM_AMDGPU)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
unsigned(ELF::EF_AMDGPU_MACH));
else if (E.e_machine == EM_RISCV)
else if (E.e_machine == EM_AMDGPU) {
switch (E.e_ident[ELF::EI_ABIVERSION]) {
default:
W.printHex("Flags", E.e_flags);
break;
case 0:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
LLVM_FALLTHROUGH;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion3),
unsigned(ELF::EF_AMDGPU_MACH));
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4),
unsigned(ELF::EF_AMDGPU_MACH),
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
}
} else if (E.e_machine == EM_RISCV)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else
W.printFlags("Flags", E.e_flags);
Expand Down