Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] Reserve extra SGPR blocks with XNACK "any" TID Setting
ASMPrinter was relying on feature bits to set up extra SGPRs in the kernel descriptor for the xnack_mask. This was broken for the dynamic XNACK "any" TID setting, which could cause user SGPRs to be clobbered if the number of SGPRs reserved was near a granulated block boundary. When XNACK was enabled, this worked correctly in the ASMParser, which meant some kernels were only failing without "-save-temps". Fixes: SWDEV-382764 Reviewed By: kzhuravl Differential Revision: https://reviews.llvm.org/D145401
- Loading branch information
Showing
10 changed files
with
318 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefixes=ASM %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s | ||
|
||
; TODO: Update to check for granulated sgpr count directive once one is added. | ||
|
||
define amdgpu_kernel void @kern() { | ||
; ASM-LABEL: kern: | ||
; ASM: .amdhsa_next_free_sgpr 5 | ||
; ASM: .amdhsa_reserve_xnack_mask 1 | ||
|
||
; Verify that an extra SGPR block is reserved with XNACK "any" tid setting. | ||
; OBJ: Contents of section .rodata: | ||
; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0030 4000af00 88000000 01000000 00000000 @............... | ||
|
||
; ELF: AMDGPU Metadata | ||
; ELF: .sgpr_count: 9 | ||
entry: | ||
tail call void asm sideeffect "", "~{s[0:4]}"() | ||
ret void | ||
} | ||
|
||
!llvm.module.flags = !{!0} | ||
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack < %s | FileCheck --check-prefixes=ASM %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s | ||
|
||
; TODO: Update to check for granulated sgpr count directive once one is added. | ||
|
||
define amdgpu_kernel void @kern() { | ||
; ASM-LABEL: kern: | ||
; ASM: .amdhsa_next_free_sgpr 5 | ||
; ASM: .amdhsa_reserve_xnack_mask 0 | ||
|
||
; Verify that an extra SGPR block is not reserved with XNACK "off" tid setting. | ||
; OBJ: Contents of section .rodata: | ||
; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0030 0000af00 88000000 01000000 00000000 ................ | ||
|
||
; ELF: AMDGPU Metadata | ||
; ELF: .sgpr_count: 5 | ||
entry: | ||
tail call void asm sideeffect "", "~{s[0:4]}"() | ||
ret void | ||
} | ||
|
||
!llvm.module.flags = !{!0} | ||
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack < %s | FileCheck --check-prefixes=ASM %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s | ||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s | ||
|
||
; TODO: Update to check for granulated sgpr count directive once one is added. | ||
|
||
define amdgpu_kernel void @kern() { | ||
; ASM-LABEL: kern: | ||
; ASM: .amdhsa_next_free_sgpr 5 | ||
; ASM: .amdhsa_reserve_xnack_mask 1 | ||
|
||
; Verify that an extra SGPR block is reserved with XNACK "on" tid setting. | ||
; OBJ: Contents of section .rodata: | ||
; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................ | ||
; OBJ-NEXT: 0030 4000af00 88000000 01000000 00000000 @............... | ||
|
||
; ELF: AMDGPU Metadata | ||
; ELF: .sgpr_count: 9 | ||
entry: | ||
tail call void asm sideeffect "", "~{s[0:4]}"() | ||
ret void | ||
} | ||
|
||
!llvm.module.flags = !{!0} | ||
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} |
Oops, something went wrong.