Skip to content

Commit

Permalink
[AMDGPU] Always reserve flat scratch SGPR for architected flat scratch
Browse files Browse the repository at this point in the history
With architected flat scratch it becomes readonly. We must always
reserve SGPR pair for it even if we do not use scratch at all since
an attempt to write to SGPRs mapped to FLAT_SCRATCH results in
memory violation.

This is not needed since GFX10 with architected flat scratch though
since special SGPRs are not carving space from normal SGPRs.

Differential Revision: https://reviews.llvm.org/D110376
  • Loading branch information
rampitec committed Sep 24, 2021
1 parent 5988a3b commit 082e22f
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Expand Up @@ -769,7 +769,7 @@ GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.

if (HasFlatScratchInit) {
if (HasFlatScratchInit || HasArchitectedFlatScratch) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Expand Up @@ -671,7 +671,8 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
if (XNACKUsed)
ExtraSGPRs = 4;

if (FlatScrUsed)
if (FlatScrUsed ||
STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
ExtraSGPRs = 6;
}

Expand Down
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
Expand Up @@ -14,6 +14,14 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s

; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s

; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s

; GCN-LABEL: {{^}}no_vcc_no_flat:

; HSA-CI-V2: is_xnack_enabled = 0
Expand All @@ -26,6 +34,8 @@
; CI: ; NumSgprs: 8
; VI-NOXNACK: ; NumSgprs: 8
; VI-XNACK: ; NumSgprs: 12
; GFX9-ARCH-FLAT: ; NumSgprs: 14
; GFX10-ARCH-FLAT: ; NumSgprs: 8
define amdgpu_kernel void @no_vcc_no_flat() {
entry:
call void asm sideeffect "", "~{s7}"()
Expand All @@ -44,6 +54,8 @@ entry:
; CI: ; NumSgprs: 10
; VI-NOXNACK: ; NumSgprs: 10
; VI-XNACK: ; NumSgprs: 12
; GFX9-ARCH-FLAT: ; NumSgprs: 14
; GFX10-ARCH-FLAT: ; NumSgprs: 10
define amdgpu_kernel void @vcc_no_flat() {
entry:
call void asm sideeffect "", "~{s7},~{vcc}"()
Expand All @@ -62,6 +74,8 @@ entry:
; CI: ; NumSgprs: 12
; VI-NOXNACK: ; NumSgprs: 14
; VI-XNACK: ; NumSgprs: 14
; GFX9-ARCH-FLAT: ; NumSgprs: 14
; GFX10-ARCH-FLAT: ; NumSgprs: 8
define amdgpu_kernel void @no_vcc_flat() {
entry:
call void asm sideeffect "", "~{s7},~{flat_scratch}"()
Expand All @@ -80,6 +94,8 @@ entry:
; CI: ; NumSgprs: 12
; VI-NOXNACK: ; NumSgprs: 14
; VI-XNACK: ; NumSgprs: 14
; GFX9-ARCH-FLAT: ; NumSgprs: 14
; GFX10-ARCH-FLAT: ; NumSgprs: 10
define amdgpu_kernel void @vcc_flat() {
entry:
call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"()
Expand All @@ -101,6 +117,8 @@ entry:
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
; GFX9-ARCH-FLAT: ; NumSgprs: 6
; GFX10-ARCH-FLAT: ; NumSgprs: 0
define amdgpu_kernel void @use_flat_scr() #0 {
entry:
call void asm sideeffect "; clobber ", "~{flat_scratch}"()
Expand All @@ -119,6 +137,8 @@ entry:
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
; GFX9-ARCH-FLAT: ; NumSgprs: 6
; GFX10-ARCH-FLAT: ; NumSgprs: 0
define amdgpu_kernel void @use_flat_scr_lo() #0 {
entry:
call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"()
Expand All @@ -137,6 +157,8 @@ entry:
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
; GFX9-ARCH-FLAT: ; NumSgprs: 6
; GFX10-ARCH-FLAT: ; NumSgprs: 0
define amdgpu_kernel void @use_flat_scr_hi() #0 {
entry:
call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"()
Expand Down

0 comments on commit 082e22f

Please sign in to comment.