Skip to content

Commit

Permalink
[AMDGPU] ISel for amdgpu_cs_chain[_preserve] functions
Browse files Browse the repository at this point in the history
Lower formal arguments and returns for functions with the
`amdgpu_cs_chain` and `amdgpu_cs_chain_preserve` calling conventions:

* Put `inreg` arguments into SGPRs, starting at s0, and other arguments
into VGPRs, starting at v8. No arguments should end up on the stack, if
we don't have enough registers we should error out.

* Lower the return (which is always void) as an S_ENDPGM.

* Set the ScratchRSrc register to s48:51, as described in the docs.

* Set the SP to s32, matching amdgpu_gfx. This might be revisited in a
future patch.

Differential Revision: https://reviews.llvm.org/D153517
  • Loading branch information
rovka committed Aug 21, 2023
1 parent 5f2a7fa commit 26dc284
Show file tree
Hide file tree
Showing 9 changed files with 4,246 additions and 1 deletion.
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,16 @@ def CC_AMDGPU : CallingConv<[
CCDelegateTo<CC_AMDGPU_Func>>
]>;

def CC_AMDGPU_CS_CHAIN : CallingConv<[
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
!foreach(i, !range(105), !cast<Register>("SGPR"#i))
>>>,

CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<
!foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))
>>>
]>;

// Trivial class to denote when a def is used only to get a RegMask, i.e.
// SaveList is ignored and the def is not used as part of any calling
// convention.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,9 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
return CC_AMDGPU;
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
return CC_AMDGPU_CS_CHAIN;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
Expand All @@ -1025,6 +1028,8 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,

MayNeedAGPRs = ST.hasMAIInsts();

if (!isEntryFunction()) {
if (AMDGPU::isChainCC(CC)) {
// Chain functions don't receive an SP from their caller, but are free to
// set one up. For now, we can use s32 to match what amdgpu_gfx functions
// would use if called, but this can be revisited.
// FIXME: Only reserve this if we actually need it.
StackPtrOffsetReg = AMDGPU::SGPR32;

ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

ArgInfo.PrivateSegmentBuffer =
ArgDescriptor::createRegister(ScratchRSrcReg);

ImplicitArgPtr = false;
} else if (!isEntryFunction()) {
if (CC != CallingConv::AMDGPU_Gfx)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1952,6 +1952,8 @@ bool isShader(CallingConv::ID cc) {
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
case CallingConv::AMDGPU_CS:
return true;
default:
Expand Down Expand Up @@ -1993,6 +1995,16 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) {
}
}

bool isChainCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
return true;
default:
return false;
}
}

bool isKernelCC(const Function *Func) {
return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,9 @@ bool isEntryFunctionCC(CallingConv::ID CC);
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

LLVM_READNONE
bool isChainCC(CallingConv::ID CC);

bool isKernelCC(const Function *Func);

// FIXME: Remove this when calling conventions cleaned up
Expand Down
376 changes: 376 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s

declare amdgpu_gfx void @use(...)

; FIXME: The values of the counters are undefined on entry to amdgpu_cs_chain_preserve functions, so these waits are unnecessary.

define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_endpgm
ret void
}
Loading

0 comments on commit 26dc284

Please sign in to comment.