Skip to content

Commit

Permalink
AMDGPU: Report large stack usage for recursive calls
Browse files Browse the repository at this point in the history
We were previously setting an ignored bit in the kernel headers. The
current behavior is to add the large amount on top of the statically
known size of a single stack frame. I'm not sure if we should just use
the large size as the entire reported size instead.
  • Loading branch information
arsenm committed Nov 11, 2021
1 parent 1da33a5 commit c7a0c2d
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 6 deletions.
23 changes: 19 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Expand Up @@ -452,6 +452,25 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
if (!IsIndirect)
I = CallGraphResourceInfo.find(Callee);

// FIXME: Call site could have norecurse on it
if (!Callee || !Callee->doesNotRecurse()) {
Info.HasRecursion = true;

// TODO: If we happen to know there is no stack usage in the
// callgraph, we don't need to assume an infinitely growing stack.
if (!MI.isReturn()) {
// We don't need to assume an unknown stack size for tail calls.

// FIXME: This only benefits in the case where the kernel does not
// directly call the tail called function. If a kernel directly
// calls a tail recursive function, we'll assume maximum stack size
// based on the regular call instruction.
CalleeFrameSize =
std::max(CalleeFrameSize,
static_cast<uint64_t>(AssumedStackSizeForExternalCall));
}
}

if (IsIndirect || I == CallGraphResourceInfo.end()) {
CalleeFrameSize =
std::max(CalleeFrameSize,
Expand All @@ -476,10 +495,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
Info.HasRecursion |= I->second.HasRecursion;
Info.HasIndirectCall |= I->second.HasIndirectCall;
}

// FIXME: Call site could have norecurse on it
if (!Callee || !Callee->doesNotRecurse())
Info.HasRecursion = true;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
Expand Up @@ -199,7 +199,7 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
}

; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 2064
; GCN: ScratchSize: 18448{{$}}
define void @direct_recursion_use_stack(i32 %val) #2 {
%alloca = alloca [512 x i32], align 4, addrspace(5)
call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
Expand All @@ -218,7 +218,7 @@ ret:
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
; GCN: workitem_private_segment_byte_size = 2064
; GCN: workitem_private_segment_byte_size = 18448{{$}}
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
call void @direct_recursion_use_stack(i32 %n)
ret void
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -0,0 +1,64 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

; CHECK-LABEL: {{^}}recursive:
; CHECK: ScratchSize: 16
; Directly self-recursive function: it invokes itself with an ordinary
; (non-tail) call and then performs a volatile store before returning.
define void @recursive() {
call void @recursive()
; The store comes after the self-call, so the call is not the last
; operation in the function (presumably to keep it from being turned into
; a tail call -- confirm intent).
store volatile i32 0, i32 addrspace(1)* undef
ret void
}

; CHECK-LABEL: {{^}}tail_recursive:
; CHECK: ScratchSize: 0
; Self-recursive function whose only call to itself is marked `tail` and is
; immediately followed by the return. It has no allocas or other memory
; operations.
define void @tail_recursive() {
tail call void @tail_recursive()
ret void
}

; Non-kernel wrapper, itself marked norecurse, that reaches @tail_recursive
; through a `tail call`. The kernel @kernel_indirectly_calls_tail_recursive
; calls this wrapper rather than calling @tail_recursive directly.
define void @calls_tail_recursive() norecurse {
tail call void @tail_recursive()
ret void
}

; CHECK-LABEL: {{^}}tail_recursive_with_stack:
; Tail-recursive function that also has a stack object: it allocates one i32
; in addrspace(5), volatile-stores to it, then tail-calls itself.
; NOTE(review): unlike the neighbouring functions, the label for this function
; is not followed by a ScratchSize expectation -- confirm whether one was
; intended.
define void @tail_recursive_with_stack() {
%alloca = alloca i32, addrspace(5)
; Volatile store to the alloca (presumably so the stack slot is actually
; used and not optimized away -- confirm).
store volatile i32 0, i32 addrspace(5)* %alloca
tail call void @tail_recursive_with_stack()
ret void
}

; For an arbitrary recursive call, report a large number for unknown stack usage.
; CHECK-LABEL: {{^}}calls_recursive:
; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
; Kernel entry point that makes a regular (non-tail) call to the
; self-recursive function @recursive.
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
}

; Make sure we do not report a huge stack size for tail recursive
; functions
; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive:
; CHECK: .amdhsa_private_segment_fixed_size 0{{$}}
; Kernel entry point that reaches @tail_recursive only through the norecurse
; wrapper @calls_tail_recursive; the kernel itself never calls
; @tail_recursive directly.
define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
call void @calls_tail_recursive()
ret void
}

; TODO: Even though tail_recursive is only called as a tail call, we
; end up treating it as a generally recursive call because of the
; regular call in the kernel.

; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
; Kernel entry point that calls @tail_recursive directly with a regular
; (non-tail) call instruction.
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}

; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
; Kernel entry point that calls @tail_recursive_with_stack directly with a
; regular (non-tail) call instruction.
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
}

0 comments on commit c7a0c2d

Please sign in to comment.