From c7a0c2d0f7be2f456bd72b5c3508966d5b10233b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 10 Nov 2021 18:16:54 -0500
Subject: [PATCH] AMDGPU: Report large stack usage for recursive calls

We were previously setting an ignored bit in the kernel headers. The
current behavior is to add the large assumed amount on top of the
statically known size of a single stack frame. I'm not sure whether we
should instead report the large size as the entire stack size.
---
 .../AMDGPU/AMDGPUResourceUsageAnalysis.cpp    | 23 +++++--
 .../AMDGPU/call-graph-register-usage.ll       |  4 +-
 llvm/test/CodeGen/AMDGPU/recursion.ll         | 64 +++++++++++++++++++
 3 files changed, 85 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/recursion.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 3c5cb6e190850..cb511e5e34839 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -452,6 +452,25 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
         if (!IsIndirect)
           I = CallGraphResourceInfo.find(Callee);
 
+        // FIXME: Call site could have norecurse on it
+        if (!Callee || !Callee->doesNotRecurse()) {
+          Info.HasRecursion = true;
+
+          // TODO: If we happen to know there is no stack usage in the
+          // callgraph, we don't need to assume an infinitely growing stack.
+          if (!MI.isReturn()) {
+            // We don't need to assume an unknown stack size for tail calls.
+
+            // FIXME: This only benefits in the case where the kernel does not
+            // directly call the tail called function. If a kernel directly
+            // calls a tail recursive function, we'll assume maximum stack size
+            // based on the regular call instruction.
+            CalleeFrameSize =
+                std::max(CalleeFrameSize,
+                         static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+          }
+        }
+
         if (IsIndirect || I == CallGraphResourceInfo.end()) {
           CalleeFrameSize =
               std::max(CalleeFrameSize,
@@ -476,10 +495,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           Info.HasRecursion |= I->second.HasRecursion;
           Info.HasIndirectCall |= I->second.HasIndirectCall;
         }
-
-        // FIXME: Call site could have norecurse on it
-        if (!Callee || !Callee->doesNotRecurse())
-          Info.HasRecursion = true;
       }
     }
   }
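The 16384-byte figure that appears in the updated tests is the existing
AssumedStackSizeForExternalCall setting, which this change now also applies
to call sites that may recurse. For reference, the option is declared
elsewhere in AMDGPUResourceUsageAnalysis.cpp roughly along these lines
(quoted from the surrounding file rather than this diff, so treat the exact
wording as approximate):

    static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
        "amdgpu-assume-external-call-stack-size",
        cl::desc("Assumed stack use of any external call (in bytes)"),
        cl::Hidden, cl::init(16384));

Because the assumed size is added on top of the statically known frame size,
the test update below expects 2064 + 16384 = 18448 bytes for a directly
recursive function with a 2064-byte frame, and recursion.ll expects
16 + 16384 = 16400 bytes for a kernel calling @recursive, whose own frame is
16 bytes.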
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 86f5c43e1429a..e91d62c4c3f2e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -199,7 +199,7 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
 }
 
 ; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: ScratchSize: 2064
+; GCN: ScratchSize: 18448{{$}}
 define void @direct_recursion_use_stack(i32 %val) #2 {
   %alloca = alloca [512 x i32], align 4, addrspace(5)
   call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
@@ -218,7 +218,7 @@ ret:
 ; GCN-LABEL: {{^}}usage_direct_recursion:
 ; GCN: is_ptr64 = 1
 ; GCN: is_dynamic_callstack = 1
-; GCN: workitem_private_segment_byte_size = 2064
+; GCN: workitem_private_segment_byte_size = 18448{{$}}
 define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
   call void @direct_recursion_use_stack(i32 %n)
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
new file mode 100644
index 0000000000000..14c97508da6a4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -0,0 +1,64 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}recursive:
+; CHECK: ScratchSize: 16
+define void @recursive() {
+  call void @recursive()
+  store volatile i32 0, i32 addrspace(1)* undef
+  ret void
+}
+
+; CHECK-LABEL: {{^}}tail_recursive:
+; CHECK: ScratchSize: 0
+define void @tail_recursive() {
+  tail call void @tail_recursive()
+  ret void
+}
+
+define void @calls_tail_recursive() norecurse {
+  tail call void @tail_recursive()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}tail_recursive_with_stack:
+define void @tail_recursive_with_stack() {
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  tail call void @tail_recursive_with_stack()
+  ret void
+}
+
+; For an arbitrary recursive call, report a large number for unknown stack usage.
+; CHECK-LABEL: {{^}}calls_recursive:
+; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
+define amdgpu_kernel void @calls_recursive() {
+  call void @recursive()
+  ret void
+}
+
+; Make sure we do not report a huge stack size for tail recursive
+; functions.
+; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive:
+; CHECK: .amdhsa_private_segment_fixed_size 0{{$}}
+define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
+  call void @calls_tail_recursive()
+  ret void
+}
+
+; TODO: Even though tail_recursive is only called as a tail call, we
+; end up treating it as a generally recursive call from the regular call
+; in the kernel.
+
+; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
+; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
+define amdgpu_kernel void @kernel_calls_tail_recursive() {
+  call void @tail_recursive()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
+; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
+define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
+  call void @tail_recursive_with_stack()
+  ret void
+}
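If the 16384-byte assumption is too small or too large for a particular
workload, it can be overridden on the llc command line via the hidden option
shown earlier; for example (an illustrative invocation, not taken from this
patch, with an arbitrarily chosen value):

    llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 \
        -amdgpu-assume-external-call-stack-size=32768 recursion.ll -o -

With a different assumed size, the fixed-size checks in these tests would of
course no longer match.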