diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl index 9403d12afa05a..a05e21b37b912 100644 --- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl +++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl @@ -1,11 +1,12 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null -// expected-remark@+9 {{Function Name: foo}} -// expected-remark@+8 {{ SGPRs: 13}} -// expected-remark@+7 {{ VGPRs: 10}} -// expected-remark@+6 {{ AGPRs: 12}} -// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}} +// expected-remark@+10 {{Function Name: foo}} +// expected-remark@+9 {{ SGPRs: 13}} +// expected-remark@+8 {{ VGPRs: 10}} +// expected-remark@+7 {{ AGPRs: 12}} +// expected-remark@+6 {{ ScratchSize [bytes/lane]: 0}} +// expected-remark@+5 {{ Dynamic Stack: False}} // expected-remark@+4 {{ Occupancy [waves/SIMD]: 10}} // expected-remark@+3 {{ SGPRs Spill: 0}} // expected-remark@+2 {{ VGPRs Spill: 0}} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7cd8e53e65215..4b9c699879e34 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1293,6 +1293,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks( EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR); EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", CurrentProgramInfo.ScratchSize); + StringRef DynamicStackStr = + CurrentProgramInfo.DynamicCallStack ? "True" : "False"; + EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr); EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", CurrentProgramInfo.Occupancy); EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll index 2616b04332419..7252aa6120cab 100644 --- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s ; RUN: FileCheck -check-prefix=REMARK %s < %t ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel @@ -6,6 +6,7 @@ ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9 ; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43 ; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:27:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5 ; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0 @@ -55,7 +56,16 @@ ; REMARK-NEXT: Args: ; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: ' ; REMARK-NEXT: - ScratchSize: '0' -; REMARK-NEXT: ... +; REMARK-NEXT: .. +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: DynamicStack +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: ' Dynamic Stack: +; REMARK-NEXT: - DynamicStack: 'False' +; REMARK-NEXT: .. ; REMARK-NEXT: --- !Analysis ; REMARK-NEXT: Pass: kernel-resource-usage ; REMARK-NEXT: Name: Occupancy @@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 { ; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0 ; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0 ; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:42:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0 ; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0 @@ -124,6 +135,7 @@ define void @test_func() !dbg !6 { ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0 ; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0 ; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:8:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 8 ; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0 @@ -137,6 +149,7 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 { ; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0 ; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0 ; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:52:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0 ; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0 @@ -144,8 +157,48 @@ define void @empty_func() !dbg !8 { ret void } +; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39 +; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32 +; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10 +; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True +; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 +@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 + +define amdgpu_kernel void @test_indirect_call() !dbg !9 { + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 + call void %fptr() + ret void +} + +; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack +; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39 +; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32 +; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10 +; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 64 +; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True +; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 +; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 + +declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg) + +define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 { + %alloca = alloca <10 x i64>, align 16, addrspace(5) + call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false) + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 + call void %fptr() + ret void +} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} +!llvm.module.flags = !{!11} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) !1 = !DIFile(filename: "foo.cl", directory: "/tmp") @@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 { !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!11 = !{i32 1, !"amdgpu_code_object_version", i32 500}