Skip to content

Commit

Permalink
[AMDGPU] Add remarks to output some resource usage
Browse files Browse the repository at this point in the history
Add analyis remarks to output kernel name, register usage, occupancy,
scratch usage, spills, and LDS information.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D123878
  • Loading branch information
vangthao95 committed Jul 15, 2022
1 parent 56796ae commit 6735773
Show file tree
Hide file tree
Showing 5 changed files with 243 additions and 0 deletions.
17 changes: 17 additions & 0 deletions clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -0,0 +1,17 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null

// expected-remark@+9 {{Function Name: foo}}
// expected-remark@+8 {{ SGPRs: 9}}
// expected-remark@+7 {{ VGPRs: 10}}
// expected-remark@+6 {{ AGPRs: 12}}
// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}}
// expected-remark@+4 {{ Occupancy [waves/SIMD]: 10}}
// expected-remark@+3 {{ SGPRs Spill: 0}}
// expected-remark@+2 {{ VGPRs Spill: 0}}
// expected-remark@+1 {{ LDS Size [bytes/block]: 0}}
__kernel void foo() {
__asm volatile ("; clobber s8" :::"s8");
__asm volatile ("; clobber v9" :::"v9");
__asm volatile ("; clobber a11" :::"a11");
}
63 changes: 63 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Expand Up @@ -27,8 +27,10 @@
#include "SIMachineFunctionInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
Expand Down Expand Up @@ -506,6 +508,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {

emitFunctionBody();

emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
STM.hasMAIInsts());

if (isVerbose()) {
MCSectionELF *CommentSection =
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
Expand Down Expand Up @@ -875,6 +880,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
LDSAlignShift = 9;
}

ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();

ProgInfo.LDSSize = MFI->getLDSSize();
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
Expand Down Expand Up @@ -1180,3 +1188,58 @@ void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AMDGPUResourceUsageAnalysis>();
AsmPrinter::getAnalysisUsage(AU);
}

void AMDGPUAsmPrinter::emitResourceUsageRemarks(
const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
bool isModuleEntryFunction, bool hasMAIInsts) {
if (!ORE)
return;

const char *Name = "kernel-resource-usage";
const char *Indent = " ";

// If the remark is not specifically enabled, do not output to yaml
LLVMContext &Ctx = MF.getFunction().getContext();
if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
return;

auto EmitResourceUsageRemark = [&](StringRef RemarkName,
StringRef RemarkLabel, auto Argument) {
// Add an indent for every line besides the line with the kernel name. This
// makes it easier to tell which resource usage go with which kernel since
// the kernel name will always be displayed first.
std::string LabelStr = RemarkLabel.str() + ": ";
if (!RemarkName.equals("FunctionName"))
LabelStr = Indent + LabelStr;

ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(Name, RemarkName,
MF.getFunction().getSubprogram(),
&MF.front())
<< LabelStr << ore::NV(RemarkName, Argument);
});
};

// FIXME: Formatting here is pretty nasty because clang does not accept
// newlines from diagnostics. This forces us to emit multiple diagnostic
// remarks to simulate newlines. If and when clang does accept newlines, this
// formatting should be aggregated into one remark with newlines to avoid
// printing multiple diagnostic location and diag opts.
EmitResourceUsageRemark("FunctionName", "Function Name",
MF.getFunction().getName());
EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
if (hasMAIInsts)
EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
CurrentProgramInfo.ScratchSize);
EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
CurrentProgramInfo.Occupancy);
EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
CurrentProgramInfo.SGPRSpill);
EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
CurrentProgramInfo.VGPRSpill);
if (isModuleEntryFunction)
EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
CurrentProgramInfo.LDSSize);
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Expand Up @@ -69,6 +69,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t ScratchSize,
uint64_t CodeSize,
const AMDGPUMachineFunction* MFI);
void emitResourceUsageRemarks(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo,
bool isModuleEntryFunction, bool hasMAIInsts);

uint16_t getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIProgramInfo.h
Expand Up @@ -49,6 +49,8 @@ struct SIProgramInfo {
uint32_t AccumOffset = 0;
uint32_t TgSplit = 0;
uint32_t NumSGPR = 0;
unsigned SGPRSpill = 0;
unsigned VGPRSpill = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;

Expand Down
158 changes: 158 additions & 0 deletions llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -0,0 +1,158 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
; RUN: FileCheck -check-prefix=REMARK %s < %t

; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 24
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:27:0: LDS Size [bytes/block]: 512

; REMARK-LABEL: --- !Analysis
; REMARK: Pass: kernel-resource-usage
; REMARK-NEXT: Name: FunctionName
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: 'Function Name: '
; REMARK-NEXT: - FunctionName: test_kernel
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: NumSGPR
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' SGPRs: '
; REMARK-NEXT: - NumSGPR: '24'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: NumVGPR
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' VGPRs: '
; REMARK-NEXT: - NumVGPR: '9'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: NumAGPR
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' AGPRs: '
; REMARK-NEXT: - NumAGPR: '43'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: ScratchSize
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: '
; REMARK-NEXT: - ScratchSize: '0'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: Occupancy
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' Occupancy [waves/SIMD]: '
; REMARK-NEXT: - Occupancy: '5'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: SGPRSpill
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' SGPRs Spill: '
; REMARK-NEXT: - SGPRSpill: '0'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: VGPRSpill
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' VGPRs Spill: '
; REMARK-NEXT: - VGPRSpill: '0'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: BytesLDS
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' LDS Size [bytes/block]: '
; REMARK-NEXT: - BytesLDS: '512'
; REMARK-NEXT: ...

@lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4

define amdgpu_kernel void @test_kernel() !dbg !3 {
call void asm sideeffect "; clobber v8", "~{v8}"()
call void asm sideeffect "; clobber s23", "~{s23}"()
call void asm sideeffect "; clobber a42", "~{a42}"()
call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds)
ret void
}

; STDERR: remark: foo.cl:42:0: Function Name: test_func
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs: 0
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0
; STDERR-NOT: LDS Size
define void @test_func() !dbg !6 {
call void asm sideeffect "; clobber v17", "~{v17}"()
call void asm sideeffect "; clobber s11", "~{s11}"()
call void asm sideeffect "; clobber a9", "~{a9}"()
ret void
}

; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 10
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:8:0: LDS Size [bytes/block]: 0
define amdgpu_kernel void @empty_kernel() !dbg !7 {
ret void
}

; STDERR: remark: foo.cl:52:0: Function Name: empty_func
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs: 0
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0
define void @empty_func() !dbg !8 {
ret void
}

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!4 = !DISubroutineType(types: !5)
!5 = !{null}
!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)

0 comments on commit 6735773

Please sign in to comment.