Skip to content

Commit

Permalink
[AMDGPU] separate accounting for agprs
Browse files Browse the repository at this point in the history
Account for and report AGPRs separately on gfx908. Reporting on other
targets is unchanged.

Differential Revision: https://reviews.llvm.org/D68307

llvm-svn: 373411
  • Loading branch information
rampitec committed Oct 2, 2019
1 parent d838cf7 commit 075bc48
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 17 deletions.
53 changes: 46 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Expand Up @@ -342,13 +342,20 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
// Print comments that apply to both callable functions and entry points.
void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
Optional<uint32_t> NumAGPR,
uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
const AMDGPUMachineFunction *MFI) {
OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
if (NumAGPR) {
OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
false);
}
OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
false);
Expand Down Expand Up @@ -474,14 +481,20 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
emitCommonFunctionComments(
Info.NumVGPR,
STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
Info.getTotalNumVGPRs(STM),
Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
Info.PrivateSegmentSize,
getFunctionCodeSize(MF), MFI);
return false;
}

OutStreamer->emitRawComment(" Kernel info:", false);
emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
STM.hasMAIInsts()
? CurrentProgramInfo.NumAccVGPR
: Optional<uint32_t>(),
CurrentProgramInfo.NumVGPR,
CurrentProgramInfo.NumSGPR,
CurrentProgramInfo.ScratchSize,
getFunctionCodeSize(MF), MFI);
Expand Down Expand Up @@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
UsesVCC, UsesFlatScratch);
}

// Returns the larger of NumVGPR and NumAGPR for this function. The subtarget
// argument is accepted but not consulted by this implementation.
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
    const GCNSubtarget &ST) const {
  if (NumVGPR < NumAGPR)
    return NumAGPR;
  return NumVGPR;
}

AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineFunction &MF) const {
SIFunctionResourceInfo Info;
Expand Down Expand Up @@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
HighestVGPRReg = Reg;
break;
}
MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
if (MRI.isPhysRegUsed(AReg)) {
HighestVGPRReg = AReg;
break;
}

if (ST.hasMAIInsts()) {
MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
if (MRI.isPhysRegUsed(Reg)) {
HighestAGPRReg = Reg;
break;
}
}
Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
TRI.getHWRegIndex(HighestAGPRReg) + 1;
}

MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
Expand All @@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
}

int32_t MaxVGPR = -1;
int32_t MaxAGPR = -1;
int32_t MaxSGPR = -1;
uint64_t CalleeFrameSize = 0;

Expand All @@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
for (const MachineOperand &MO : MI.operands()) {
unsigned Width = 0;
bool IsSGPR = false;
bool IsAGPR = false;

if (!MO.isReg())
continue;
Expand Down Expand Up @@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
Expand All @@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 2;
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
Expand All @@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 4;
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
Expand All @@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 16;
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 16;
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
IsSGPR = true;
Expand All @@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Width = 32;
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 32;
} else {
llvm_unreachable("Unknown register class");
Expand All @@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
int MaxUsed = HWReg + Width - 1;
if (IsSGPR) {
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
} else if (IsAGPR) {
MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
} else {
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
}
Expand All @@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
MaxAGPR = std::max(MaxAGPR, 23);

CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
Info.UsesVCC = true;
Expand All @@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(

MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
CalleeFrameSize
= std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
Info.UsesVCC |= I->second.UsesVCC;
Expand All @@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(

Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
Info.NumAGPR = MaxAGPR + 1;
Info.PrivateSegmentSize += CalleeFrameSize;

return Info;
Expand All @@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) {
SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();

ProgInfo.NumVGPR = Info.NumVGPR;
ProgInfo.NumArchVGPR = Info.NumVGPR;
ProgInfo.NumAccVGPR = Info.NumAGPR;
ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
ProgInfo.NumSGPR = Info.NumExplicitSGPR;
ProgInfo.ScratchSize = Info.PrivateSegmentSize;
ProgInfo.VCCUsed = Info.UsesVCC;
Expand All @@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MF.getFunction().getContext().diagnose(DiagStackSize);
}

const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Expand Up @@ -43,6 +43,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
// Track the number of explicitly used VGPRs. Special registers reserved at
// the end are tracked separately.
int32_t NumVGPR = 0;
int32_t NumAGPR = 0;
int32_t NumExplicitSGPR = 0;
uint64_t PrivateSegmentSize = 0;
bool UsesVCC = false;
Expand All @@ -51,6 +52,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
bool HasRecursion = false;

int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};

SIProgramInfo CurrentProgramInfo;
Expand All @@ -77,6 +79,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
Optional<uint32_t> NumAGPR,
uint32_t TotalNumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
uint64_t CodeSize,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIProgramInfo.h
Expand Up @@ -41,6 +41,8 @@ struct SIProgramInfo {
uint64_t ComputePGMRSrc2 = 0;

uint32_t NumVGPR = 0;
uint32_t NumArchVGPR = 0;
uint32_t NumAccVGPR = 0;
uint32_t NumSGPR = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;
Expand Down
139 changes: 129 additions & 10 deletions llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -1,15 +1,134 @@
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
; GCN-LABEL: {{^}}kernel_32_agprs:
; GCN: .amdhsa_next_free_vgpr 32
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GCN: TotalNumVgprs: 32
; GCN: VGPRBlocks: 7
; GCN: NumVGPRsForWavesPerEU: 32
; GCN: Occupancy: 8
; Kernel whose only register pressure comes from inline-asm clobbers of v8 and
; a31; the checks above expect 9 VGPRs, 32 AGPRs and a total of 32.
define amdgpu_kernel void @kernel_32_agprs() {
bb:
; The asm strings are empty; only the clobber lists name registers.
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
ret void
}

; GCN-LABEL: {{^}}kernel_0_agprs:
; GCN: .amdhsa_next_free_vgpr 1
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
; GCN: VGPRBlocks: 0
; GCN: NumVGPRsForWavesPerEU: 1
; GCN: Occupancy: 10
; Kernel that clobbers only v0 and names no AGPR; the checks above expect
; 1 VGPR, 0 AGPRs and a total of 1.
define amdgpu_kernel void @kernel_0_agprs() {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
}

; GCN-LABEL: {{^}}kernel_40_vgprs:
; GCN: .amdhsa_next_free_vgpr 40
; GCN: NumVgprs: 40
; GCN: NumAgprs: 16
; GCN: TotalNumVgprs: 40
; GCN: VGPRBlocks: 9
; GCN: NumVGPRsForWavesPerEU: 40
; GCN: Occupancy: 6
; Kernel where the VGPR clobber (v39) is higher than the AGPR clobber (a15);
; the checks above expect 40 VGPRs, 16 AGPRs and a total of 40.
define amdgpu_kernel void @kernel_40_vgprs() {
bb:
call void asm sideeffect "", "~{v39}" ()
call void asm sideeffect "", "~{a15}" ()
ret void
}

; GCN-LABEL: {{^}}func_32_agprs:
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GCN: TotalNumVgprs: 32
; Non-kernel function (attribute group #0) that clobbers v8 and a31; the
; checks above expect 9 VGPRs, 32 AGPRs and a total of 32. It is also the
; callee used by kernel_call_func_32_agprs and func_call_func_32_agprs.
define void @func_32_agprs() #0 {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
ret void
}

; GCN-LABEL: {{^}}func_32_vgprs:
; GCN: NumVgprs: 32
; GCN: NumAgprs: 9
; GCN: TotalNumVgprs: 32
; Non-kernel function where the VGPR clobber (v31) is higher than the AGPR
; clobber (a8); the checks above expect 32 VGPRs, 9 AGPRs and a total of 32.
define void @func_32_vgprs() {
bb:
call void asm sideeffect "", "~{v31}" ()
call void asm sideeffect "", "~{a8}" ()
ret void
}

; GCN-LABEL: {{^}}test_32_agprs:
; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}},
; GCN-NOT: v28
; GCN: NumVgprs: 32
; GCN: VGPRBlocks: 7
define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) {
; GCN-LABEL: {{^}}func_0_agprs:
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
define amdgpu_kernel void @func_0_agprs() {
bb:
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
call void asm sideeffect "", "~{v0}" ()
ret void
}

; GCN-LABEL: {{^}}kernel_max_gprs:
; GCN: .amdhsa_next_free_vgpr 256
; GCN: NumVgprs: 256
; GCN: NumAgprs: 256
; GCN: TotalNumVgprs: 256
; GCN: VGPRBlocks: 63
; GCN: NumVGPRsForWavesPerEU: 256
; GCN: Occupancy: 1
; Kernel that clobbers v255 and a255; the checks above expect 256 VGPRs,
; 256 AGPRs and a total of 256.
define amdgpu_kernel void @kernel_max_gprs() {
bb:
call void asm sideeffect "", "~{v255}" ()
call void asm sideeffect "", "~{a255}" ()
ret void
}

; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
; GCN: .amdhsa_next_free_vgpr 32
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GCN: TotalNumVgprs: 32
; GCN: VGPRBlocks: 7
; GCN: NumVGPRsForWavesPerEU: 32
; GCN: Occupancy: 8
; Kernel with no register clobbers of its own; it only calls func_32_agprs.
; The checks above expect the same counts as that callee (9 / 32 / 32).
define amdgpu_kernel void @kernel_call_func_32_agprs() {
bb:
call void @func_32_agprs() #0
ret void
}

; GCN-LABEL: {{^}}func_call_func_32_agprs:
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GCN: TotalNumVgprs: 32
; Non-kernel function with no register clobbers of its own; it only calls
; func_32_agprs. The checks above expect the same counts as that callee
; (9 / 32 / 32).
define void @func_call_func_32_agprs() {
bb:
call void @func_32_agprs() #0
ret void
}

declare void @undef_func()

; GCN-LABEL: {{^}}kernel_call_undef_func:
; GCN: .amdhsa_next_free_vgpr 24
; GCN: NumVgprs: 24
; GCN: NumAgprs: 24
; GCN: TotalNumVgprs: 24
; GCN: VGPRBlocks: 5
; GCN: NumVGPRsForWavesPerEU: 24
; GCN: Occupancy: 10
; Kernel that calls undef_func, which is only declared in this file (no body).
; The checks above expect 24 VGPRs, 24 AGPRs and a total of 24.
; NOTE(review): presumably 24 is the backend's fixed estimate for a callee
; whose body is not available -- confirm against analyzeResourceUsage.
define amdgpu_kernel void @kernel_call_undef_func() {
bb:
call void @undef_func()
ret void
}

attributes #0 = { nounwind noinline }

0 comments on commit 075bc48

Please sign in to comment.