diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index c756a134bdad1f..58c436836d19dc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1225,6 +1225,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo( MFI->initializeBaseYamlFields(YamlMFI); + if (MFI->Occupancy == 0) { + // Fixup the subtarget dependent default value. + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize()); + } + auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) { Register TempReg; if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) { diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 8b4e1ba9328894..9a0cdc7b1f4dfe 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -538,6 +538,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( HasSpilledSGPRs(MFI.hasSpilledSGPRs()), HasSpilledVGPRs(MFI.hasSpilledVGPRs()), HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()), + Occupancy(MFI.getOccupancy()), ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), @@ -555,6 +556,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( LDSSize = YamlMFI.LDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; + Occupancy = YamlMFI.Occupancy; IsEntryFunction = YamlMFI.IsEntryFunction; NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; MemoryBound = YamlMFI.MemoryBound; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index a6195adace22d3..11daf63e030590 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -275,6 +275,9 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { bool HasSpilledVGPRs = false; uint32_t HighBitsOf32BitAddress = 0; + // TODO: 10 may be a better default since it's the maximum. + unsigned Occupancy = 0; + StringValue ScratchRSrcReg = "$private_rsrc_reg"; StringValue FrameOffsetReg = "$fp_reg"; StringValue StackPtrOffsetReg = "$sp_reg"; @@ -313,6 +316,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> { YamlIO.mapOptional("mode", MFI.Mode, SIMode()); YamlIO.mapOptional("highBitsOf32BitAddress", MFI.HighBitsOf32BitAddress, 0u); + YamlIO.mapOptional("occupancy", MFI.Occupancy, 0); } }; diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index e395e43667ac54..3cbfd21dedf816 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -32,6 +32,7 @@ # FULL-NEXT: fp64-fp16-input-denormals: true # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 +# FULL-NEXT: occupancy: 10 # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -50,6 +51,7 @@ # SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' } # SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } # SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' } +# SIMPLE-NEXT: occupancy: 10 # SIMPLE-NEXT: body: name: kernel0 machineFunctionInfo: @@ -102,12 +104,14 @@ body: | # FULL-NEXT: fp64-fp16-input-denormals: true # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 +# FULL-NEXT: occupancy: 10 # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: # SIMPLE-NEXT: maxKernArgAlign: 1 # SIMPLE-NEXT: argumentInfo: # SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } +# SIMPLE-NEXT: occupancy: 10 # SIMPLE-NEXT: body: name: no_mfi @@ -143,12 +147,14 @@ body: | # FULL-NEXT: fp64-fp16-input-denormals: true # 
FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 +# FULL-NEXT: occupancy: 10 # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: # SIMPLE-NEXT: maxKernArgAlign: 1 # SIMPLE-NEXT: argumentInfo: # SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } +# SIMPLE-NEXT: occupancy: 10 # SIMPLE-NEXT: body: name: empty_mfi @@ -185,6 +191,7 @@ body: | # FULL-NEXT: fp64-fp16-input-denormals: true # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 +# FULL-NEXT: occupancy: 10 # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -192,6 +199,7 @@ body: | # SIMPLE-NEXT: isEntryFunction: true # SIMPLE-NEXT: argumentInfo: # SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } +# SIMPLE-NEXT: occupancy: 10 # SIMPLE-NEXT: body: name: empty_mfi_entry_func @@ -306,3 +314,29 @@ body: | S_ENDPGM 0 ... + +--- +# ALL-LABEL: name: occupancy_0 +# ALL: occupancy: 10 +name: occupancy_0 +machineFunctionInfo: + occupancy: 0 + +body: | + bb.0: + S_ENDPGM 0 + +... + +--- +# ALL-LABEL: name: occupancy_3 +# ALL: occupancy: 3 +name: occupancy_3 +machineFunctionInfo: + occupancy: 3 + +body: | + bb.0: + S_ENDPGM 0 + +... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index e3e78ddcb71f57..a9736c471bf9aa 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -35,6 +35,7 @@ ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 +; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { %gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0 @@ -68,6 +69,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 +; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ret void @@ -98,6 +100,7 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 +; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: body: define void @function() { ret void @@ -128,6 +131,7 @@ define void @function() { ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 +; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: body: define void @function_nsz() #0 { ret void