diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp index 8186c329c4daf..cb30822179f4e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCResourceInfo.h" +#include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" @@ -298,9 +299,37 @@ void MCResourceInfo::gatherResourceInfo( } }; + auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) { + MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal); + LLVM_DEBUG( + dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue + << ", no further propagation as indirect callee found within\n"); + Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext)); + }; + LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n'); - SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR); - SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR); + + CallingConv::ID CC = MF.getFunction().getCallingConv(); + + // When DynamicVGPR is enabled, chain functions should not propagate VGPR + // counts from other chain callees since each chain function can have its own + // VGPR allocation, but should still propagate from non-chain callees. 
+  if (MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
+      (CC == CallingConv::AMDGPU_CS_Chain || CC == CallingConv::AMDGPU_CS)) {
+    SmallVector<const Function *> NonChainCallees;
+    for (const Function *Callee : FRI.Callees) {
+      if (!AMDGPU::isChainCC(Callee->getCallingConv()) &&
+          !Callee->isDeclaration())
+        NonChainCallees.push_back(Callee);
+    }
+    assignResourceInfoExpr(FRI.NumVGPR, RIK_NumVGPR, AMDGPUMCExpr::AGVK_Max, MF,
+                           NonChainCallees, OutContext);
+    assignResourceInfoExpr(FRI.NumAGPR, RIK_NumAGPR, AMDGPUMCExpr::AGVK_Max, MF,
+                           NonChainCallees, OutContext);
+  } else {
+    SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
+    SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
+  }
 
   SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR);
   SetMaxReg(MaxNamedBarrierSym, FRI.NumNamedBarrier, RIK_NumNamedBarrier);
@@ -355,14 +384,6 @@ void MCResourceInfo::gatherResourceInfo(
     Sym->setVariableValue(localConstExpr);
   }
 
-  auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
-    MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
-    LLVM_DEBUG(
-        dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue
-               << ", no further propagation as indirect callee found within\n");
-    Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
-  };
-
   if (!FRI.HasIndirectCall) {
     assignResourceInfoExpr(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC,
                            AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
diff --git a/llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll b/llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll
new file mode 100644
index 0000000000000..2a920bbe01449
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll
@@ -0,0 +1,131 @@
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=DVGPR %s
+; RUN: sed 's/"amdgpu-dynamic-vgpr-block-size"="16"/"amdgpu-dynamic-vgpr-block-size"="0"/' %s \
+; RUN:   | llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 | FileCheck -check-prefix=NODVGPR %s
+
+; DVGPR: .set 
gfx_func_a.num_vgpr, 40 +; DVGPR: .set gfx_func_b2.num_vgpr, 80 +; DVGPR: .set gfx_func_b.num_vgpr, max(61, gfx_func_b2.num_vgpr) +; DVGPR: .set amdgpu_cs_main.num_vgpr, max(42, gfx_func_a.num_vgpr) +; DVGPR: .set func.0.num_vgpr, 11 +; DVGPR: .set func.1.num_vgpr, max(11, gfx_func_a.num_vgpr, gfx_func_b.num_vgpr) +; DVGPR: .set func.2.num_vgpr, max(11, gfx_func_a.num_vgpr) +; DVGPR: .set func.3.num_vgpr, max(11, gfx_func_b.num_vgpr) +; DVGPR: .set retry_vgpr_alloc.num_vgpr, max(11, amdgpu.max_num_vgpr) +; DVGPR: .set first_retry_wrapper.num_vgpr, max(11, amdgpu.max_num_vgpr) +; DVGPR: .set amdgpu.max_num_vgpr, 80 + +; NODVGPR: .set gfx_func_a.num_vgpr, 40 +; NODVGPR: .set gfx_func_b2.num_vgpr, 80 +; NODVGPR: .set gfx_func_b.num_vgpr, max(61, gfx_func_b2.num_vgpr) +; NODVGPR: .set amdgpu_cs_main.num_vgpr, max(42, amdgpu.max_num_vgpr) +; NODVGPR: .set func.0.num_vgpr, max(11, amdgpu.max_num_vgpr) +; NODVGPR: .set func.1.num_vgpr, max(11, amdgpu.max_num_vgpr) +; NODVGPR: .set func.2.num_vgpr, max(11, amdgpu.max_num_vgpr) +; NODVGPR: .set func.3.num_vgpr, max(11, amdgpu.max_num_vgpr) +; NODVGPR: .set retry_vgpr_alloc.num_vgpr, max(11, amdgpu.max_num_vgpr) +; NODVGPR: .set first_retry_wrapper.num_vgpr, max(11, amdgpu.max_num_vgpr) +; NODVGPR: .set amdgpu.max_num_vgpr, 80 + +; DVGPR: - .hardware_stages: +; DVGPR: .vgpr_count: 0x2a +; DVGPR: .shader_functions: +; DVGPR: func.0: +; DVGPR: .vgpr_count: 0xb +; DVGPR: func.1: +; DVGPR: .vgpr_count: 0x50 +; DVGPR: func.2: +; DVGPR: .vgpr_count: 0x28 +; DVGPR: func.3: +; DVGPR: .vgpr_count: 0x50 +; DVGPR: gfx_func_a: +; DVGPR: .vgpr_count: 0x28 +; DVGPR: gfx_func_b: +; DVGPR: .vgpr_count: 0x50 +; DVGPR: gfx_func_b2: +; DVGPR: .vgpr_count: 0x50 + +; NODVGPR: - .hardware_stages: +; NODVGPR: .vgpr_count: 0x50 +; NODVGPR: .shader_functions: +; NODVGPR: func.0: +; NODVGPR: .vgpr_count: 0x50 +; NODVGPR: func.1: +; NODVGPR: .vgpr_count: 0x50 +; NODVGPR: func.2: +; NODVGPR: .vgpr_count: 0x50 +; NODVGPR: func.3: +; NODVGPR: 
.vgpr_count: 0x50 +; NODVGPR: gfx_func_a: +; NODVGPR: .vgpr_count: 0x28 +; NODVGPR: gfx_func_b: +; NODVGPR: .vgpr_count: 0x50 +; NODVGPR: gfx_func_b2: +; NODVGPR: .vgpr_count: 0x50 + +define amdgpu_gfx void @gfx_func_a() #0 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() + ret void +} + +define amdgpu_gfx void @gfx_func_b2() #0 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39},~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49},~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59},~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69},~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}"() + ret void +} + +define amdgpu_gfx void @gfx_func_b() #0 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39},~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49},~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}"() + call amdgpu_gfx void @gfx_func_b2() + ret void +} + +define amdgpu_cs void @amdgpu_cs_main(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 { + %fptr = load ptr, ptr inttoptr(i64 0 to ptr) + call amdgpu_gfx void @gfx_func_a() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) 
@llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @func.1) + unreachable +} + +define amdgpu_cs_chain void @func.0(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 { + call void asm sideeffect "", "~{v0},~{v1}"() + %fptr = load ptr, ptr inttoptr(i64 0 to ptr) + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper) + unreachable +} + +define amdgpu_cs_chain void @func.1(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3}"() + call amdgpu_gfx void @gfx_func_a() + call amdgpu_gfx void @gfx_func_b() + %fptr = load ptr, ptr inttoptr(i64 0 to ptr) + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper) + unreachable +} + +define amdgpu_cs_chain void @func.2(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5}"() + call amdgpu_gfx void @gfx_func_a() + %fptr = load ptr, ptr inttoptr(i64 0 to ptr) + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper) + unreachable +} + +define amdgpu_cs_chain void @func.3(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() + call amdgpu_gfx void @gfx_func_b() + %fptr = load ptr, ptr inttoptr(i64 0 to ptr) + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) 
@llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper) + unreachable +} + +define amdgpu_cs_chain_preserve void @retry_vgpr_alloc(<3 x i32> inreg %sgprs) #0 { + %fptr = load ptr, ptr inttoptr(i64 0 to ptr) + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @retry_vgpr_alloc) + unreachable +} + +define amdgpu_cs_chain_preserve void @first_retry_wrapper(<3 x i32> inreg %sgprs) #0 { + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg @retry_vgpr_alloc, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @retry_vgpr_alloc) + unreachable +} + +attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }