diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 9f419a7fbf6834..94d93390d0916f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -973,12 +973,17 @@ void GCNSchedStage::checkScheduling() { LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " << DAG.MinOccupancy << ".\n"); } - + // The maximum number of arch VGPRs on a non-unified register file, or the + // maximum number of VGPRs + AGPRs in the unified register file case. unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF); + // The maximum number of arch VGPRs for both unified and non-unified register + // files. + unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs()); unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF); - if (PressureAfter.getVGPRNum(false) > MaxVGPRs || - PressureAfter.getAGPRNum() > MaxVGPRs || + if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs || + PressureAfter.getVGPRNum(false) > MaxArchVGPRs || + PressureAfter.getAGPRNum() > MaxArchVGPRs || PressureAfter.getSGPRNum() > MaxSGPRs) { DAG.RescheduleRegions[RegionIdx] = true; DAG.RegionsWithHighRP[RegionIdx] = true; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir index 091b29c23d60e2..e93595b9ef2735 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir @@ -4,6 +4,8 @@ --- | define amdgpu_kernel void @single-wave-phase-2b(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, 
ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17, ptr addrspace(7) noalias %in18, ptr addrspace(7) noalias %in19, ptr addrspace(7) noalias %in20, ptr addrspace(7) noalias %in21, ptr addrspace(7) noalias %in22, ptr addrspace(7) noalias %in23, ptr addrspace(7) noalias %in24, ptr addrspace(7) noalias %in25, ptr addrspace(7) noalias %in26, ptr addrspace(7) noalias %in27, ptr addrspace(7) noalias %in28, ptr addrspace(7) noalias %in29) #0 { ret void } + attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" } + !0 = distinct !{!0} !1 = !{!1, !0} ...