diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9fbf9e5fe8eeb..be30e15fbaa86 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -75,6 +75,22 @@ static cl::opt<unsigned> PendingQueueLimit(
         "Max (Available+Pending) size to inspect pending queue (0 disables)"),
     cl::init(256));
 
+static cl::opt<float> SpillCopyLatencyScale(
+    "amdgpu-spill-copy-latency-scale", cl::Hidden,
+    cl::desc(
+        "Sets the factor by which we scale the latency impact of allowing "
+        "AGPR/VGPR copies to be inserted for spilling."),
+    cl::init(.5));
+
+static cl::opt<unsigned> AllowAVGPRCopiesForSpill(
+    "amdgpu-allow-avgpr-copies-for-spill", cl::Hidden,
+    cl::desc(
+        "Allow the introduction of AVGPR copies for VGPR spilling "
+        "rather than reverting the schedule. 0=disallow (default), "
+        "1=allow if no memory spilling, 2=same as 1, but require "
+        "an improved schedule"),
+    cl::init(0));
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 #define DUMP_MAX_REG_PRESSURE
 static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
@@ -108,6 +124,8 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
   VGPRExcessLimit =
       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+  AGPRExcessLimit =
+      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::AGPR_32RegClass);
 
   SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
   // Set the initial TargetOccupnacy to the maximum occupancy that we can
@@ -145,6 +163,7 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
   SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
   VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
+  AGPRExcessLimit -= std::min(AGPRLimitBias + ErrorMargin, AGPRExcessLimit);
 
   LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                     << ", VGPRExcessLimit = " << VGPRExcessLimit
@@ -1232,6 +1251,7 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
   // stage. Temporarily increase occupancy target in the region.
   S.SGPRLimitBias = S.HighRPSGPRBias;
   S.VGPRLimitBias = S.HighRPVGPRBias;
+  S.AGPRLimitBias = S.HighRPAGPRBias;
   if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
     MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
 
@@ -1318,7 +1338,7 @@ void GCNSchedStage::finalizeGCNSchedStage() {
 
 void UnclusteredHighRPStage::finalizeGCNSchedStage() {
   SavedMutations.swap(DAG.Mutations);
-  S.SGPRLimitBias = S.VGPRLimitBias = 0;
+  S.SGPRLimitBias = S.VGPRLimitBias = S.AGPRLimitBias = 0;
   if (DAG.MinOccupancy > InitialOccupancy) {
     LLVM_DEBUG(dbgs() << StageID
                       << " stage successfully increased occupancy to "
@@ -1739,9 +1759,73 @@ bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
   return mayCauseSpilling(WavesAfter);
 }
 
+bool GCNSchedStage::spillsAsCopiesProfitable() {
+  if (!AllowAVGPRCopiesForSpill)
+    return false;
+
+  unsigned MaxAGPR = S.AGPRExcessLimit;
+  if (MaxAGPR == 0)
+    // AGPRs are not supported on this architecture.
+    return false;
+
+  // For now, only consider allowing copies profitable if occupancy was
+  // already 1.
+  unsigned TargetOccupancy = std::min(
+      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
+  unsigned WavesBefore = std::min(
+      TargetOccupancy,
+      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
+
+  if (WavesBefore != 1)
+    return false;
+
+  // Only allow copies when VGPR pressure is the problem.
+  unsigned MaxSGPR = S.SGPRExcessLimit;
+  unsigned NumSGPR = PressureAfter.getSGPRNum();
+  if (NumSGPR > MaxSGPR)
+    return false;
+
+  unsigned MaxVGPR = S.VGPRExcessLimit;
+  unsigned NumAGPR = PressureAfter.getAGPRNum();
+  unsigned NumVGPR = PressureAfter.getVGPRNum(ST.hasGFX90AInsts());
+  unsigned NumAVGPR = PressureAfter.getAVGPRNum();
+
+  // We are assuming that, in the presence of excessive VGPR requirements,
+  // AVGPR virtuals will be assigned to AGPRs. This is almost certainly too
+  // optimistic as these can still generate copies, but we can't know how many
+  // we will get.
+  int NumSpillCopies = NumVGPR - MaxVGPR - NumAVGPR;
+  if (NumSpillCopies <= 0)
+    return true;
+
+  assert(NumVGPR > MaxVGPR);
+  if (NumAGPR + NumSpillCopies > MaxAGPR)
+    return false;
+
+  if (AllowAVGPRCopiesForSpill == 1)
+    return true;
+
+  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
+  auto LengthBefore = MBefore.getLength();
+  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
+  auto LengthAfter = MAfter.getLength();
+
+  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
+  unsigned AccReadLatency =
+      SM.computeInstrLatency(AMDGPU::V_ACCVGPR_READ_B32_e64);
+  unsigned AccWriteLatency =
+      SM.computeInstrLatency(AMDGPU::V_ACCVGPR_WRITE_B32_e64);
+  unsigned SpillCopyLatency =
+      NumSpillCopies * (AccReadLatency + AccWriteLatency);
+
+  SpillCopyLatency *= SpillCopyLatencyScale;
+
+  return (LengthAfter + SpillCopyLatency) < LengthBefore;
+}
+
 bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
   if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
-      !PressureAfter.less(MF, PressureBefore)) {
+      !PressureAfter.less(MF, PressureBefore) && !spillsAsCopiesProfitable()) {
     LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
     return true;
   }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 975781fea9452..0496cdf7ff24d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -74,10 +74,6 @@ class GCNSchedStrategy : public GenericScheduler {
 
   std::vector<unsigned> MaxPressure;
 
-  unsigned SGPRExcessLimit;
-
-  unsigned VGPRExcessLimit;
-
   unsigned TargetOccupancy;
 
   MachineFunction *MF;
@@ -114,14 +110,25 @@ class GCNSchedStrategy : public GenericScheduler {
   // Bias for VGPR limits under a high register pressure.
   const unsigned HighRPVGPRBias = 7;
 
+  // Bias for AGPR limits under a high register pressure.
+  const unsigned HighRPAGPRBias = 7;
+
   unsigned SGPRCriticalLimit;
 
   unsigned VGPRCriticalLimit;
 
+  unsigned SGPRExcessLimit;
+
+  unsigned VGPRExcessLimit;
+
+  unsigned AGPRExcessLimit;
+
   unsigned SGPRLimitBias = 0;
 
   unsigned VGPRLimitBias = 0;
 
+  unsigned AGPRLimitBias = 0;
+
   GCNSchedStrategy(const MachineSchedContext *C);
 
   SUnit *pickNode(bool &IsTopNode) override;
@@ -394,6 +401,11 @@ class GCNSchedStage {
   // The region number this stage is currently working on
   unsigned getRegionIdx() { return RegionIdx; }
 
+  // Returns true if spilling caused by the new schedule will be in
+  // the form of AVGPR <-> VGPR copies and adding those copies to
+  // the new schedule is still better than reverting.
+  bool spillsAsCopiesProfitable();
+
   // Returns true if the new schedule may result in more spilling.
   bool mayCauseSpilling(unsigned WavesAfter);
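Note on the profitability check (reviewer aid, not part of the patch): below is a minimal standalone C++ sketch of the arithmetic spillsAsCopiesProfitable() performs on the AllowAVGPRCopiesForSpill == 2 path. The helper name copiesProfitable and every numeric input are illustrative assumptions; in the patch the values come from PressureBefore/PressureAfter, the S.*ExcessLimit fields, and TargetSchedModel::computeInstrLatency on V_ACCVGPR_READ_B32_e64 / V_ACCVGPR_WRITE_B32_e64.

#include <cstdio>

// Models the tail of spillsAsCopiesProfitable() once the occupancy and SGPR
// gates have passed: excess VGPRs not absorbed by AVGPR virtuals become
// copies, each charged one accvgpr read+write round trip, damped by
// amdgpu-spill-copy-latency-scale. All parameters are hypothetical inputs.
static bool copiesProfitable(int NumVGPR, int MaxVGPR, int NumAVGPR,
                             int NumAGPR, int MaxAGPR, unsigned LengthBefore,
                             unsigned LengthAfter, unsigned AccReadLatency,
                             unsigned AccWriteLatency, float Scale) {
  int NumSpillCopies = NumVGPR - MaxVGPR - NumAVGPR;
  if (NumSpillCopies <= 0)
    return true; // No copies expected at all.
  if (NumAGPR + NumSpillCopies > MaxAGPR)
    return false; // The copies would overflow the AGPR file as well.
  unsigned SpillCopyLatency =
      NumSpillCopies * (AccReadLatency + AccWriteLatency);
  // Same truncating unsigned *= float arithmetic as the patch.
  SpillCopyLatency = static_cast<unsigned>(SpillCopyLatency * Scale);
  return LengthAfter + SpillCopyLatency < LengthBefore;
}

int main() {
  // Hypothetical region: 260 VGPRs against a 256 limit with no AVGPR slack
  // gives 4 spill copies; at 2 cycles per accvgpr read/write and scale 0.5
  // that charges 4 * (2 + 2) * 0.5 = 8 cycles, so a schedule shortened from
  // 120 to 110 cycles still pays for its copies and is kept.
  bool Keep = copiesProfitable(260, 256, 0, 100, 256, 120, 110, 2, 2, 0.5f);
  std::printf("keep new schedule: %s\n", Keep ? "yes" : "no");
}

The scale factor exists because NumSpillCopies times a full read/write round trip is only a rough proxy for critical-path impact; how many copies actually materialize, and how well they overlap with other work, is not known until after register allocation.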