@@ -9,6 +9,18 @@
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
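/// For example, if a single high-pressure region forces the kernel to a lower
/// occupancy, later stages revisit the other regions against that relaxed
/// target to recover ILP that the stricter initial limits gave up.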
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
@@ -20,9 +32,9 @@
using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
    HasExcessPressure(false), MF(nullptr) { }
    const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasClusteredNodes(false), HasExcessPressure(false) {}

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);
@@ -302,210 +314,30 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
return SU;
}
GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
    std::unique_ptr<MachineSchedStrategy> S) :
    ScheduleDAGMILive(C, std::move(S)),
    ST(MF.getSubtarget<GCNSubtarget>()),
    MFI(*MF.getInfo<SIMachineFunctionInfo>()),
    StartingOccupancy(MFI.getOccupancy()),
    MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}
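// StartingOccupancy is the occupancy the function enters scheduling with;
// MinOccupancy tracks the lowest occupancy target any stage has accepted.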
void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure:  ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  // Set HasClusteredNodes to true for late stages where we have already
  // collected it. That way pickNode() will not scan SDep's when not needed.
  S.HasClusteredNodes = Stage > InitialSchedule;
  S.HasExcessPressure = false;
  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;
  if (Stage == InitialSchedule && S.HasClusteredNodes)
    RegionsWithClusters[RegionIdx] = true;
  if (S.HasExcessPressure)
    RegionsWithHighRP[RegionIdx] = true;

  if (!LIS)
    return;

  // Check the results of scheduling.
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == MinOccupancy;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned WavesAfter =
      std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");
  // We may not be able to keep the current target occupancy because of the just
  // scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    RescheduleRegions[RegionIdx] = true;
    RegionsWithHighRP[RegionIdx] = true;
  }

  // If this condition is true, then either the occupancy before and after
  // scheduling is the same, or we are allowing the occupancy to drop because
  // the function is memory bound. Even if we are OK with the current occupancy,
  // we still need to verify that we will not introduce any extra chance of
  // spilling.
  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
               PressureAfter.less(ST, PressureBefore) ||
               !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      RegionsWithMinOcc[RegionIdx] =
          PressureAfter.getOccupancy(ST) == MinOccupancy;
      if (!RegionsWithClusters[RegionIdx] &&
          (Stage + 1) == UnclusteredReschedule)
        RescheduleRegions[RegionIdx] = false;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }

  RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
                                 (Stage + 1) != UnclusteredReschedule;
  RegionEnd = RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  // After reverting schedule, debug instrs will now be at the end of the block
  // and RegionEnd will point to the first debug instr. Increment RegionEnd
  // past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++RegionEnd;

  // If Unsched.front() instruction is a debug instruction, this will actually
  // shrink the region since we moved all debug instructions to the end of the
  // block. Find the first instruction that is not a debug instruction.
  RegionBegin = Unsched.front()->getIterator();
  if (RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  placeDebugValues();

  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
}
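// Recompute the region's maximal register pressure from scratch by advancing
// a downward RP tracker over the region, seeded with the cached live-ins.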
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor then live-ins of that successor are
@@ -542,7 +374,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
    RPTracker.reset(*I, &LRS);
  }

  for ( ; ; ) {
  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
@@ -588,8 +420,9 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
@@ -601,153 +434,481 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
  RegionsWithHighRP.reset();
  RegionsWithMinOcc.reset();

  runSchedStages();
}
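// The stages run in a fixed order: initial schedule, unclustered reschedule,
// clustered low-occupancy reschedule, then pre-RA rematerialization. A stage
// can skip itself entirely via initGCNSchedStage() or skip individual regions
// via initGCNRegion().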
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
  UnclusteredRescheduleStage S1(GCNSchedStageID::UnclusteredReschedule, *this);
  ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
                          *this);
  PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
  GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  for (auto *Stage : SchedStages) {
    if (!Stage->initGCNSchedStage())
      continue;

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;

      // Setup for scheduling the region and check whether it should be skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (Stage > InitialSchedule) {
        if (!LIS)
          break;

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    // Retry function scheduling if we found resulting occupancy and it is
    // lower than used for first pass scheduling. This will give more freedom
    // to schedule low register pressure blocks.
    // Code is partially copied from MachineSchedulerBase::scheduleRegions().
    Stage->finalizeGCNSchedStage();
  }
}

    if (Stage == UnclusteredReschedule) {
      if (RescheduleRegions.none())
        continue;
      LLVM_DEBUG(dbgs() <<
                 "Retrying function scheduling without clustering.\n");
    }
#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::InitialSchedule:
    OS << "Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredReschedule:
    OS << "Unclustered Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  }
  return OS;
}
#endif
    if (Stage == ClusteredLowOccupancyReschedule) {
      if (StartingOccupancy <= MinOccupancy)
        break;
GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
      MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
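// Each stage caches references to the DAG, the shared scheduling strategy,
// and the subtarget so the per-stage hooks below can use them directly.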
      LLVM_DEBUG(
          dbgs()
          << "Retrying function scheduling with lowest recorded occupancy "
          << MinOccupancy << ".\n");
    }

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

    if (Stage == PreRARematerialize) {
      if (RegionsWithMinOcc.none() || Regions.size() == 1)
        break;

      const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
      const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
      // Check maximum occupancy
      if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
          MinOccupancy)
        break;
      // FIXME: This pass will invalidate cached MBBLiveIns for regions
      // in between the defs and the region we sink the def to. Cached pressure
      // for regions where a def is sunk from will also be invalidated. This
      // will need to be fixed if there is another pass after this pass.
      static_assert(LastStage == PreRARematerialize,
                    "Passes after PreRARematerialize are not supported");

      collectRematerializableInstructions();
      if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
        break;

      LLVM_DEBUG(
          dbgs() << "Retrying function scheduling with improved occupancy of "
                 << MinOccupancy << " from rematerializing\n");
    }
  }

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}
    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

bool UnclusteredRescheduleStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

    for (auto Region : Regions) {
      if (((Stage == UnclusteredReschedule || Stage == PreRARematerialize) &&
           !RescheduleRegions[RegionIdx]) ||
          (Stage == ClusteredLowOccupancyReschedule &&
           !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {

  if (DAG.RescheduleRegions.none())
    return false;

        ++RegionIdx;
        continue;
      }

  SavedMutations.swap(DAG.Mutations);

      RegionBegin = Region.first;
      RegionEnd = Region.second;

  LLVM_DEBUG(dbgs() << "Retrying function scheduling without clustering.\n");
  return true;
}

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

  // Don't bother trying to improve ILP in lower RP regions if occupancy has not
  // been dropped. All regions will have already been scheduled with the ideal
  // occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        ++RegionIdx;
        continue;
      }

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

      schedule();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sink the def to. Cached pressure for regions
  // where a def is sunk from will also be invalidated. This will need to be
  // fixed if there is another pass after this pass.
  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

      exitRegion();
      ++RegionIdx;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}
void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}
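// The unclustered stage swapped the clustering DAG mutations out in its init;
// swapping SavedMutations back here restores them for the stages that follow.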
void UnclusteredRescheduleStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  GCNSchedStage::finalizeGCNSchedStage();
}
bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  for (auto &I : DAG)
    Unsched.push_back(&I);

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:";
      GCNRPTracker::printLiveRegs(dbgs(), DAG.LiveIns[RegionIdx], DAG.MRI);
      dbgs() << "Region live-in pressure:  ";
      llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]).print(dbgs());
      dbgs() << "Region register pressure: "; PressureBefore.print(dbgs()));

  // Set HasClusteredNodes to true for late stages where we have already
  // collected it. That way pickNode() will not scan SDep's when not needed.
  S.HasClusteredNodes = StageID > GCNSchedStageID::InitialSchedule;
  S.HasExcessPressure = false;

  return true;
}
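// Stage-specific region filters: the rescheduling stages below only revisit
// regions that an earlier stage flagged as needing another pass.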
bool UnclusteredRescheduleStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it doesn't have clusters so it
  // wasn't rescheduled in the last stage, or if we found it was testing
  // critical register pressure limits in the unclustered reschedule stage. The
  // latter is because we may not have been able to raise the min occupancy in
  // the previous stage so the region may be overly constrained even if it was
  // already rescheduled.
  if (!DAG.RegionsWithClusters[RegionIdx] && !DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}
void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::InitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}
void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasExcessPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  DAG.exitRegion();
  RegionIdx++;
}

void InitialScheduleStage::finalizeGCNRegion() {
  // Record which regions have clustered nodes for the next unclustered
  // reschedule stage.
  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  if (S.HasClusteredNodes)
    DAG.RegionsWithClusters[RegionIdx] = true;

  GCNSchedStage::finalizeGCNRegion();
}
void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned WavesAfter =
      std::min(S.getTargetOccupancy(), PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the just
  // scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}
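// The base check reverts whenever the region's occupancy fell below the
// function's minimum; each stage layers its own spilling heuristics on top.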
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  // Don't reschedule the region in the next stage if it doesn't have clusters.
  if (!DAG.RegionsWithClusters[RegionIdx])
    DAG.RescheduleRegions[RegionIdx] = false;

  return false;
}
bool UnclusteredRescheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if (!PressureAfter.less(ST, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  return false;
}
bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() &&
      !PressureAfter.less(ST, PressureBefore) &&
      DAG.RescheduleRegions[RegionIdx]) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}
void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      DAG.RegionsWithClusters[RegionIdx] ||
      (nextStage(StageID)) != GCNSchedStageID::UnclusteredReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }

    finishBlock();

    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting schedule, debug instrs will now be at the end of the block
  // and RegionEnd will point to the first debug instr. Increment RegionEnd
  // past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() instruction is a debug instruction, this will actually
  // shrink the region since we moved all debug instructions to the end of the
  // block. Find the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);

  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
}
void GCNScheduleDAGMILive::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization
    if (!SRI->isVGPRClass(MRI.getRegClass(Reg)) || !MRI.hasOneDef(Reg) ||
        !MRI.hasOneNonDBGUse(Reg))
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = MRI.getOneDef(Reg);
    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg);
    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = Regions.size(); I != E; ++I) {
      auto It = LiveIns[I].find(Reg);
      if (It != LiveIns[I].end() && !It->second.none()) {
        if (RegionsWithMinOcc[I]) {
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }
@@ -762,8 +923,8 @@ void GCNScheduleDAGMILive::collectRematerializableInstructions() {
  }
}

bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                                   const TargetInstrInfo *TII) {
bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing if
  // sinking succeeds.
  SmallVector<
@@ -772,9 +933,10 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(Regions.size());
  NewRescheduleRegions.resize(Regions.size());
  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
@@ -784,16 +946,16 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
  // Make copies of register pressure and live-ins cache that will be updated
  // as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = Pressure[Idx];
    NewLiveIns[Idx] = LiveIns[Idx];
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = Regions;
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!RegionsWithMinOcc[I])
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
@@ -802,12 +964,12 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == MinOccupancy)
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > MinOccupancy) {
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
@@ -827,7 +989,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If in the most optimistic scenario, we cannot improve occupancy, then do
    // not attempt to sink any instructions.
    if (OptimisticOccupancy <= MinOccupancy)
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
@@ -842,7 +1004,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *TRI);
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*(--InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
@@ -851,21 +1013,21 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
      // Update region boundaries in the scheduling region we sunk from, since
      // we may sink an instruction that was at the beginning or end of its
      // region.
      updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                             /*Removing =*/true);
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);

      // Update region boundaries in the region we sunk to.
      updateRegionBoundaries(NewRegions, InsertPos, NewMI);
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != Regions[Idx].first->getParent()) {
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), MRI);
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
@@ -879,7 +1041,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > MinOccupancy)
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }
@@ -888,7 +1050,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
    for (auto TrackedIdx : RematDefToLiveInRegions[Def])
      RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= MinOccupancy)
    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
@@ -917,7 +1079,7 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    BBLiveInMap.erase(OldMI);
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS
    Register Reg = MI->getOperand(0).getReg();
@@ -929,22 +1091,22 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
  // Update live-ins, register pressure, and regions caches.
  for (auto Idx : ImpactedRegions) {
    LiveIns[Idx] = NewLiveIns[Idx];
    Pressure[Idx] = NewPressure[Idx];
    MBBLiveIns.erase(Regions[Idx].first->getParent());
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  Regions = NewRegions;
  RescheduleRegions = NewRescheduleRegions;
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++MinOccupancy);
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}
// Copied from MachineLICM
bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!TII->isTriviallyReMaterializable(MI))
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.operands())