Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 131 additions & 1 deletion llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"

using namespace llvm;
Expand Down Expand Up @@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,

GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
const MachineRegisterInfo &MRI,
GCNRegPressure::RegKind RegKind) {
GCNRPTracker::LiveRegSet LiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
if (RegKind != GCNRegPressure::TOTAL_KINDS &&
GCNRegPressure::getRegKind(Reg, MRI) != RegKind)
continue;
if (!LIS.hasInterval(Reg))
continue;
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
Expand Down Expand Up @@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {

#undef PFX
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print to dbgs() a report about the point of maximum register pressure for
/// register kind \p Kind in \p MF.
///
/// The report names the instruction at which the maximum pressure is reached
/// and lists every register of kind \p Kind live at that point, split into
/// registers with a single definition and registers with multiple
/// definitions. For each register, the instructions that reference it are
/// printed in slot-index order and tagged as def and/or use.
///
/// \param MF   Function to inspect.
/// \param Kind Register kind whose pressure is reported.
/// \param LIS  Live intervals used to compute live sets and slot indexes.
/// \param MLI  Optional loop info; when non-null, printed locations are
///             annotated with the enclosing loop header and loop depth.
LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
                                               GCNRegPressure::RegKind Kind,
                                               LiveIntervals &LIS,
                                               const MachineLoopInfo *MLI) {

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  auto &OS = dbgs();
  const char *RegName = GCNRegPressure::getName(Kind);

  // Walk every block bottom-up and remember the instruction at which the
  // tracker first reports a new function-wide maximum for this kind.
  unsigned MaxNumRegs = 0;
  const MachineInstr *MaxPressureMI = nullptr;
  GCNUpwardRPTracker RPT(LIS);
  for (const MachineBasicBlock &MBB : MF) {
    RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
    for (const MachineInstr &MI : reverse(MBB)) {
      RPT.recede(MI);
      unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
      if (NumRegs > MaxNumRegs) {
        MaxNumRegs = NumRegs;
        MaxPressureMI = &MI;
      }
    }
  }

  // If the pressure never rose above zero (for instance the function has no
  // instructions, or nothing of this register kind is ever live), there is no
  // instruction to report on. Bail out instead of dereferencing a null
  // pointer below.
  if (!MaxPressureMI) {
    OS << "\n*** Register pressure info (" << RegName << "s) for "
       << MF.getName() << " ***\n"
       << "No " << RegName << " pressure found.\n";
    return;
  }

  SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);

  // Max pressure can occur at either the early-clobber or register slot.
  // Choose the maximum liveset between both slots. This is ugly but this is
  // diagnostic code.
  SlotIndex ECSlot = MISlot.getRegSlot(true);
  SlotIndex RSlot = MISlot.getRegSlot(false);
  GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind);
  GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind);
  unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind);
  unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
  GCNRPTracker::LiveRegSet *LiveSet =
      ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
  SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
  assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);

  // Split live registers into single-def and multi-def sets.
  GCNRegPressure SDefPressure, MDefPressure;
  SmallVector<Register, 16> SDefRegs, MDefRegs;
  for (auto [Reg, LaneMask] : *LiveSet) {
    assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind);
    LiveInterval &LI = LIS.getInterval(Reg);
    // A register counts as single-def when its main range has one value
    // number, or when it has subranges and each of them has one value number.
    if (LI.getNumValNums() == 1 ||
        (LI.hasSubRanges() &&
         llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) {
           return SR.getNumValNums() == 1;
         }))) {
      SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
      SDefRegs.push_back(Reg);
    } else {
      MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
      MDefRegs.push_back(Reg);
    }
  }
  unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind);
  unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind);
  assert(SDefNumRegs + MDefNumRegs == MaxNumRegs);

  // Prints "<slot>:<block>", followed by loop header and depth when loop info
  // is available and the block belongs to a loop.
  auto PrintLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) {
    return Printable([&, MBB, SI](raw_ostream &OS) {
      OS << SI << ':' << printMBBReference(*MBB);
      if (MLI)
        if (const MachineLoop *ML = MLI->getLoopFor(MBB))
          OS << " (LoopHdr " << printMBBReference(*ML->getHeader())
             << ", Depth " << ML->getLoopDepth() << ")";
    });
  };

  // Prints one register: its class, live lane mask and pressure contribution,
  // followed by every non-debug instruction that references it.
  auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) {
    GCNRegPressure RegPressure;
    RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI);
    OS << " " << printReg(Reg, TRI) << ':'
       << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask "
       << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' '
       << RegName << "s)\n";

    // Use std::map to sort def/uses by SlotIndex.
    std::map<SlotIndex, const MachineInstr *> Instrs;
    for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) {
      Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI;
    }

    for (const auto &[SI, MI] : Instrs) {
      OS << " ";
      if (MI->definesRegister(Reg, TRI))
        OS << "def ";
      if (MI->readsRegister(Reg, TRI))
        OS << "use ";
      OS << PrintLoc(MI->getParent(), SI) << ": " << *MI;
    }
  };

  OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
     << " ***\n";
  OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
     << PrintLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
     << *MaxPressureMI;

  OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
     << RegName << "s):\n";

  // Sort SDefRegs by number of uses (smallest first)
  llvm::sort(SDefRegs, [&](Register A, Register B) {
    return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) <
           std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end());
  });

  for (const Register Reg : SDefRegs) {
    PrintRegInfo(Reg, LiveSet->lookup(Reg));
  }

  OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' '
     << RegName << "s):\n";
  for (const Register Reg : MDefRegs) {
    PrintRegInfo(Reg, LiveSet->lookup(Reg));
  }
}
#endif
31 changes: 26 additions & 5 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ class SlotIndex;
struct GCNRegPressure {
enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };

/// \returns the printable name of register kind \p Kind.
/// \p Kind must be a real kind, i.e. less than TOTAL_KINDS.
static constexpr const char *getName(RegKind Kind) {
  const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"};
  // Keep the table in lockstep with the enum: a kind added without a name
  // would otherwise index past the end of the array in release builds.
  static_assert(sizeof(Names) / sizeof(Names[0]) == TOTAL_KINDS,
                "Names must have exactly one entry per register kind");
  assert(Kind < TOTAL_KINDS);
  return Names[Kind];
}

GCNRegPressure() {
clear();
}
Expand All @@ -41,6 +47,11 @@ struct GCNRegPressure {

void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }

/// \returns the value recorded in this pressure object for register kind
/// \p Kind. \p Kind must be less than TOTAL_KINDS.
unsigned getNumRegs(RegKind Kind) const {
  assert(Kind < TOTAL_KINDS);
  const unsigned KindIdx = Kind;
  return Value[KindIdx];
}

/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
/// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
Expand Down Expand Up @@ -138,6 +149,12 @@ struct GCNRegPressure {

void dump() const;

/// \returns the register kind of \p Reg, derived from the register class that
/// \p MRI reports for it.
static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) {
  const auto *SIRI =
      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
  return static_cast<RegKind>(getRegKind(MRI.getRegClass(Reg), SIRI));
}

private:
static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2;

Expand Down Expand Up @@ -294,8 +311,10 @@ class GCNRPTracker {
}
};

GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
/// Collect the live virtual registers at slot index \p SI.
///
/// When \p RegKind is a specific kind, virtual registers of any other kind
/// are skipped; the default, GCNRegPressure::TOTAL_KINDS, disables this
/// filtering.
GCNRPTracker::LiveRegSet
getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);

////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
Expand Down Expand Up @@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());

GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);

/// creates a map MachineInstr -> LiveRegSet
/// R - range of iterators on instructions
/// After - upon entry or exit of every instruction
Expand Down Expand Up @@ -524,6 +540,11 @@ struct GCNRegPressurePrinter : public MachineFunctionPass {
}
};

/// Print to dbgs() the registers of kind \p Kind that are live at the point
/// of maximum register pressure in \p MF, together with their defs and uses.
/// If \p MLI is non-null, printed locations also show the enclosing loop
/// header and loop depth.
///
/// The definition is only compiled when NDEBUG is not defined or
/// LLVM_ENABLE_DUMP is defined.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF,
GCNRegPressure::RegKind Kind,
LiveIntervals &LIS,
const MachineLoopInfo *MLI);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
31 changes: 31 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));

// Debug-only diagnostics: the options below, and the DUMP_MAX_REG_PRESSURE
// macro that guards their use, exist only when NDEBUG is not defined or
// LLVM_ENABLE_DUMP is defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
// Hidden flag, off by default: dump max-pressure register usage before
// scheduling.
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
"amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
cl::desc("Print a list of live registers along with their def/uses at the "
"point of maximum register pressure before scheduling."),
cl::init(false));

// Hidden flag, off by default: dump max-pressure register usage after
// scheduling.
static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
"amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
cl::desc("Print a list of live registers along with their def/uses at the "
"point of maximum register pressure after scheduling."),
cl::init(false));
#endif

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
Expand Down Expand Up @@ -960,6 +975,14 @@ void GCNScheduleDAGMILive::runSchedStages() {
RegionLiveOuts.buildLiveRegMap();
}

#ifdef DUMP_MAX_REG_PRESSURE
if (PrintMaxRPRegUsageBeforeScheduler) {
dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
LIS->dump();
Comment on lines +980 to +982
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lambda not doing anything, just inline it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it is called twice, before and after scheduling.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's still trivial enough to just duplicate

}
#endif

GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
auto Stage = createSchedStage(S.getCurrentStage());
Expand Down Expand Up @@ -995,6 +1018,14 @@ void GCNScheduleDAGMILive::runSchedStages() {

Stage->finalizeGCNSchedStage();
}

#ifdef DUMP_MAX_REG_PRESSURE
if (PrintMaxRPRegUsageAfterScheduler) {
dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
LIS->dump();
}
#endif
}

#ifndef NDEBUG
Expand Down