Skip to content

Commit

Permalink
[AMDGPU] Add Lower Bound to PipelineSolver
Browse files Browse the repository at this point in the history
  • Loading branch information
jrbyrnes committed Apr 5, 2023
1 parent ca6ceeb commit 3c42a58
Show file tree
Hide file tree
Showing 2 changed files with 351 additions and 33 deletions.
143 changes: 110 additions & 33 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
Expand Up @@ -61,6 +61,15 @@ static cl::opt<bool> UseCostHeur(
"Experimentally, results are mixed, so this should be set on a "
"case-by-case basis."));

static cl::opt<bool> EnableLowerBound(
"amdgpu-igrouplp-exact-solver-lower-bound", cl::Hidden,
cl::desc("Whether to use a lower bound when calculating the cost "
"for a partial fit using the exact solver. The lower bound "
"calculates the cost of assigning the remaining instructions "
"under idealized conditions. The LB reduces the overall search "
"space but adds time complexity per branch explored."),
cl::init(false));

// Components of the mask that determines which instruction types may be may be
// classified into a SchedGroup.
enum class SchedGroupMask {
Expand Down Expand Up @@ -109,7 +118,11 @@ class SchedGroup {

const SIInstrInfo *TII;

// Try to add and edge from SU A to SU B.
// Try to add and edge from SU A to SU B. This returns false if there is a
// dependency which makes adding the A->B edge impossible, otherwise it
// returns true. The result is that it will return true even if no edge was
// added. For example, if there is already an edge between A->B, this will
// return true, even though DAG->addEdge does not add edge.
bool tryAddEdge(SUnit *A, SUnit *B);

// Use SGMask to determine whether we can classify MI as a member of this
Expand All @@ -131,7 +144,7 @@ class SchedGroup {
// Add DAG dependencies and track which edges are added, and the count of
// missed edges
int link(SUnit &SU, bool MakePred,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
SmallVectorImpl<std::pair<SUnit *, SUnit *>> &AddedEdges);

// Add DAG dependencies from all SUnits in this SchedGroup and this SU.
// Use the predicate to determine whether SU should be a predecessor (P =
Expand Down Expand Up @@ -243,6 +256,9 @@ class PipelineSolver {
int BestCost = -1;
int CurrCost = 0;

// A lower bound on the optimal cost for a complete pipeline
int StaticLowerBound = 0;

// Index pointing to the conflicting instruction that is currently being
// fitted
int CurrConflInstNo = 0;
Expand Down Expand Up @@ -270,14 +286,19 @@ class PipelineSolver {
void populateReadyList(SUToCandSGsPair &CurrSU,
SmallVectorImpl<std::pair<int, int>> &ReadyList,
SmallVectorImpl<SchedGroup> &SyncPipeline);
// Calculate best cost assignment of an unassigned SU without assigning it.
// The sum of these costs across SUs represents a Lower Bound on the true best
// cost for the set of unassigned SUs.
int calculateLowerBound();
// Add edges corresponding to the SchedGroups as assigned by solver
void makePipeline();
// Add the edges from the SU to the other SchedGroups in pipeline, and
// return the number of edges missed.
int addEdges(SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
SmallVectorImpl<std::pair<SUnit *, SUnit *>> &AddedEdges,
int BestCost = -1);
// Remove the edges passed via AddedEdges
void removeEdges(const std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
void removeEdges(SmallVectorImpl<std::pair<SUnit *, SUnit *>> &AddedEdges);
// Convert the passed in maps to arrays for bidirectional iterators
void convertSyncMapsToArrays();

Expand Down Expand Up @@ -395,7 +416,7 @@ void PipelineSolver::makePipeline() {

int PipelineSolver::addEdges(
SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) {
SmallVectorImpl<std::pair<SUnit *, SUnit *>> &AddedEdges, int BestCost) {
int AddedCost = 0;
bool MakePred = false;

Expand All @@ -406,6 +427,8 @@ int PipelineSolver::addEdges(
// linked as a predecessor of the subsequent SchedGroups
auto GroupNo = (int)SyncPipeline.size() - 1;
for (; GroupNo >= 0; GroupNo--) {
if (BestCost != -1 && AddedCost >= BestCost)
return AddedCost;
if (SyncPipeline[GroupNo].getSGID() == SGID) {
MakePred = true;
continue;
Expand All @@ -419,15 +442,18 @@ int PipelineSolver::addEdges(
}

void PipelineSolver::removeEdges(
const std::vector<std::pair<SUnit *, SUnit *>> &EdgesToRemove) {
SmallVectorImpl<std::pair<SUnit *, SUnit *>> &EdgesToRemove) {
// Only remove the edges that we have added when testing
// the fit.
for (auto &PredSuccPair : EdgesToRemove) {
SUnit *Pred = PredSuccPair.first;
SUnit *Succ = PredSuccPair.second;

auto Match = llvm::find_if(
Succ->Preds, [&Pred](SDep &P) { return P.getSUnit() == Pred; });
auto Match =
std::find_if(Succ->Preds.begin(), Succ->Preds.end(), [&Pred](SDep &P) {
return P.getSUnit() == Pred && P.isArtificial();
});

if (Match != Succ->Preds.end()) {
assert(Match->isArtificial());
Succ->removePred(*Match);
Expand Down Expand Up @@ -478,7 +504,7 @@ bool PipelineSolver::checkOptimal() {
if (BestCost == -1 || CurrCost < BestCost) {
BestPipeline = CurrPipeline;
BestCost = CurrCost;
LLVM_DEBUG(dbgs() << "Found Fit with cost " << BestCost << "\n");
LLVM_DEBUG(dbgs() << "Found Fit with cost " << BestCost << '\n');
}
assert(BestCost >= 0);
}
Expand All @@ -487,7 +513,7 @@ bool PipelineSolver::checkOptimal() {
if (MaxBranchesExplored > 0 && BranchesExplored >= MaxBranchesExplored)
DoneExploring = true;

return (DoneExploring || BestCost == 0);
return (DoneExploring || BestCost == StaticLowerBound);
}

void PipelineSolver::populateReadyList(
Expand All @@ -496,8 +522,9 @@ void PipelineSolver::populateReadyList(
assert(CurrSU.second.size() >= 1);
auto I = CurrSU.second.rbegin();
auto E = CurrSU.second.rend();
SmallVector<std::pair<SUnit *, SUnit *>, 16> AddedEdges;
for (; I != E; ++I) {
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;

int CandSGID = *I;
SchedGroup *Match;
for (auto &SG : SyncPipeline) {
Expand All @@ -510,6 +537,7 @@ void PipelineSolver::populateReadyList(
ReadyList.push_back(std::pair(*I, MissPenalty));
continue;
}
AddedEdges.clear();

int TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges);
ReadyList.push_back(std::pair(*I, TempCost));
Expand All @@ -528,6 +556,52 @@ void PipelineSolver::populateReadyList(
assert(ReadyList.size() == CurrSU.second.size());
}

int PipelineSolver::calculateLowerBound() {
if (CurrSyncGroupIdx >= (int)CurrPipeline.size())
return 0;
int TempConflInstNo = CurrConflInstNo;
int TmpSyncGroupIdx = CurrSyncGroupIdx;
int MinimumCost = 0;
SmallVector<std::pair<SUnit *, SUnit *>, 16> AddedEdges;

for (; TmpSyncGroupIdx < (int)CurrPipeline.size(); TmpSyncGroupIdx++) {
auto SyncPipeline = CurrPipeline[TmpSyncGroupIdx];
for (; TempConflInstNo < (int)PipelineInstrs[TmpSyncGroupIdx].size();
TempConflInstNo++) {
auto CurrSU = PipelineInstrs[TmpSyncGroupIdx][TempConflInstNo];
auto I = CurrSU.second.rbegin();
auto E = CurrSU.second.rend();
int MinCostForSU = -1;
for (; I != E; I++) {
int CandSGID = *I;
SchedGroup *Match;
for (auto &SG : SyncPipeline) {
if (SG.getSGID() == CandSGID)
Match = &SG;
}

if (Match->isFull()) {
if (MinCostForSU == -1 || MissPenalty < MinCostForSU)
MinCostForSU = MissPenalty;
continue;
}
AddedEdges.clear();
int TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID,
AddedEdges, MinCostForSU);
if (MinCostForSU == -1 || TempCost < MinCostForSU)
MinCostForSU = TempCost;

removeEdges(AddedEdges);
if (MinCostForSU == 0)
break;
}
MinimumCost += MinCostForSU;
}
TempConflInstNo = 0;
}
return MinimumCost;
}

bool PipelineSolver::solveExact() {
if (checkOptimal())
return true;
Expand All @@ -540,12 +614,13 @@ bool PipelineSolver::solveExact() {
PipelineInstrs[CurrSyncGroupIdx].size());
SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
<< ") in Pipeline # " << CurrSyncGroupIdx << "\n");
<< ") in Pipeline # " << CurrSyncGroupIdx << '\n');

// SchedGroup -> Cost pairs
SmallVector<std::pair<int, int>, 4> ReadyList;
// Prioritize the candidate sched groups in terms of lowest cost first
populateReadyList(CurrSU, ReadyList, CurrPipeline[CurrSyncGroupIdx]);
SmallVector<std::pair<SUnit *, SUnit *>, 16> AddedEdges;

auto I = ReadyList.begin();
auto E = ReadyList.end();
Expand All @@ -558,7 +633,6 @@ bool PipelineSolver::solveExact() {

int CandSGID = I->first;
int AddedCost = 0;
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
SchedGroup *Match;
for (auto &SG : SyncPipeline) {
Expand All @@ -571,19 +645,22 @@ bool PipelineSolver::solveExact() {

LLVM_DEBUG(dbgs() << "Assigning to SchedGroup with Mask "
<< (int)Match->getMask() << "and ID " << CandSGID
<< "\n");
<< '\n');
Match->add(*CurrSU.first);
AddedEdges.clear();
AddedCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges);
LLVM_DEBUG(dbgs() << "Cost of Assignment: " << AddedCost << "\n");
LLVM_DEBUG(dbgs() << "Cost of Assignment: " << AddedCost << '\n');
CurrCost += AddedCost;
advancePosition();
++BranchesExplored;
bool FinishedExploring = false;
// If the Cost after adding edges is greater than a known solution,
// backtrack
if (CurrCost < BestCost || BestCost == -1) {
int LBCost =
(EnableLowerBound && BestCost != -1) ? calculateLowerBound() : 0;
if (BestCost == -1 || CurrCost + LBCost < BestCost) {
if (solveExact()) {
FinishedExploring = BestCost != 0;
FinishedExploring = BestCost != StaticLowerBound;
if (!FinishedExploring)
return true;
}
Expand All @@ -609,7 +686,7 @@ bool PipelineSolver::solveExact() {
bool FinishedExploring = false;
if (CurrCost < BestCost || BestCost == -1) {
if (solveExact()) {
bool FinishedExploring = BestCost != 0;
bool FinishedExploring = BestCost != StaticLowerBound;
if (!FinishedExploring)
return true;
}
Expand All @@ -622,7 +699,7 @@ bool PipelineSolver::solveExact() {

bool PipelineSolver::solveGreedy() {
BestCost = 0;
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
SmallVector<std::pair<SUnit *, SUnit *>, 16> AddedEdges;

while (static_cast<size_t>(CurrSyncGroupIdx) < PipelineInstrs.size()) {
SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
Expand All @@ -632,7 +709,7 @@ bool PipelineSolver::solveGreedy() {
int BestGroupID = -1;
auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
<< ") in Pipeline # " << CurrSyncGroupIdx << "\n");
<< ") in Pipeline # " << CurrSyncGroupIdx << '\n');

// Since we have added the potential SchedGroups from bottom up, but
// traversed the DAG from top down, parse over the groups from last to
Expand All @@ -641,7 +718,7 @@ bool PipelineSolver::solveGreedy() {
auto I = CurrSU.second.rbegin();
auto E = CurrSU.second.rend();
for (; I != E; ++I) {
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
SmallVector<std::pair<SUnit *, SUnit *>, 16> AddedEdges;
int CandSGID = *I;
SchedGroup *Match;
for (auto &SG : SyncPipeline) {
Expand All @@ -650,14 +727,15 @@ bool PipelineSolver::solveGreedy() {
}

LLVM_DEBUG(dbgs() << "Trying SGID # " << CandSGID << " with Mask "
<< (int)Match->getMask() << "\n");
<< (int)Match->getMask() << '\n');

if (Match->isFull()) {
LLVM_DEBUG(dbgs() << "SGID # " << CandSGID << " is full\n");
continue;
}
TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges);
LLVM_DEBUG(dbgs() << "Cost of Group " << TempCost << "\n");
TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges,
BestNodeCost);
LLVM_DEBUG(dbgs() << "Cost of Group " << TempCost << '\n');
if (TempCost < BestNodeCost || BestNodeCost == -1) {
BestGroup = Match;
BestNodeCost = TempCost;
Expand All @@ -672,7 +750,7 @@ bool PipelineSolver::solveGreedy() {
BestGroup->add(*CurrSU.first);
addEdges(SyncPipeline, CurrSU.first, BestGroupID, AddedEdges);
LLVM_DEBUG(dbgs() << "Best Group has ID: " << BestGroupID << " and Mask"
<< (int)BestGroup->getMask() << "\n");
<< (int)BestGroup->getMask() << '\n');
BestCost += TempCost;
} else
BestCost += MissPenalty;
Expand Down Expand Up @@ -709,11 +787,14 @@ void PipelineSolver::solve() {
LLVM_DEBUG(dbgs() << "Starting Greedy pipeline solver\n");
solveGreedy();
reset();
LLVM_DEBUG(dbgs() << "Greedy produced best cost of " << BestCost << "\n");
if (BestCost > 0) {
LLVM_DEBUG(dbgs() << "Greedy produced best cost of " << BestCost << '\n');
StaticLowerBound = calculateLowerBound();
LLVM_DEBUG(dbgs() << "Lower Bound on Pipeline Cost is " << StaticLowerBound
<< '\n');
if (BestCost > StaticLowerBound) {
LLVM_DEBUG(dbgs() << "Starting EXACT pipeline solver\n");
solveExact();
LLVM_DEBUG(dbgs() << "Exact produced best cost of " << BestCost << "\n");
LLVM_DEBUG(dbgs() << "Exact produced best cost of " << BestCost << '\n');
}
} else { // Use the Greedy Algorithm by default
LLVM_DEBUG(dbgs() << "Starting GREEDY pipeline solver\n");
Expand Down Expand Up @@ -897,7 +978,7 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
}

int SchedGroup::link(SUnit &SU, bool MakePred,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) {
SmallVectorImpl<std::pair<SUnit *, SUnit *>> &AddedEdges) {
int MissedEdges = 0;
for (auto *A : Collection) {
SUnit *B = &SU;
Expand All @@ -906,10 +987,6 @@ int SchedGroup::link(SUnit &SU, bool MakePred,
if (MakePred)
std::swap(A, B);

if (DAG->IsReachable(B, A))
continue;
// tryAddEdge returns false if there is a dependency that makes adding
// the A->B edge impossible, otherwise it returns true;
bool Added = tryAddEdge(A, B);
if (Added)
AddedEdges.push_back(std::pair(A, B));
Expand Down

0 comments on commit 3c42a58

Please sign in to comment.