Skip to content

Commit

Permalink
AMDGPU/InsertWaitcnts: Remove kill-related logic
Browse files Browse the repository at this point in the history
Summary:
This is not needed, because we don't actually insert relevant branches
for KILLs that late in the compilation flow.

Besides, this was always checking for the wrong kill opcode anyway...

Reviewers: msearles, rampitec, scott.linder, kanarayan

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D54085

llvm-svn: 346362
  • Loading branch information
nhaehnle committed Nov 7, 2018
1 parent 15e90e3 commit 0ab31c9
Showing 1 changed file with 1 addition and 101 deletions.
102 changes: 1 addition & 101 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Expand Up @@ -382,8 +382,6 @@ class SIInsertWaitcnts : public MachineFunctionPass {

DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;

std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;

// ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
// because of amdgpu-waitcnt-forcezero flag
bool ForceEmitZeroWaitcnts;
Expand All @@ -410,13 +408,6 @@ class SIInsertWaitcnts : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}

void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
  // Store a by-value snapshot rather than the pointer itself: the bracket
  // keeps changing while the block is traversed, so only a copy preserves
  // the waitcnt information as it is at this point.
  auto Snapshot = llvm::make_unique<BlockWaitcntBrackets>(*Bracket);
  KillWaitBrackets.push_back(std::move(Snapshot));
}

bool isForceEmitWaitcnt() const {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1))
Expand Down Expand Up @@ -1425,24 +1416,6 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
}

// TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
// Also handle kills for exit block.
if (Block.succ_empty() && !KillWaitBrackets.empty()) {
for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
int Span = KillWaitBrackets[I]->getScoreUB(T) -
KillWaitBrackets[I]->getScoreLB(T);
MaxPending[T] = std::max(MaxPending[T], Span);
Span = KillWaitBrackets[I]->pendingFlat(T) -
KillWaitBrackets[I]->getScoreLB(T);
MaxFlat[T] = std::max(MaxFlat[T], Span);
}

MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
}
}

// Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
for (MachineBasicBlock *Pred : Block.predecessors()) {
BlockWaitcntBrackets *PredScoreBrackets =
Expand All @@ -1460,18 +1433,6 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
}

// TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
if (Block.succ_empty() && !KillWaitBrackets.empty()) {
for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
KillWaitBrackets[I]->getScoreLB(EXP_CNT);
MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
KillWaitBrackets[I]->getScoreLB(EXP_CNT);
MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
}
}

#if 0
// LC does not add a waitcnt at the beginning (unlike, presumably, SC). Leaving this as a marker.
// TODO: how does LC distinguish between function entry and main entry?
Expand Down Expand Up @@ -1551,60 +1512,6 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
}
}

// TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
// Set the register scoreboard.
if (Block.succ_empty() && !KillWaitBrackets.empty()) {
for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
// Now merge the gpr_reg_score information.
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
int PredLB = KillWaitBrackets[I]->getScoreLB(T);
int PredUB = KillWaitBrackets[I]->getScoreUB(T);
if (PredLB < PredUB) {
int PredScale = MaxPending[T] - PredUB;
// Merge vgpr scores.
for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
if (PredRegScore <= PredLB)
continue;
int NewRegScore = PredScale + PredRegScore;
ScoreBrackets->setRegScore(
J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
}
// Also need to merge sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
int PredRegScore =
KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (PredRegScore <= PredLB)
continue;
int NewRegScore = PredScale + PredRegScore;
ScoreBrackets->setRegScore(
J + NUM_ALL_VGPRS, LGKM_CNT,
std::max(
ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
NewRegScore));
}
}
}
}

// Also merge the WaitEvent information.
ForAllWaitEventType(W) {
enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
int NewEventUB =
MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
if (NewEventUB > 0) {
ScoreBrackets->setEventUB(
W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
}
}
}
}
}

// Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
// sequencing predecessors, because changes to EXEC require waitcnts due to
// the delayed nature of these operations.
Expand Down Expand Up @@ -1701,13 +1608,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
continue;
}

// Kill instructions generate a conditional branch to the endmain block.
// Merge the current waitcnt state into the endmain block information.
// TODO: Are there other flavors of KILL instruction?
if (Inst.getOpcode() == AMDGPU::KILL) {
addKillWaitBracket(ScoreBrackets);
}

bool VCCZBugWorkAround = false;
if (readsVCCZ(Inst) &&
(!VCCZBugHandledSet.count(&Inst))) {
Expand Down Expand Up @@ -1871,7 +1771,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
LoopWaitcntDataMap.clear();
BlockWaitcntProcessedSet.clear();

// Walk over the blocks in reverse post-dominator order, inserting
// Walk over the blocks in reverse post order, inserting
// s_waitcnt where needed.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
bool Modified = false;
Expand Down

0 comments on commit 0ab31c9

Please sign in to comment.