Skip to content

Commit

Permalink
[AMDGPU] Enable merging m0 initializations.
Browse files Browse the repository at this point in the history
Summary:
Enable hoisting and merging of m0 defs that are initialized with the same
immediate value. Fix a bug where removed instructions were not considered
to interfere with other inits, and make sure not to hoist inits before block
prologues.

Reviewers: rampitec, arsenm

Reviewed By: rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64766

llvm-svn: 366135
  • Loading branch information
kerbowa committed Jul 15, 2019
1 parent 46b84fa commit 423b4a1
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 22 deletions.
47 changes: 32 additions & 15 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Expand Up @@ -103,7 +103,7 @@ using namespace llvm;
static cl::opt<bool> EnableM0Merge(
"amdgpu-enable-merge-m0",
cl::desc("Merge and hoist M0 initializations"),
cl::init(false));
cl::init(true));

namespace {

Expand Down Expand Up @@ -452,18 +452,32 @@ static bool isReachable(const MachineInstr *From,
(const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
}

// Locate the first instruction in MBB that is neither a PHI nor part of the
// block prologue. The scan starts at MBB->getFirstNonPHI() and steps over
// every instruction for which TII->isBasicBlockPrologue() returns true.
// Returns MBB->end() when no such instruction exists.
static MachineBasicBlock::iterator
getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
  const MachineBasicBlock::iterator BlockEnd = MBB->end();

  for (MachineBasicBlock::iterator Pos = MBB->getFirstNonPHI();
       Pos != BlockEnd; ++Pos) {
    // The first instruction that is not a prologue instruction is the answer.
    if (!TII->isBasicBlockPrologue(*Pos))
      return Pos;
  }

  return BlockEnd;
}

// Hoist and merge identical SGPR initializations into a common predecessor.
// This is intended to combine M0 initializations, but can work with any
// SGPR. A VGPR cannot be processed since we cannot guarantee vector
// execution.
static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
MachineDominatorTree &MDT) {
MachineDominatorTree &MDT,
const TargetInstrInfo *TII) {
// List of inits by immediate value.
using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
InitListMap Inits;
// List of clobbering instructions.
SmallVector<MachineInstr*, 8> Clobbers;
// List of instructions marked for deletion.
SmallSet<MachineInstr*, 8> MergedInstrs;

bool Changed = false;

for (auto &MI : MRI.def_instructions(Reg)) {
Expand Down Expand Up @@ -492,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
MachineInstr *MI2 = *I2;

// Check any possible interference
auto intereferes = [&](MachineBasicBlock::iterator From,
MachineBasicBlock::iterator To) -> bool {
auto interferes = [&](MachineBasicBlock::iterator From,
MachineBasicBlock::iterator To) -> bool {

assert(MDT.dominates(&*To, &*From));

Expand Down Expand Up @@ -525,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
};

if (MDT.dominates(MI1, MI2)) {
if (!intereferes(MI2, MI1)) {
if (!interferes(MI2, MI1)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI2->getParent()) << " " << *MI2);
MI2->eraseFromParent();
Defs.erase(I2++);
MergedInstrs.insert(MI2);
Changed = true;
++I2;
continue;
}
} else if (MDT.dominates(MI2, MI1)) {
if (!intereferes(MI1, MI2)) {
if (!interferes(MI1, MI2)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI1->getParent()) << " " << *MI1);
MI1->eraseFromParent();
Defs.erase(I1++);
MergedInstrs.insert(MI1);
Changed = true;
++I1;
break;
}
} else {
Expand All @@ -552,18 +566,18 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
continue;
}

MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
if (!interferes(MI1, I) && !interferes(MI2, I)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI1->getParent()) << " " << *MI1
<< "and moving from "
<< printMBBReference(*MI2->getParent()) << " to "
<< printMBBReference(*I->getParent()) << " " << *MI2);
I->getParent()->splice(I, MI2->getParent(), MI2);
MI1->eraseFromParent();
Defs.erase(I1++);
MergedInstrs.insert(MI1);
Changed = true;
++I1;
break;
}
}
Expand All @@ -573,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
}

for (auto MI : MergedInstrs)
MI->removeFromParent();

if (Changed)
MRI.clearKillFlags(Reg);

Expand Down Expand Up @@ -723,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}

if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);

return true;
}
108 changes: 101 additions & 7 deletions llvm/test/CodeGen/AMDGPU/merge-m0.mir
Expand Up @@ -47,13 +47,7 @@
# GCN-NEXT: DS_WRITE_B32

---
name: test
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
name: merge-m0-many-init
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
Expand Down Expand Up @@ -129,3 +123,103 @@ body: |
S_BRANCH %bb.0.entry
...

# GCN: bb.0.entry:
# GCN: SI_INIT_M0 65536
# GCN-NEXT: DS_WRITE_B32

#GCN: bb.1:
#GCN-NOT: SI_INIT_M0 65536
#GCN-NOT: SI_INIT_M0 -1

#GCN: bb.2:
#GCN: SI_INIT_M0 -1

#GCN: bb.3:
#GCN: SI_INIT_M0 -1

---
name: merge-m0-dont-hoist-past-init-with-different-initializer
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
body: |
bb.0.entry:
successors: %bb.1
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
SI_INIT_M0 65536, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2, %bb.3
SI_INIT_M0 65536, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
S_BRANCH %bb.3
bb.2:
successors: %bb.4
SI_INIT_M0 -1, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.4
bb.3:
successors: %bb.4
SI_INIT_M0 -1, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.4
bb.4:
S_ENDPGM 0
...

# GCN: bb.0.entry:
# GCN-NOT: SI_INIT_M0
# GCN: S_OR_B64
# GCN-NEXT: SI_INIT_M0

#GCN: bb.1:
#GCN-NOT: SI_INIT_M0 -1

#GCN: bb.2:
#GCN-NOT: SI_INIT_M0 -1

---
name: merge-m0-after-prologue
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
body: |
bb.0.entry:
successors: %bb.1, %bb.2
liveins: $sgpr0_sgpr1
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
S_BRANCH %bb.2
bb.1:
successors: %bb.3
SI_INIT_M0 -1, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.3
bb.2:
successors: %bb.3
SI_INIT_M0 -1, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.3
bb.3:
S_ENDPGM 0
...

0 comments on commit 423b4a1

Please sign in to comment.