Skip to content

Commit

Permalink
[SDA] Bug fix: Use IPD outside the loop as divergence bound
Browse files Browse the repository at this point in the history
Summary:
The immediate post dominator of the loop header may be part of the divergent loop.
Because that block was previously used as the divergence propagation bound, the SDA would not detect joins of divergent paths outside the loop.

Reviewers: nhaehnle

Reviewed By: nhaehnle

Subscribers: mmasten, arsenm, jvesely, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59042

llvm-svn: 358681
  • Loading branch information
nhaehnle committed Apr 18, 2019
1 parent b2c9fc0 commit 523f90a
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 9 deletions.
28 changes: 19 additions & 9 deletions llvm/lib/Analysis/SyncDependenceAnalysis.cpp
Expand Up @@ -218,14 +218,9 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
assert(JoinBlocks);

// immediate post dominator (no join block beyond that block)
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
const auto *IpdNode = PdNode->getIDom();
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;

// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
Expand Down Expand Up @@ -340,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {

// already available in cache?
auto ItCached = CachedLoopExitJoins.find(&Loop);
if (ItCached != CachedLoopExitJoins.end())
if (ItCached != CachedLoopExitJoins.end()) {
return *ItCached->second;
}

// don't propagate beyond the immediate post dominator of the loop
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
const auto *IpdNode = PdNode->getIDom();
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
IpdNode = IpdNode->getIDom();
PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
}

// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
*Loop.getHeader(), LoopExits, Loop.getParentLoop());
*Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);

auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
Expand All @@ -365,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;

// don't propagate beyond the immediate post dominator of the branch
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
const auto *IpdNode = PdNode->getIDom();
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;

// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);

auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
Expand Down
37 changes: 37 additions & 0 deletions llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
Expand Up @@ -21,6 +21,43 @@ merge:
ret void
}

; Regression test for a loop whose header's immediate post dominator lies
; inside the loop itself.
;
; CFG shape:
;   entry -> for.header
;   for.header -> A | B      (branch on %cond.var)
;   A -> C, B -> C           (C post-dominates for.header and is in the loop)
;   C -> E | D               (branch on %cond.uni; C -> E is a loop exit)
;   D -> for.header | F      (branch on %cond.var; back edge and loop exit)
;   E -> G, F -> G           (the two exit paths join in G, outside the loop)
;
; The phi in G merges values coming from the two different loop exits and is
; expected to be reported as divergent; the phis in E and F are not.
define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
entry:
; %cond.var is computed from the work-item id and is expected to be divergent.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.var = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %cond.var = icmp
; %cond.uni is computed only from the kernel argument %n and must not be
; reported as divergent.
%cond.uni = icmp slt i32 %n, 0
; CHECK-NOT: DIVERGENT: %cond.uni = icmp
br label %for.header
; Loop header: branches on the divergent condition; both targets fall through to C.
for.header:
br i1 %cond.var, label %A, label %B
A:
br label %C
B:
br label %C
; C is reached from both A and B; it exits to E or continues to D.
C:
br i1 %cond.uni, label %E, label %D
; D either takes the back edge to for.header or exits the loop to F,
; depending on the divergent condition.
D:
br i1 %cond.var, label %for.header, label %F

; Exit block reached from C; its single-incoming phi of a constant must not be
; reported as divergent.
E:
%e.lcssa.uni = phi i32 [ 0, %C ]
; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32
br label %G

; Exit block reached from D; its single-incoming phi of a constant must not be
; reported as divergent.
F:
%f.lcssa.uni = phi i32 [ 1, %D ]
; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32
br label %G

; Join of the two loop-exit paths; this phi is the one expected to be divergent.
G:
%g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ]
; CHECK: DIVERGENT: %g.join.var = phi i32
ret void
}


; Intrinsic called by the test kernel above to obtain the value that
; %cond.var is derived from.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group #0, referenced by both the kernel definition and the
; intrinsic declaration.
attributes #0 = { nounwind readnone }

0 comments on commit 523f90a

Please sign in to comment.