[DivergenceAnalysis] Add methods for querying divergence at use

Summary:
The existing isDivergent(Value) methods query whether a value is
divergent at its definition. However even if a value is uniform at its
definition, a use of it in another basic block can be divergent because
of divergent control flow between the def and the use.
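
For illustration (a hypothetical C-style sketch, not part of the patch): a
loop counter is uniform at each of its definitions, yet divergent after the
loop, because lanes with different bounds leave the loop after different
trip counts.

  int i = 0;
  while (i < x) // x is divergent: each lane may have a different bound
    ++i;
  use(i);       // i is divergent at this use, even though it is uniform
                // (the same across all active lanes) at its definition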

This patch adds new isDivergent(Use) methods to DivergenceAnalysis,
LegacyDivergenceAnalysis and GPUDivergenceAnalysis.

This might allow D63953 or other similar workarounds to be removed.

Reviewers: alex-t, nhaehnle, arsenm, rtaylor, rampitec, simoll, jingyue

Reviewed By: nhaehnle

Subscribers: jfb, jvesely, wdng, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65141

llvm-svn: 367218
jayfoad committed Jul 29, 2019
1 parent a4f08dd commit dcb7532
Showing 6 changed files with 79 additions and 20 deletions.
16 changes: 13 additions & 3 deletions llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -73,9 +73,12 @@ class DivergenceAnalysis {
/// operands
bool isAlwaysUniform(const Value &Val) const;

/// \brief Whether \p Val is a divergent value
/// \brief Whether \p Val is divergent at its definition.
bool isDivergent(const Value &Val) const;

/// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
bool isDivergentUse(const Use &U) const;

void print(raw_ostream &OS, const Module *) const;

private:
@@ -189,12 +192,19 @@ class GPUDivergenceAnalysis {
/// The GPU kernel this analysis result is for
const Function &getFunction() const { return DA.getFunction(); }

/// Whether \p V is divergent.
/// Whether \p V is divergent at its definition.
bool isDivergent(const Value &V) const;

/// Whether \p V is uniform/non-divergent
/// Whether \p U is divergent. Uses of a uniform value can be divergent.
bool isDivergentUse(const Use &U) const;

/// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }

/// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
/// divergent.
bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }

/// Print all divergent values in the kernel.
void print(raw_ostream &OS, const Module *) const;
};
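
A minimal sketch of how a client might consult the new query (hypothetical
caller, not part of the patch; assumes an already-computed DivergenceAnalysis
DA and an instruction I):

  // Query divergence per use site rather than per defining value: an
  // operand that is uniform at its definition can still be divergent
  // here if divergent control flow separates its def from this use.
  for (Use &U : I.operands()) {
    if (DA.isDivergentUse(U))
      return; // bail out of a uniform-only transformation
  }
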
16 changes: 10 additions & 6 deletions llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -39,17 +39,18 @@ class LegacyDivergenceAnalysis : public FunctionPass {
void print(raw_ostream &OS, const Module *) const override;

// Returns true if V is divergent at its definition.
//
// Even if this function returns false, V may still be divergent when used
// in a different basic block.
bool isDivergent(const Value *V) const;

// Returns true if U is divergent. Uses of a uniform value can be divergent.
bool isDivergentUse(const Use *U) const;

// Returns true if V is uniform/non-divergent.
//
// Even if this function returns true, V may still be divergent when used
// in a different basic block.
bool isUniform(const Value *V) const { return !isDivergent(V); }

// Returns true if U is uniform/non-divergent. Uses of a uniform value can be
// divergent.
bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }

// Keep the analysis results up to date by removing an erased value.
void removeValue(const Value *V) { DivergentValues.erase(V); }

@@ -62,6 +63,9 @@ class LegacyDivergenceAnalysis : public FunctionPass {

// Stores all divergent values.
DenseSet<const Value *> DivergentValues;

// Stores divergent uses of possibly uniform values.
DenseSet<const Use *> DivergentUses;
};
} // End llvm namespace

10 changes: 10 additions & 0 deletions llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const {
return DivergentValues.find(&V) != DivergentValues.end();
}

bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
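// A use is divergent if the used value is already divergent at its
// definition, or if divergent control flow between the definition and
// this use makes it temporally divergent in the user's block.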
Value &V = *U.get();
Instruction &I = *cast<Instruction>(U.getUser());
return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
}

void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
@@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
return DA.isDivergent(val);
}

bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
return DA.isDivergentUse(use);
}

void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
DA.print(OS, mod);
29 changes: 22 additions & 7 deletions llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -93,8 +93,9 @@ namespace {
class DivergencePropagator {
public:
DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
PostDominatorTree &PDT, DenseSet<const Value *> &DV)
: F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
PostDominatorTree &PDT, DenseSet<const Value *> &DV,
DenseSet<const Use *> &DU)
: F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
void populateWithSourcesOfDivergence();
void propagate();

@@ -118,11 +119,14 @@ class DivergencePropagator {
PostDominatorTree &PDT;
std::vector<Value *> Worklist; // Stack for DFS.
DenseSet<const Value *> &DV; // Stores all divergent values.
DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
// values.
};

void DivergencePropagator::populateWithSourcesOfDivergence() {
Worklist.clear();
DV.clear();
DU.clear();
for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
Worklist.push_back(&I);
@@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
// dominators of TI until it is outside the influence region.
BasicBlock *InfluencedBB = ThisBB;
while (InfluenceRegion.count(InfluencedBB)) {
for (auto &I : *InfluencedBB)
findUsersOutsideInfluenceRegion(I, InfluenceRegion);
for (auto &I : *InfluencedBB) {
if (!DV.count(&I))
findUsersOutsideInfluenceRegion(I, InfluenceRegion);
}
DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
if (IDomNode == nullptr)
break;
@@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {

void DivergencePropagator::findUsersOutsideInfluenceRegion(
Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
for (User *U : I.users()) {
Instruction *UserInst = cast<Instruction>(U);
for (Use &Use : I.uses()) {
Instruction *UserInst = cast<Instruction>(Use.getUser());
if (!InfluenceRegion.count(UserInst->getParent())) {
DU.insert(&Use);
if (DV.insert(UserInst).second)
Worklist.push_back(UserInst);
}
Expand Down Expand Up @@ -320,6 +327,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
return false;

DivergentValues.clear();
DivergentUses.clear();
gpuDA = nullptr;

auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -332,7 +340,7 @@

} else {
// run LLVM's existing DivergenceAnalysis
DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
DP.populateWithSourcesOfDivergence();
DP.propagate();
}
@@ -351,6 +359,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
return DivergentValues.count(V);
}

bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
if (gpuDA) {
return gpuDA->isDivergentUse(*U);
}
return DivergentValues.count(U->get()) || DivergentUses.count(U);
}

void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
return;
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -142,11 +142,11 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {

// If the pointer operand is divergent, then each lane is doing an atomic
// operation on a different address, and we cannot optimize that.
if (DA->isDivergent(I.getOperand(PtrIdx))) {
if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) {
return;
}

const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));

// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
@@ -219,7 +219,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {

const unsigned ValIdx = 0;

const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));

// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
@@ -232,7 +232,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
// If any of the other arguments to the intrinsic are divergent, we can't
// optimize the operation.
for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
if (DA->isDivergent(I.getOperand(Idx))) {
if (DA->isDivergentUse(&I.getOperandUse(Idx))) {
return;
}
}
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/AMDGPU/divergence-at-use.ll
@@ -0,0 +1,20 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s -use-gpu-divergence-analysis | FileCheck %s

@local = addrspace(3) global i32 undef

define void @reducible(i32 %x) {
; CHECK-LABEL: reducible:
; CHECK-NOT: dpp
entry:
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
%gep = getelementptr i32, i32 addrspace(3)* @local, i32 %i
%cond = icmp ult i32 %i, %x
%i1 = add i32 %i, 1
br i1 %cond, label %loop, label %exit
exit:
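; %gep is uniform at its definition inside the loop (all active lanes compute
; the same %i each iteration), but lanes leave the loop after different trip
; counts because %x is divergent, so %gep is divergent at this use and the
; atomic optimizer must leave the atomicrmw alone (hence CHECK-NOT: dpp).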
%old = atomicrmw add i32 addrspace(3)* %gep, i32 %x acq_rel
ret void
}
