Skip to content

Commit

Permalink
Irreducible loop metadata for more accurate block frequency under PGO.
Browse files Browse the repository at this point in the history
Summary:
Currently the block frequency analysis is an approximation for irreducible
loops.

The new irreducible loop metadata is used to annotate the irreducible loop
headers with their header weights based on the PGO profile (currently this is
approximated to be evenly weighted) and to help improve the accuracy of the
block frequency analysis for irreducible loops.

This patch adds basic support for this.

Reviewers: davidxl

Reviewed By: davidxl

Subscribers: mehdi_amini, llvm-commits, eraman

Differential Revision: https://reviews.llvm.org/D39028

llvm-svn: 317278
  • Loading branch information
hjyamauchi committed Nov 2, 2017
1 parent 64b6e5a commit dce9def
Show file tree
Hide file tree
Showing 21 changed files with 600 additions and 10 deletions.
23 changes: 23 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5194,6 +5194,29 @@ the loop identifier metadata node directly:
!1 = !{!1} ; an identifier for the inner loop
!2 = !{!2} ; an identifier for the outer loop

'``irr_loop``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^

``irr_loop`` metadata may be attached to the terminator instruction of a basic
block that's an irreducible loop header (note that an irreducible loop has more
than one header basic block). If ``irr_loop`` metadata is attached to the
terminator instruction of a basic block that is not really an irreducible loop
header, the behavior is undefined. The intent of this metadata is to improve the
accuracy of the block frequency propagation. For example, in the code below, the
block ``header0`` may have a loop header weight (relative to the other headers of
the irreducible loop) of 100:

.. code-block:: llvm

header0:
...
br i1 %cmp, label %t1, label %t2, !irr_loop !0

...
!0 = !{"loop_header_weight", i64 100}

Irreducible loop header weights are typically based on profile data.

'``invariant.group``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/BlockFrequencyInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ class BlockFrequencyInfo {
/// the enclosing function's count (if available) and returns the value.
Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;

/// \brief Returns true if \p BB is an irreducible loop header
/// block. Otherwise false.
bool isIrrLoopHeader(const BasicBlock *BB);

// Set the frequency of the given basic block.
void setBlockFreq(const BasicBlock *BB, uint64_t Freq);

Expand Down
49 changes: 43 additions & 6 deletions llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
Expand Down Expand Up @@ -414,6 +415,10 @@ class BlockFrequencyInfoImplBase {
/// \brief Data about each block. This is used downstream.
std::vector<FrequencyData> Freqs;

/// \brief Whether each block is an irreducible loop header.
/// This is used downstream.
SparseBitVector<> IsIrrLoopHeader;

/// \brief Loop data: see initializeLoops().
std::vector<WorkingData> Working;

Expand Down Expand Up @@ -492,6 +497,8 @@ class BlockFrequencyInfoImplBase {
/// the backedges going into each of the loop headers.
void adjustLoopHeaderMass(LoopData &Loop);

void distributeIrrLoopHeaderMass(Distribution &Dist);

/// \brief Package up a loop.
void packageLoop(LoopData &Loop);

Expand Down Expand Up @@ -520,6 +527,7 @@ class BlockFrequencyInfoImplBase {
const BlockNode &Node) const;
Optional<uint64_t> getProfileCountFromFreq(const Function &F,
uint64_t Freq) const;
bool isIrrLoopHeader(const BlockNode &Node);

void setBlockFreq(const BlockNode &Node, uint64_t Freq);

Expand Down Expand Up @@ -973,6 +981,10 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
}

bool isIrrLoopHeader(const BlockT *BB) {
return BlockFrequencyInfoImplBase::isIrrLoopHeader(getNode(BB));
}

void setBlockFreq(const BlockT *BB, uint64_t Freq);

Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
Expand Down Expand Up @@ -1140,17 +1152,39 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");

if (Loop.isIrreducible()) {
BlockMass Remaining = BlockMass::getFull();
DEBUG(dbgs() << "isIrreducible = true\n");
Distribution Dist;
unsigned NumHeadersWithWeight = 0;
for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
auto &Mass = Working[Loop.Nodes[H].Index].getMass();
Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H);
Remaining -= Mass;
auto &HeaderNode = Loop.Nodes[H];
const BlockT *Block = getBlock(HeaderNode);
IsIrrLoopHeader.set(Loop.Nodes[H].Index);
Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
if (!HeaderWeight)
continue;
DEBUG(dbgs() << getBlockName(HeaderNode)
<< " has irr loop header weight " << HeaderWeight.getValue()
<< "\n");
NumHeadersWithWeight++;
uint64_t HeaderWeightValue = HeaderWeight.getValue();
if (HeaderWeightValue)
Dist.addLocal(HeaderNode, HeaderWeightValue);
}
if (NumHeadersWithWeight != Loop.NumHeaders) {
// Not all headers have a weight metadata. Distribute weight evenly.
Dist = Distribution();
for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
auto &HeaderNode = Loop.Nodes[H];
Dist.addLocal(HeaderNode, 1);
}
}
distributeIrrLoopHeaderMass(Dist);
for (const BlockNode &M : Loop.Nodes)
if (!propagateMassToSuccessors(&Loop, M))
llvm_unreachable("unhandled irreducible control flow");

adjustLoopHeaderMass(Loop);
if (NumHeadersWithWeight != Loop.NumHeaders)
// Not all headers have a weight metadata. Adjust header mass.
adjustLoopHeaderMass(Loop);
} else {
Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
Expand Down Expand Up @@ -1285,6 +1319,9 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const {
BlockFrequencyInfoImplBase::getBlockProfileCount(
*F->getFunction(), getNode(&BB)))
OS << ", count = " << ProfileCount.getValue();
if (Optional<uint64_t> IrrLoopHeaderWeight =
BB.getIrrLoopHeaderWeight())
OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue();
OS << "\n";
}

Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/MachineBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class MachineBasicBlock
using const_probability_iterator =
std::vector<BranchProbability>::const_iterator;

Optional<uint64_t> IrrLoopHeaderWeight;

/// Keep track of the physical registers that are livein of the basicblock.
using LiveInVector = std::vector<RegisterMaskPair>;
LiveInVector LiveIns;
Expand Down Expand Up @@ -729,6 +731,14 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
MCSymbol *getSymbol() const;

/// Return the irreducible loop header weight stored on this block, or an
/// empty Optional if no weight has been set.
Optional<uint64_t> getIrrLoopHeaderWeight() const {
return IrrLoopHeaderWeight;
}

/// Record \p Weight as this block's irreducible loop header weight,
/// replacing any previously stored value.
void setIrrLoopHeaderWeight(uint64_t Weight) {
IrrLoopHeaderWeight = Weight;
}

private:
/// Return probability iterator corresponding to the I successor iterator.
probability_iterator getProbabilityIterator(succ_iterator I);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ class MachineBlockFrequencyInfo : public MachineFunctionPass {
Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const;
Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;

bool isIrrLoopHeader(const MachineBasicBlock *MBB);

const MachineFunction *getFunction() const;
const MachineBranchProbabilityInfo *getMBPI() const;
void view(const Twine &Name, bool isSimple = true) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/BasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,8 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
/// \brief Return true if it is legal to hoist instructions into this block.
bool isLegalToHoistInto() const;

Optional<uint64_t> getIrrLoopHeaderWeight() const;

private:
/// \brief Increment the internal refcount of the number of BlockAddresses
/// referencing this BasicBlock by \p Amt.
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/LLVMContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class LLVMContext {
MD_absolute_symbol = 21, // "absolute_symbol"
MD_associated = 22, // "associated"
MD_callees = 23, // "callees"
MD_irr_loop = 24, // "irr_loop"
};

/// Known operand bundle tag IDs, which always have the same value. All
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/MDBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ class MDBuilder {
/// base type, access type and offset relative to the base type.
MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
uint64_t Offset, bool IsConstant = false);

/// \brief Return metadata containing an irreducible loop header weight.
MDNode *createIrrLoopHeaderWeight(uint64_t Weight);
};

} // end namespace llvm
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Transforms/PGOInstrumentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class PGOMemOPSizeOpt : public PassInfoMixin<PGOMemOPSizeOpt> {
void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
uint64_t MaxCount);

void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count);

} // end namespace llvm

#endif // LLVM_TRANSFORMS_PGOINSTRUMENTATION_H
5 changes: 5 additions & 0 deletions llvm/lib/Analysis/BlockFrequencyInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
return BFI->getProfileCountFromFreq(*getFunction(), Freq);
}

/// \brief Returns true if \p BB is an irreducible loop header block.
///
/// Forwards the query to the underlying implementation object (BFI), which
/// is asserted to be non-null.
bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) {
assert(BFI && "Expected analysis to be available");
return BFI->isIrrLoopHeader(BB);
}

void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
assert(BFI && "Expected analysis to be available");
BFI->setBlockFreq(BB, Freq);
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ void BlockFrequencyInfoImplBase::clear() {
// Swap with a default-constructed std::vector, since std::vector<>::clear()
// does not actually clear heap storage.
std::vector<FrequencyData>().swap(Freqs);
IsIrrLoopHeader.clear();
std::vector<WorkingData>().swap(Working);
Loops.clear();
}
Expand All @@ -280,8 +281,10 @@ void BlockFrequencyInfoImplBase::clear() {
/// Releases all memory not used downstream. In particular, saves Freqs and
/// IsIrrLoopHeader.
static void cleanup(BlockFrequencyInfoImplBase &BFI) {
  // Move the downstream results aside so that clear() does not discard them.
  auto KeptFreqs = std::move(BFI.Freqs);
  auto KeptIrrLoopHeaders = std::move(BFI.IsIrrLoopHeader);

  BFI.clear();

  // Hand the preserved results back to the (now otherwise empty) object.
  BFI.Freqs = std::move(KeptFreqs);
  BFI.IsIrrLoopHeader = std::move(KeptIrrLoopHeaders);
}

bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Expand Down Expand Up @@ -572,6 +575,13 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
return BlockCount.getLimitedValue();
}

/// \brief Returns true if \p Node is marked as an irreducible loop header.
///
/// An invalid node is never a header; otherwise the answer is the bit stored
/// at the node's index in IsIrrLoopHeader.
bool
BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
  return Node.isValid() && IsIrrLoopHeader.test(Node.Index);
}

Scaled64
BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
if (!Node.isValid())
Expand Down Expand Up @@ -819,3 +829,14 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
}
}

/// \brief Split a full block mass among the targets listed in \p Dist.
///
/// Each weight in \p Dist must be local; the mass taken for a weight is
/// written directly into the working data of that weight's target node.
void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
  BlockMass FullMass = BlockMass::getFull();
  DitheringDistributer Distributer(Dist, FullMass);
  for (const Weight &HeaderWeight : Dist.Weights) {
    // Carve out this target's share of the remaining mass.
    BlockMass Share = Distributer.takeMass(HeaderWeight.Amount);
    assert(HeaderWeight.Type == Weight::Local &&
           "all weights should be local");
    Working[HeaderWeight.TargetNode.Index].getMass() = Share;
    DEBUG(debugAssign(*this, Distributer, HeaderWeight.TargetNode, Share,
                      nullptr));
  }
}
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/MachineBasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ using namespace llvm;
/// Construct a machine basic block in \p MF that corresponds to the IR block
/// \p B (which may be null). When \p B is given, its irreducible loop header
/// weight, if any, is copied onto the new machine block.
MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
    : BB(B), Number(-1), xParent(&MF) {
  Insts.Parent = this;

  // Without an IR block there is no metadata to inherit.
  if (B == nullptr)
    return;
  IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
}

MachineBasicBlock::~MachineBasicBlock() {
Expand Down Expand Up @@ -338,6 +340,12 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
OS << '\n';
}
if (IrrLoopHeaderWeight) {
if (Indexes) OS << '\t';
OS << " Irreducible loop header weight: "
<< IrrLoopHeaderWeight.getValue();
OS << '\n';
}
}

void MachineBasicBlock::printAsOperand(raw_ostream &OS,
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,12 @@ MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None;
}

/// Returns true if \p MBB is an irreducible loop header block.
///
/// Forwards the query to the underlying implementation object (MBFI), which
/// is asserted to be non-null.
bool
MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
assert(MBFI && "Expected analysis to be available");
return MBFI->isIrrLoopHeader(MBB);
}

/// Return the function reported by the underlying implementation object, or
/// nullptr when no implementation object (MBFI) exists.
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
  if (!MBFI)
    return nullptr;
  return MBFI->getFunction();
}
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/IR/BasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,16 @@ bool BasicBlock::isLandingPad() const {
/// Return the first non-PHI instruction of this block as a LandingPadInst;
/// the result is whatever dyn_cast yields for that instruction.
const LandingPadInst *BasicBlock::getLandingPadInst() const {
  const auto *FirstNonPHI = getFirstNonPHI();
  return dyn_cast<LandingPadInst>(FirstNonPHI);
}

/// \brief Return the irreducible loop header weight attached to this block.
///
/// The weight is read from the !irr_loop metadata on the block's terminator.
/// The metadata is expected to have the form
///   !{!"loop_header_weight", i64 <weight>}
/// \returns the weight, or an empty Optional when the block has no
/// terminator, the terminator carries no !irr_loop metadata, or the
/// metadata's first operand is not "loop_header_weight".
Optional<uint64_t> BasicBlock::getIrrLoopHeaderWeight() const {
  const TerminatorInst *TI = getTerminator();
  // getTerminator() yields null for a block that is not well formed (e.g. one
  // still under construction); such a block cannot carry the metadata.
  if (!TI)
    return Optional<uint64_t>();

  MDNode *MDIrrLoopHeader = TI->getMetadata(LLVMContext::MD_irr_loop);
  if (!MDIrrLoopHeader)
    return Optional<uint64_t>();

  // Operand 0 names the kind of annotation; only header weights are known.
  MDString *MDName = cast<MDString>(MDIrrLoopHeader->getOperand(0));
  if (!MDName->getString().equals("loop_header_weight"))
    return Optional<uint64_t>();

  // Operand 1 holds the weight as an integer constant.
  auto *CI = mdconst::extract<ConstantInt>(MDIrrLoopHeader->getOperand(1));
  return Optional<uint64_t>(CI->getValue().getZExtValue());
}
1 change: 1 addition & 0 deletions llvm/lib/IR/LLVMContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
{MD_absolute_symbol, "absolute_symbol"},
{MD_associated, "associated"},
{MD_callees, "callees"},
{MD_irr_loop, "irr_loop"},
};

for (auto &MDKind : MDKinds) {
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/IR/MDBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,10 @@ MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
}
return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)});
}

/// \brief Return metadata containing an irreducible loop header weight.
///
/// The node has two operands: the string "loop_header_weight" followed by
/// \p Weight as an i64 constant.
MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
  Metadata *Ops[] = {
      createString("loop_header_weight"),
      createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight)),
  };
  return MDNode::get(Context, Ops);
}

0 comments on commit dce9def

Please sign in to comment.