Skip to content

Commit

Permalink
Updates branch_weights annotation for call instructions during inlining.
Browse files Browse the repository at this point in the history
Summary: Inliner should update the branch_weights annotation to scale it to proper value.

Reviewers: davidxl, eraman

Reviewed By: eraman

Subscribers: zzheng, llvm-commits

Differential Revision: https://reviews.llvm.org/D30767

llvm-svn: 298270
  • Loading branch information
danielcdh committed Mar 20, 2017
1 parent 6c2949d commit e593049
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 11 deletions.
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/Instruction.h
Expand Up @@ -252,6 +252,9 @@ class Instruction : public User,
/// Returns false if no metadata was found.
bool extractProfTotalWeight(uint64_t &TotalVal) const;

/// Updates branch_weights metadata by scaling it by \p S / \p T.
void updateProfWeight(uint64_t S, uint64_t T);

/// Set the debug location information for this instruction.
void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }

Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/IR/Instruction.cpp
Expand Up @@ -17,6 +17,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
using namespace llvm;
Expand Down Expand Up @@ -629,3 +630,25 @@ Instruction *Instruction::clone() const {
New->copyMetadata(*this);
return New;
}

void Instruction::updateProfWeight(uint64_t S, uint64_t T) {
auto *ProfileData = getMetadata(LLVMContext::MD_prof);
if (ProfileData == nullptr)
return;

auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
return;

SmallVector<uint32_t, 4> Weights;
for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
// Using APInt::div may be expensive, but most cases should fit in 64 bits.
APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i))
->getValue()
.getZExtValue());
Val *= APInt(128, S);
Weights.push_back(Val.udiv(APInt(128, T)).getLimitedValue());
}
MDBuilder MDB(getContext());
setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}
51 changes: 40 additions & 11 deletions llvm/lib/Transforms/Utils/InlineFunction.cpp
Expand Up @@ -25,6 +25,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
Expand Down Expand Up @@ -1425,28 +1426,55 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
ClonedBBs);
}

/// Update the branch metadata for cloned call instructions.
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
const Optional<uint64_t> &CalleeEntryCount,
const Instruction *TheCall) {
if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
return;
Optional<uint64_t> CallSiteCount =
ProfileSummaryInfo::getProfileCount(TheCall, nullptr);
uint64_t CallCount =
std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
CalleeEntryCount.getValue());

for (auto const &Entry : VMap)
if (isa<CallInst>(Entry.first) && &*Entry.second != nullptr &&
isa<CallInst>(Entry.second))
cast<CallInst>(Entry.second)
->updateProfWeight(CallCount, CalleeEntryCount.getValue());
for (BasicBlock &BB : *Callee)
// No need to update the callsite if it is pruned during inlining.
if (VMap.count(&BB))
for (Instruction &I : BB)
if (CallInst *CI = dyn_cast<CallInst>(&I))
CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount,
CalleeEntryCount.getValue());
}

/// Update the entry count of callee after inlining.
///
/// The callsite's block count is subtracted from the callee's function entry
/// count.
static void updateCalleeCount(BlockFrequencyInfo &CallerBFI, BasicBlock *CallBB,
Function *Callee) {
static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
Instruction *CallInst, Function *Callee) {
// If the callee has a original count of N, and the estimated count of
// callsite is M, the new callee count is set to N - M. M is estimated from
// the caller's entry count, its entry block frequency and the block frequency
// of the callsite.
Optional<uint64_t> CalleeCount = Callee->getEntryCount();
if (!CalleeCount)
if (!CalleeCount.hasValue())
return;
Optional<uint64_t> CallSiteCount = CallerBFI.getBlockProfileCount(CallBB);
if (!CallSiteCount)
Optional<uint64_t> CallCount =
ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI);
if (!CallCount.hasValue())
return;
// Since CallSiteCount is an estimate, it could exceed the original callee
// count and has to be set to 0.
if (CallSiteCount.getValue() > CalleeCount.getValue())
if (CallCount.getValue() > CalleeCount.getValue())
Callee->setEntryCount(0);
else
Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());
Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue());
}

/// This function inlines the called function into the basic block of the
Expand Down Expand Up @@ -1636,13 +1664,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;

if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) {
if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
// Update the BFI of blocks cloned into the caller.
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
CalledFunc->front());
// Update the profile count of callee.
updateCalleeCount(*IFI.CallerBFI, OrigBB, CalledFunc);
}

updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall);
// Update the profile count of callee.
updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc);

// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
Expand Down
39 changes: 39 additions & 0 deletions llvm/test/Transforms/Inline/prof-update.ll
@@ -0,0 +1,39 @@
; RUN: opt < %s -inline -S | FileCheck %s
; Checks if inliner updates branch_weights annotation for call instructions.

declare void @ext();
declare void @ext1();

; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
define void @callee(i32 %n) !prof !1 {
%cond = icmp sle i32 %n, 10
br i1 %cond, label %cond_true, label %cond_false
cond_true:
; ext1 is optimized away, thus not updated.
; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
call void @ext1(), !prof !2
ret void
cond_false:
; ext is cloned and updated.
; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
call void @ext(), !prof !2
ret void
}

; CHECK: define void @caller()
define void @caller() {
; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
call void @callee(i32 15), !prof !3
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"MaxFunctionCount", i32 2000}
!1 = !{!"function_entry_count", i64 1000}
!2 = !{!"branch_weights", i64 2000}
!3 = !{!"branch_weights", i64 400}
attributes #0 = { alwaysinline }
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}

0 comments on commit e593049

Please sign in to comment.