Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

CodeGen: Initial instrumentation based PGO implementation

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@198640 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information...
commit e0e019f6a2d76e140221d9c10159a444839e2563 1 parent f4c386b
@bogner bogner authored
View
3  lib/CodeGen/CGCUDARuntime.cpp
@@ -31,7 +31,8 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock);
+ CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock,
+ /*TrueCount=*/0);
eval.begin(CGF);
CGF.EmitBlock(ConfigOKBlock);
View
1  lib/CodeGen/CGCall.cpp
@@ -2184,6 +2184,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
call->setCallingConv(getRuntimeCC());
Builder.CreateUnreachable();
}
+ PGO.setCurrentRegionCount(0);
}
/// Emits a call or invoke instruction to the given nullary runtime
View
6 lib/CodeGen/CGException.cpp
@@ -1294,6 +1294,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// Initialize the catch variable and set up the cleanups.
BeginCatch(*this, C);
+ // Emit the PGO counter increment
+ RegionCounter CatchCnt = getPGORegionCounter(C);
+ CatchCnt.beginRegion(Builder);
+
// Perform the body of the catch.
EmitStmt(C->getHandlerBlock());
@@ -1320,7 +1324,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
Builder.CreateBr(ContBB);
}
+ RegionCounter ContCnt = getPGORegionCounter(&S);
EmitBlock(ContBB);
+ ContCnt.beginRegion(Builder);
}
namespace {
View
14 lib/CodeGen/CGExpr.cpp
@@ -2651,6 +2651,7 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
}
OpaqueValueMapping binding(*this, expr);
+ RegionCounter Cnt = getPGORegionCounter(expr);
const Expr *condExpr = expr->getCond();
bool CondExprBool;
@@ -2658,8 +2659,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr();
if (!CondExprBool) std::swap(live, dead);
- if (!ContainsLabel(dead))
+ if (!ContainsLabel(dead)) {
+ // If the true case is live, we need to track its region
+ if (CondExprBool)
+ Cnt.beginRegion(Builder);
return EmitLValue(live);
+ }
}
llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true");
@@ -2667,13 +2672,15 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
ConditionalEvaluation eval(*this);
- EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock);
+ EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
// Any temporaries created here are conditional.
EmitBlock(lhsBlock);
+ Cnt.beginRegion(Builder);
eval.begin(*this);
LValue lhs = EmitLValue(expr->getTrueExpr());
eval.end(*this);
+ Cnt.adjustFallThroughCount();
if (!lhs.isSimple())
return EmitUnsupportedLValue(expr, "conditional operator");
@@ -2683,14 +2690,17 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
// Any temporaries created here are conditional.
EmitBlock(rhsBlock);
+ Cnt.beginElseRegion();
eval.begin(*this);
LValue rhs = EmitLValue(expr->getFalseExpr());
eval.end(*this);
+ Cnt.adjustFallThroughCount();
if (!rhs.isSimple())
return EmitUnsupportedLValue(expr, "conditional operator");
rhsBlock = Builder.GetInsertBlock();
EmitBlock(contBlock);
+ Cnt.applyAdjustmentsToRegion();
llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2,
"cond-lvalue");
View
8 lib/CodeGen/CGExprAgg.cpp
@@ -892,15 +892,18 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
// Save whether the destination's lifetime is externally managed.
bool isExternallyDestructed = Dest.isExternallyDestructed();
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
+ Cnt.beginRegion(Builder);
Visit(E->getTrueExpr());
+ Cnt.adjustFallThroughCount();
eval.end(CGF);
assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!");
@@ -914,10 +917,13 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
+ Cnt.beginElseRegion();
Visit(E->getFalseExpr());
+ Cnt.adjustFallThroughCount();
eval.end(CGF);
CGF.EmitBlock(ContBlock);
+ Cnt.applyAdjustmentsToRegion();
}
void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) {
View
8 lib/CodeGen/CGExprComplex.cpp
@@ -752,22 +752,28 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
+ Cnt.beginRegion(Builder);
ComplexPairTy LHS = Visit(E->getTrueExpr());
+ Cnt.adjustFallThroughCount();
LHSBlock = Builder.GetInsertBlock();
CGF.EmitBranch(ContBlock);
eval.end(CGF);
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
+ Cnt.beginElseRegion();
ComplexPairTy RHS = Visit(E->getFalseExpr());
+ Cnt.adjustFallThroughCount();
RHSBlock = Builder.GetInsertBlock();
CGF.EmitBlock(ContBlock);
eval.end(CGF);
+ Cnt.applyAdjustmentsToRegion();
// Create a PHI node for the real part.
llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), 2, "cond.r");
View
37 lib/CodeGen/CGExprScalar.cpp
@@ -2874,8 +2874,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
}
Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
// Perform vector logical and on comparisons with zero vectors.
if (E->getType()->isVectorType()) {
+ Cnt.beginRegion(Builder);
+
Value *LHS = Visit(E->getLHS());
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@@ -2897,6 +2901,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
bool LHSCondVal;
if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
if (LHSCondVal) { // If we have 1 && X, just emit X.
+ Cnt.beginRegion(Builder);
+
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
// ZExt result to int or bool.
return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext");
@@ -2913,7 +2919,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
CodeGenFunction::ConditionalEvaluation eval(CGF);
// Branch on the LHS first. If it is false, go to the failure (cont) block.
- CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock);
+ CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
// Any edges into the ContBlock are now from an (indeterminate number of)
// edges from this first condition. All of these values will be false. Start
@@ -2926,7 +2932,9 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
+ Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
+ Cnt.adjustFallThroughCount();
eval.end(CGF);
// Reaquire the RHS block, as there may be subblocks inserted.
@@ -2939,14 +2947,19 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
CGF.EmitBlock(ContBlock);
PN->addIncoming(RHSCond, RHSBlock);
+ Cnt.applyAdjustmentsToRegion();
// ZExt result to int.
return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
}
Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
// Perform vector logical or on comparisons with zero vectors.
if (E->getType()->isVectorType()) {
+ Cnt.beginRegion(Builder);
+
Value *LHS = Visit(E->getLHS());
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@@ -2968,6 +2981,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
bool LHSCondVal;
if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
if (!LHSCondVal) { // If we have 0 || X, just emit X.
+ Cnt.beginRegion(Builder);
+
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
// ZExt result to int or bool.
return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext");
@@ -2984,7 +2999,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
CodeGenFunction::ConditionalEvaluation eval(CGF);
// Branch on the LHS first. If it is true, go to the success (cont) block.
- CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
+ Cnt.getParentCount() - Cnt.getCount());
// Any edges into the ContBlock are now from an (indeterminate number of)
// edges from this first condition. All of these values will be true. Start
@@ -2999,7 +3015,9 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
// Emit the RHS condition as a bool value.
CGF.EmitBlock(RHSBlock);
+ Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
+ Cnt.adjustFallThroughCount();
eval.end(CGF);
@@ -3010,6 +3028,7 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
// into the phi node for the edge with the value of RHSCond.
CGF.EmitBlock(ContBlock);
PN->addIncoming(RHSCond, RHSBlock);
+ Cnt.applyAdjustmentsToRegion();
// ZExt result to int.
return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext");
@@ -3049,6 +3068,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
+ RegionCounter Cnt = CGF.getPGORegionCounter(E);
Expr *condExpr = E->getCond();
Expr *lhsExpr = E->getTrueExpr();
@@ -3063,6 +3083,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// If the dead side doesn't have labels we need, just emit the Live part.
if (!CGF.ContainsLabel(dead)) {
+ if (CondExprBool)
+ Cnt.beginRegion(Builder);
Value *Result = Visit(live);
// If the live part is a throw expression, it acts like it has a void
@@ -3079,6 +3101,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// the select function.
if (CGF.getLangOpts().OpenCL
&& condExpr->getType()->isVectorType()) {
+ Cnt.beginRegion(Builder);
+
llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
@@ -3122,6 +3146,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// safe to evaluate the LHS and RHS unconditionally.
if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
+ Cnt.beginRegion(Builder);
+
llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
@@ -3138,23 +3164,28 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
- CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock);
+ CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
CGF.EmitBlock(LHSBlock);
+ Cnt.beginRegion(Builder);
eval.begin(CGF);
Value *LHS = Visit(lhsExpr);
eval.end(CGF);
+ Cnt.adjustFallThroughCount();
LHSBlock = Builder.GetInsertBlock();
Builder.CreateBr(ContBlock);
CGF.EmitBlock(RHSBlock);
+ Cnt.beginElseRegion();
eval.begin(CGF);
Value *RHS = Visit(rhsExpr);
eval.end(CGF);
+ Cnt.adjustFallThroughCount();
RHSBlock = Builder.GetInsertBlock();
CGF.EmitBlock(ContBlock);
+ Cnt.applyAdjustmentsToRegion();
// If the LHS or RHS is a throw expression, it will be legitimately null.
if (!LHS)
View
9 lib/CodeGen/CGObjC.cpp
@@ -1523,10 +1523,13 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
llvm::Value *initialMutations =
Builder.CreateLoad(StateMutationsPtr, "forcoll.initial-mutations");
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Start looping. This is the point we return to whenever we have a
// fresh, non-empty batch of objects.
llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody");
EmitBlock(LoopBodyBB);
+ Cnt.beginRegion(Builder);
// The current index into the buffer.
llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index");
@@ -1623,7 +1626,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
EmitAutoVarCleanups(variable);
// Perform the loop body, setting up break and continue labels.
- BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody));
+ BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody, &Cnt));
{
RunCleanupsScope Scope(*this);
EmitStmt(S.getBody());
@@ -1642,6 +1645,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
llvm::Value *indexPlusOne
= Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1));
+ // TODO: We should probably model this as a "continue" for PGO
// If we haven't overrun the buffer yet, we can continue.
Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
LoopBodyBB, FetchMoreBB);
@@ -1665,6 +1669,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
index->addIncoming(zero, Builder.GetInsertBlock());
count->addIncoming(refetchCount, Builder.GetInsertBlock());
+ // TODO: We should be applying PGO weights here, but this needs to handle the
+ // branch before FetchMoreBB or we risk getting the numbers wrong.
Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero),
EmptyBB, LoopBodyBB);
@@ -1687,6 +1693,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
PopCleanupBlock();
EmitBlock(LoopEnd.getBlock());
+ // TODO: Once we calculate PGO weights above, set the region count here
}
void CodeGenFunction::EmitObjCAtTryStmt(const ObjCAtTryStmt &S) {
View
227 lib/CodeGen/CGStmt.cpp
@@ -358,7 +358,9 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) {
ResolveBranchFixups(Dest.getBlock());
}
+ RegionCounter Cnt = getPGORegionCounter(D->getStmt());
EmitBlock(Dest.getBlock());
+ Cnt.beginRegion(Builder);
}
/// Change the cleanup scope of the labels in this lexical scope to
@@ -402,12 +404,14 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) {
EmitStopPoint(&S);
EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
+ PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
if (const LabelDecl *Target = S.getConstantTarget()) {
EmitBranchThroughCleanup(getJumpDestForLabel(Target));
+ PGO.setCurrentRegionCount(0);
return;
}
@@ -424,12 +428,14 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
EmitBranch(IndGotoBB);
+ PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// C99 6.8.4.1: The first substatement is executed if the expression compares
// unequal to 0. The condition must be a scalar type.
LexicalScope ConditionScope(*this, S.getSourceRange());
+ RegionCounter Cnt = getPGORegionCounter(&S);
if (S.getConditionVariable())
EmitAutoVarDecl(*S.getConditionVariable());
@@ -447,6 +453,8 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// If the skipped block has no labels in it, just emit the executed block.
// This avoids emitting dead code and simplifies the CFG substantially.
if (!ContainsLabel(Skipped)) {
+ if (CondConstant)
+ Cnt.beginRegion(Builder);
if (Executed) {
RunCleanupsScope ExecutedScope(*this);
EmitStmt(Executed);
@@ -462,14 +470,17 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
llvm::BasicBlock *ElseBlock = ContBlock;
if (S.getElse())
ElseBlock = createBasicBlock("if.else");
- EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock);
+
+ EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
// Emit the 'then' code.
- EmitBlock(ThenBlock);
+ EmitBlock(ThenBlock);
+ Cnt.beginRegion(Builder);
{
RunCleanupsScope ThenScope(*this);
EmitStmt(S.getThen());
}
+ Cnt.adjustFallThroughCount();
EmitBranch(ContBlock);
// Emit the 'else' code if present.
@@ -478,10 +489,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
if (getDebugInfo())
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
EmitBlock(ElseBlock);
+ Cnt.beginElseRegion();
{
RunCleanupsScope ElseScope(*this);
EmitStmt(Else);
}
+ Cnt.adjustFallThroughCount();
// There is no need to emit line number for unconditional branch.
if (getDebugInfo())
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
@@ -490,9 +503,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// Emit the continuation block for code after the if.
EmitBlock(ContBlock, true);
+ Cnt.applyAdjustmentsToRegion();
}
void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Emit the header for the loop, which will also become
// the continue target.
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
@@ -503,7 +519,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
// Store the blocks to use for break and continue.
- BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));
+ BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader, &Cnt));
// C++ [stmt.while]p2:
// When the condition of a while statement is a declaration, the
@@ -525,6 +541,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
// while(1) is common, avoid extra exit blocks. Be sure
// to correctly handle break/continue though.
bool EmitBoolCondBranch = true;
+ llvm::BranchInst *CondBr = NULL;
if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
if (C->isOne())
EmitBoolCondBranch = false;
@@ -536,7 +553,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
if (ConditionScope.requiresCleanups())
ExitBlock = createBasicBlock("while.exit");
- Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
+ CondBr = Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -549,11 +566,19 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
{
RunCleanupsScope BodyScope(*this);
EmitBlock(LoopBody);
+ Cnt.beginRegion(Builder);
EmitStmt(S.getBody());
}
+ Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
+ uint64_t LoopCount = Cnt.getCount();
+ uint64_t ExitCount = Cnt.getLoopExitCount();
+ if (EmitBoolCondBranch)
+ CondBr->setMetadata(llvm::LLVMContext::MD_prof,
+ PGO.createBranchWeights(LoopCount, ExitCount));
+
// Immediately force cleanup.
ConditionScope.ForceCleanup();
@@ -562,6 +587,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
// Emit the exit block.
EmitBlock(LoopExit.getBlock(), true);
+ PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
// The LoopHeader typically is just a branch if we skipped emitting
// a branch, try to erase it.
@@ -573,16 +599,20 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
// Store the blocks to use for break and continue.
- BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
+ BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond, &Cnt));
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
EmitBlock(LoopBody);
+ Cnt.beginRegion(Builder);
{
RunCleanupsScope BodyScope(*this);
EmitStmt(S.getBody());
}
+ Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
@@ -603,12 +633,18 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
if (C->isZero())
EmitBoolCondBranch = false;
+ uint64_t LoopCount = Cnt.getCount() - Cnt.getParentCount();
+ uint64_t ExitCount = Cnt.getLoopExitCount();
+
// As long as the condition is true, iterate the loop.
- if (EmitBoolCondBranch)
- Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock());
+ if (EmitBoolCondBranch) {
+ Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
+ PGO.createBranchWeights(LoopCount, ExitCount));
+ }
// Emit the exit block.
EmitBlock(LoopExit.getBlock());
+ PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
// The DoCond block typically is just a branch if we skipped
// emitting a branch, try to erase it.
@@ -617,6 +653,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
}
void CodeGenFunction::EmitForStmt(const ForStmt &S) {
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
RunCleanupsScope ForScope(*this);
@@ -639,6 +677,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// Create a cleanup scope for the condition variable cleanups.
RunCleanupsScope ConditionScope(*this);
+ llvm::BranchInst *CondBr = NULL;
if (S.getCond()) {
// If the for statement has a condition scope, emit the local variable
// declaration.
@@ -658,7 +697,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// C99 6.8.5p2/p4: The first substatement is executed if the expression
// compares unequal to 0. The condition must be a scalar type.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
- Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
+ CondBr = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -670,6 +709,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// Treat it as a non-zero constant. Don't even create a new block for the
// body, just fall into it.
}
+ Cnt.beginRegion(Builder);
// If the for loop doesn't have an increment we can just use the
// condition as the continue block. Otherwise we'll need to create
@@ -679,7 +719,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
Continue = getJumpDestInCurrentScope("for.inc");
// Store the blocks to use for break and continue.
- BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+ BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
{
// Create a separate cleanup scope for the body, in case it is not
@@ -693,9 +733,16 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
EmitBlock(Continue.getBlock());
EmitStmt(S.getInc());
}
+ Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
+ uint64_t LoopCount = Cnt.getCount();
+ uint64_t ExitCount = Cnt.getLoopExitCount();
+ if (S.getCond())
+ CondBr->setMetadata(llvm::LLVMContext::MD_prof,
+ PGO.createBranchWeights(LoopCount, ExitCount));
+
ConditionScope.ForceCleanup();
EmitBranch(CondBlock);
@@ -706,9 +753,12 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock(), true);
+ PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
}
void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
+ RegionCounter Cnt = getPGORegionCounter(&S);
+
JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
RunCleanupsScope ForScope(*this);
@@ -739,7 +789,8 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
// The body is executed if the expression, contextually converted
// to bool, is true.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
- Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
+ llvm::BranchInst *CondBr = Builder.CreateCondBr(BoolCondVal,
+ ForBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@@ -747,12 +798,13 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
}
EmitBlock(ForBody);
+ Cnt.beginRegion(Builder);
// Create a block for the increment. In case of a 'continue', we jump there.
JumpDest Continue = getJumpDestInCurrentScope("for.inc");
// Store the blocks to use for break and continue.
- BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+ BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
{
// Create a separate cleanup scope for the loop variable and body.
@@ -764,9 +816,15 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
// If there is an increment, emit it next.
EmitBlock(Continue.getBlock());
EmitStmt(S.getInc());
+ Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
+ uint64_t LoopCount = Cnt.getCount();
+ uint64_t ExitCount = Cnt.getLoopExitCount();
+ CondBr->setMetadata(llvm::LLVMContext::MD_prof,
+ PGO.createBranchWeights(LoopCount, ExitCount));
+
EmitBranch(CondBlock);
ForScope.ForceCleanup();
@@ -776,6 +834,7 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock(), true);
+ PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
}
void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
@@ -789,6 +848,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
/*init*/ true);
}
EmitBranchThroughCleanup(ReturnBlock);
+ PGO.setCurrentRegionCount(0);
}
/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
@@ -860,6 +920,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
cleanupScope.ForceCleanup();
EmitBranchThroughCleanup(ReturnBlock);
+ PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
@@ -882,8 +943,14 @@ void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) {
if (HaveInsertPoint())
EmitStopPoint(&S);
- JumpDest Block = BreakContinueStack.back().BreakBlock;
- EmitBranchThroughCleanup(Block);
+ BreakContinue &BC = BreakContinueStack.back();
+ // We keep track of breaks from the loop so we can differentiate them from
+ // non-local exits in PGO instrumentation. This only applies to loops, not
+ // breaks from switch statements.
+ if (BC.CountBreak)
+ BC.LoopCnt->getBreakCounter().beginRegion(Builder);
+ EmitBranchThroughCleanup(BC.BreakBlock);
+ PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
@@ -895,8 +962,12 @@ void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
if (HaveInsertPoint())
EmitStopPoint(&S);
- JumpDest Block = BreakContinueStack.back().ContinueBlock;
- EmitBranchThroughCleanup(Block);
+ BreakContinue &BC = BreakContinueStack.back();
+ // We keep track of continues in the loop so we can differentiate them from
+ // non-local exits in PGO instrumentation.
+ BC.LoopCnt->getContinueCounter().beginRegion(Builder);
+ EmitBranchThroughCleanup(BC.ContinueBlock);
+ PGO.setCurrentRegionCount(0);
}
/// EmitCaseStmtRange - If case statement range is not too big then
@@ -908,11 +979,14 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
+ RegionCounter CaseCnt = getPGORegionCounter(&S);
+
// Emit the code for this case. We do this first to make sure it is
// properly chained from our predecessor before generating the
// switch machinery to enter this block.
EmitBlock(createBasicBlock("sw.bb"));
llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+ CaseCnt.beginRegion(Builder);
EmitStmt(S.getSubStmt());
// If range is empty, do nothing.
@@ -923,7 +997,17 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
// FIXME: parameters such as this should not be hardcoded.
if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
// Range is small enough to add multiple switch instruction cases.
- for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) {
+ uint64_t Total = CaseCnt.getCount() - CaseCnt.getParentCount();
+ unsigned NCases = Range.getZExtValue() + 1;
+ // Divide the weights evenly between the cases, ensuring that the total
+ // weight is preserved. Ie, a weight of 5 over three cases will be
+ // distributed as weights of 2, 2, and 1.
+ uint64_t Weight = Total / NCases, Rem = Total % NCases;
+ for (unsigned I = 0; I != NCases; ++I) {
+ if (SwitchWeights)
+ SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
+ if (Rem)
+ Rem--;
SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
LHS++;
}
@@ -948,7 +1032,19 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
llvm::Value *Cond =
Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
- Builder.CreateCondBr(Cond, CaseDest, FalseDest);
+
+ llvm::MDNode *Weights = 0;
+ if (SwitchWeights) {
+ uint64_t ThisCount = CaseCnt.getCount() - CaseCnt.getParentCount();
+ uint64_t DefaultCount = (*SwitchWeights)[0];
+ Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
+
+ // Since we're chaining the switch default through each large case range, we
+ // need to update the weight for the default, ie, the first case, to include
+ // this case.
+ (*SwitchWeights)[0] += ThisCount;
+ }
+ Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
// Restore the appropriate insertion point.
if (RestoreBB)
@@ -974,17 +1070,22 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
return;
}
+ RegionCounter CaseCnt = getPGORegionCounter(&S);
llvm::ConstantInt *CaseVal =
Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
- // If the body of the case is just a 'break', and if there was no fallthrough,
- // try to not emit an empty block.
- if ((CGM.getCodeGenOpts().OptimizationLevel > 0) &&
+ // If the body of the case is just a 'break', try to not emit an empty block.
+ // If we're profiling or we're not optimizing, leave the block in for better
+ // debug and coverage analysis.
+ if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
+ CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isa<BreakStmt>(S.getSubStmt())) {
JumpDest Block = BreakContinueStack.back().BreakBlock;
// Only do this optimization if there are no cleanups that need emitting.
if (isObviouslyBranchWithoutCleanups(Block)) {
+ if (SwitchWeights)
+ SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
SwitchInsn->addCase(CaseVal, Block.getBlock());
// If there was a fallthrough into this case, make sure to redirect it to
@@ -999,6 +1100,9 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
EmitBlock(createBasicBlock("sw.bb"));
llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
+ if (SwitchWeights)
+ SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
+ CaseCnt.beginRegion(Builder);
SwitchInsn->addCase(CaseVal, CaseDest);
// Recursively emitting the statement is acceptable, but is not wonderful for
@@ -1016,8 +1120,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
// Otherwise, iteratively add consecutive cases to this switch stmt.
while (NextCase && NextCase->getRHS() == 0) {
CurCase = NextCase;
- llvm::ConstantInt *CaseVal =
+ llvm::ConstantInt *CaseVal =
Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
+
+ CaseCnt = getPGORegionCounter(NextCase);
+ if (SwitchWeights)
+ SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
+ CaseCnt.beginRegion(Builder);
+
SwitchInsn->addCase(CaseVal, CaseDest);
NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
}
@@ -1030,7 +1140,22 @@ void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) {
llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
assert(DefaultBlock->empty() &&
"EmitDefaultStmt: Default block already defined?");
+
+ llvm::BasicBlock *SkipCountBB = 0;
+ if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
+ // The PGO region here needs to count the number of times the edge occurs,
+ // so fallthrough into this case will jump past the region counter to the
+ // skipcount basic block.
+ SkipCountBB = createBasicBlock("skipcount");
+ EmitBranch(SkipCountBB);
+ }
EmitBlock(DefaultBlock);
+
+ RegionCounter Cnt = getPGORegionCounter(&S);
+ Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
+
+ if (SkipCountBB)
+ EmitBlock(SkipCountBB);
EmitStmt(S.getSubStmt());
}
@@ -1187,7 +1312,8 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
static bool FindCaseStatementsForValue(const SwitchStmt &S,
const llvm::APSInt &ConstantCondValue,
SmallVectorImpl<const Stmt*> &ResultStmts,
- ASTContext &C) {
+ ASTContext &C,
+ const SwitchCase *&ResultCase) {
// First step, find the switch case that is being branched to. We can do this
// efficiently by scanning the SwitchCase list.
const SwitchCase *Case = S.getSwitchCaseList();
@@ -1230,6 +1356,7 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S,
// while (1) {
// case 4: ...
bool FoundCase = false;
+ ResultCase = Case;
return CollectStatementsForCase(S.getBody(), Case, FoundCase,
ResultStmts) != CSFC_Failure &&
FoundCase;
@@ -1245,6 +1372,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// Handle nested switch statements.
llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
+ SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
// See if we can constant fold the condition of the switch and therefore only
@@ -1252,8 +1380,14 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
llvm::APSInt ConstantCondValue;
if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
SmallVector<const Stmt*, 4> CaseStmts;
+ const SwitchCase *Case = 0;
if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
- getContext())) {
+ getContext(), Case)) {
+ PGO.setCurrentRegionCount(0);
+ if (Case) {
+ RegionCounter CaseCnt = getPGORegionCounter(Case);
+ CaseCnt.beginRegion(Builder);
+ }
RunCleanupsScope ExecutedScope(*this);
// At this point, we are no longer "within" a switch instance, so
@@ -1265,6 +1399,8 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// specified series of statements and we're good.
for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
EmitStmt(CaseStmts[i]);
+ RegionCounter ExitCnt = getPGORegionCounter(&S);
+ ExitCnt.beginRegion(Builder);
// Now we want to restore the saved switch instance so that nested
// switches continue to function properly
@@ -1282,18 +1418,41 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// failure.
llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
+ if (PGO.haveRegionCounts()) {
+ // Walk the SwitchCase list to find how many there are.
+ uint64_t DefaultCount = 0;
+ unsigned NumCases = 0;
+ for (const SwitchCase *Case = S.getSwitchCaseList();
+ Case;
+ Case = Case->getNextSwitchCase()) {
+ if (isa<DefaultStmt>(Case))
+ DefaultCount = getPGORegionCounter(Case).getCount();
+ NumCases += 1;
+ }
+ SwitchWeights = new SmallVector<uint64_t, 16>();
+ SwitchWeights->reserve(NumCases);
+ // The default needs to be first. We store the edge count, so we already
+ // know the right weight.
+ SwitchWeights->push_back(DefaultCount);
+ }
CaseRangeBlock = DefaultBlock;
// Clear the insertion point to indicate we are in unreachable code.
Builder.ClearInsertionPoint();
+ PGO.setCurrentRegionCount(0);
// All break statements jump to NextBlock. If BreakContinueStack is non-empty
- // then reuse last ContinueBlock.
+ // then reuse last ContinueBlock and that block's counter.
JumpDest OuterContinue;
- if (!BreakContinueStack.empty())
- OuterContinue = BreakContinueStack.back().ContinueBlock;
+ RegionCounter *OuterCount = 0;
+ if (!BreakContinueStack.empty()) {
+ BreakContinue &BC = BreakContinueStack.back();
+ OuterContinue = BC.ContinueBlock;
+ OuterCount = BC.LoopCnt;
+ }
- BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
+ BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue,
+ OuterCount, /*CountBreak=*/false));
// Emit switch body.
EmitStmt(S.getBody());
@@ -1322,8 +1481,20 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// Emit continuation.
EmitBlock(SwitchExit.getBlock(), true);
-
+ RegionCounter ExitCnt = getPGORegionCounter(&S);
+ ExitCnt.beginRegion(Builder);
+
+ if (SwitchWeights) {
+ assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
+ "switch weights do not match switch cases");
+ // If there's only one jump destination there's no sense weighting it.
+ if (SwitchWeights->size() > 1)
+ SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
+ PGO.createBranchWeights(*SwitchWeights));
+ delete SwitchWeights;
+ }
SwitchInsn = SavedSwitchInsn;
+ SwitchWeights = SavedSwitchWeights;
CaseRangeBlock = SavedCRBlock;
}
View
1  lib/CodeGen/CMakeLists.txt
@@ -53,6 +53,7 @@ add_clang_library(clangCodeGen
CodeGenModule.cpp
CodeGenTBAA.cpp
CodeGenTypes.cpp
+ CodeGenPGO.cpp
ItaniumCXXABI.cpp
MicrosoftCXXABI.cpp
ModuleBuilder.cpp
View
92 lib/CodeGen/CodeGenFunction.cpp
@@ -16,6 +16,7 @@
#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CodeGenModule.h"
+#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
@@ -44,7 +45,8 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
- SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
+ PGO(cgm), SwitchInsn(0), SwitchWeights(0),
+ CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
CXXThisValue(0), CXXDefaultInitExprThis(0),
CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
@@ -571,6 +573,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (CGM.getCodeGenOpts().InstrumentForProfiling)
EmitMCountInstrumentation();
+ PGO.assignRegionCounters(GD);
+
if (RetTy->isVoidType()) {
// Void type; nothing to return.
ReturnValue = 0;
@@ -643,6 +647,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
const Stmt *Body) {
+ RegionCounter Cnt = getPGORegionCounter(Body);
+ Cnt.beginRegion(Builder);
if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
EmitCompoundStmtWithoutScope(*S);
else
@@ -772,6 +778,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// a quick pass now to see if we can.
if (!CurFn->doesNotThrow())
TryMarkNoThrow(CurFn);
+
+ PGO.emitWriteoutFunction(CurGD);
+ PGO.destroyRegionCounters();
}
/// ContainsLabel - Return true if the statement contains a label in it. If
@@ -870,10 +879,13 @@ ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) {
///
void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
llvm::BasicBlock *TrueBlock,
- llvm::BasicBlock *FalseBlock) {
+ llvm::BasicBlock *FalseBlock,
+ uint64_t TrueCount) {
Cond = Cond->IgnoreParens();
if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) {
+ RegionCounter Cnt = getPGORegionCounter(CondBOp);
+
// Handle X && Y in a condition.
if (CondBOp->getOpcode() == BO_LAnd) {
// If we have "1 && X", simplify the code. "0 && X" would have constant
@@ -882,7 +894,9 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
ConstantBool) {
// br(1 && X) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ Cnt.beginRegion(Builder);
+ return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// If we have "X && 1", simplify the code to use an uncond branch.
@@ -890,21 +904,28 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
ConstantBool) {
// br(X && 1) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
+ return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// Emit the LHS as a conditional. If the LHS conditional is false, we
// want to jump to the FalseBlock.
llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
+ // The counter tells us how often we evaluate RHS, and all of TrueCount
+ // can be propagated to that branch.
+ uint64_t RHSCount = Cnt.getCount();
ConditionalEvaluation eval(*this);
- EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock);
+ EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
EmitBlock(LHSTrue);
// Any temporaries created here are conditional.
+ Cnt.beginRegion(Builder);
eval.begin(*this);
- EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
eval.end(*this);
+ Cnt.adjustFallThroughCount();
+ Cnt.applyAdjustmentsToRegion();
return;
}
@@ -916,7 +937,9 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
!ConstantBool) {
// br(0 || X) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ Cnt.beginRegion(Builder);
+ return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// If we have "X || 0", simplify the code to use an uncond branch.
@@ -924,21 +947,31 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
!ConstantBool) {
// br(X || 0) -> br(X).
- return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
+ return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
+ TrueCount);
}
// Emit the LHS as a conditional. If the LHS conditional is true, we
// want to jump to the TrueBlock.
llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false");
+ // We have the count for entry to the RHS and for the whole expression
+ // being true, so we can divy up True count between the short circuit and
+ // the RHS.
+ uint64_t LHSCount = TrueCount - Cnt.getCount();
+ uint64_t RHSCount = TrueCount - LHSCount;
ConditionalEvaluation eval(*this);
- EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse);
+ EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
EmitBlock(LHSFalse);
// Any temporaries created here are conditional.
+ Cnt.beginRegion(Builder);
eval.begin(*this);
- EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
+ EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
+
eval.end(*this);
+ Cnt.adjustFallThroughCount();
+ Cnt.applyAdjustmentsToRegion();
return;
}
@@ -946,8 +979,13 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) {
// br(!x, t, f) -> br(x, f, t)
- if (CondUOp->getOpcode() == UO_LNot)
- return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock);
+ if (CondUOp->getOpcode() == UO_LNot) {
+ // Negate the count.
+ uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
+ // Negate the condition and swap the destination blocks.
+ return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
+ FalseCount);
+ }
}
if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) {
@@ -955,17 +993,33 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
+ RegionCounter Cnt = getPGORegionCounter(CondOp);
ConditionalEvaluation cond(*this);
- EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock);
+ EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
+
+ // When computing PGO branch weights, we only know the overall count for
+ // the true block. This code is essentially doing tail duplication of the
+ // naive code-gen, introducing new edges for which counts are not
+ // available. Divide the counts proportionally between the LHS and RHS of
+ // the conditional operator.
+ uint64_t LHSScaledTrueCount = 0;
+ if (TrueCount) {
+ double LHSRatio = Cnt.getCount() / (double) PGO.getCurrentRegionCount();
+ LHSScaledTrueCount = TrueCount * LHSRatio;
+ }
cond.begin(*this);
EmitBlock(LHSBlock);
- EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock);
+ Cnt.beginRegion(Builder);
+ EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
+ LHSScaledTrueCount);
cond.end(*this);
cond.begin(*this);
EmitBlock(RHSBlock);
- EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock);
+ Cnt.beginElseRegion();
+ EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock,
+ TrueCount - LHSScaledTrueCount);
cond.end(*this);
return;
@@ -981,9 +1035,15 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
return;
}
+ // Create branch weights based on the number of times we get here and the
+ // number of times the condition should be true.
+ uint64_t CurrentCount = PGO.getCurrentRegionCountWithMin(TrueCount);
+ llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
+ CurrentCount - TrueCount);
+
// Emit the code with the fully general case.
llvm::Value *CondV = EvaluateExprAsBool(Cond);
- Builder.CreateCondBr(CondV, TrueBlock, FalseBlock);
+ Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
}
/// ErrorUnsupported - Print out an error that codegen doesn't support the
View
28 lib/CodeGen/CodeGenFunction.h
@@ -19,6 +19,7 @@
#include "CGValue.h"
#include "EHScopeStack.h"
#include "CodeGenModule.h"
+#include "CodeGenPGO.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
@@ -817,19 +818,36 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
// BreakContinueStack - This keeps track of where break and continue
- // statements should jump to.
+ // statements should jump to and the associated base counter for
+ // instrumentation.
struct BreakContinue {
- BreakContinue(JumpDest Break, JumpDest Continue)
- : BreakBlock(Break), ContinueBlock(Continue) {}
+ BreakContinue(JumpDest Break, JumpDest Continue, RegionCounter *LoopCnt,
+ bool CountBreak = true)
+ : BreakBlock(Break), ContinueBlock(Continue), LoopCnt(LoopCnt),
+ CountBreak(CountBreak) {}
JumpDest BreakBlock;
JumpDest ContinueBlock;
+ RegionCounter *LoopCnt;
+ bool CountBreak;
};
SmallVector<BreakContinue, 8> BreakContinueStack;
+ CodeGenPGO PGO;
+
+public:
+ /// Get a counter for instrumentation of the region associated with the given
+ /// statement.
+ RegionCounter getPGORegionCounter(const Stmt *S) {
+ return RegionCounter(PGO, S);
+ }
+private:
+
/// SwitchInsn - This is nearest current switch instruction. It is null if
/// current context is not in a switch.
llvm::SwitchInst *SwitchInsn;
+ /// The branch weights of SwitchInsn when doing instrumentation based PGO.
+ SmallVector<uint64_t, 16> *SwitchWeights;
/// CaseRangeBlock - This block holds if condition check for last case
/// statement range in current switch instruction.
@@ -2413,8 +2431,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
/// if statement) to the specified blocks. Based on the condition, this might
/// try to simplify the codegen of the conditional based on the branch.
+ /// TrueCount should be the number of times we expect the condition to
+ /// evaluate to true based on PGO data.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
- llvm::BasicBlock *FalseBlock);
+ llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
/// \brief Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.
View
11 lib/CodeGen/CodeGenModule.cpp
@@ -20,6 +20,7 @@
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenTBAA.h"
+#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
@@ -77,7 +78,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO,
ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
- NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0),
+ NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
+ CFConstantStringClassRef(0),
ConstantStringClassRef(0), NSConstantStringType(0),
NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
@@ -131,6 +133,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO,
if (C.getLangOpts().ObjCAutoRefCount)
ARCData = new ARCEntrypoints();
RRData = new RREntrypoints();
+
+ if (!CodeGenOpts.InstrProfileInput.empty())
+ PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
}
CodeGenModule::~CodeGenModule() {
@@ -2181,6 +2186,10 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
+
+ llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
+ if (PGOInit)
+ AddGlobalCtor(PGOInit, 0);
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
View
8 lib/CodeGen/CodeGenModule.h
@@ -85,7 +85,8 @@ namespace CodeGen {
class CGCUDARuntime;
class BlockFieldFlags;
class FunctionArgList;
-
+ class PGOProfileData;
+
struct OrderGlobalInits {
unsigned int priority;
unsigned int lex_order;
@@ -258,6 +259,7 @@ class CodeGenModule : public CodeGenTypeCache {
ARCEntrypoints *ARCData;
llvm::MDNode *NoObjCARCExceptionsMetadata;
RREntrypoints *RRData;
+ PGOProfileData *PGOData;
// WeakRefReferences - A set of references that have only been seen via
// a weakref so far. This is used to remove the weak of the reference if we
@@ -479,6 +481,10 @@ class CodeGenModule : public CodeGenTypeCache {
return *RRData;
}
+ PGOProfileData *getPGOData() const {
+ return PGOData;
+ }
+
llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
return StaticLocalDeclMap[D];
}
View
456 lib/CodeGen/CodeGenPGO.cpp
@@ -0,0 +1,456 @@
+//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instrumentation-based profile-guided optimization
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenPGO.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/StmtVisitor.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/FileSystem.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
+ DiagnosticsEngine &Diags = CGM.getDiags();
+ unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message);
+ Diags.Report(DiagID);
+}
+
+PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
+ : CGM(CGM) {
+ if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
+ ReportBadPGOData(CGM, "failed to open pgo data file");
+ return;
+ }
+
+ if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
+ ReportBadPGOData(CGM, "pgo data file too big");
+ return;
+ }
+
+ // Scan through the data file and map each function to the corresponding
+ // file offset where its counts are stored.
+ const char *BufferStart = DataBuffer->getBufferStart();
+ const char *BufferEnd = DataBuffer->getBufferEnd();
+ const char *CurPtr = BufferStart;
+ while (CurPtr < BufferEnd) {
+ // Read the mangled function name.
+ const char *FuncName = CurPtr;
+ // FIXME: Something will need to be added to distinguish static functions.
+ CurPtr = strchr(CurPtr, ' ');
+ if (!CurPtr) {
+ ReportBadPGOData(CGM, "pgo data file has malformed function entry");
+ return;
+ }
+ StringRef MangledName(FuncName, CurPtr - FuncName);
+
+ // Read the number of counters.
+ char *EndPtr;
+ unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
+ if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
+ ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
+ return;
+ }
+ CurPtr = EndPtr;
+
+ // There is one line for each counter; skip over those lines.
+ for (unsigned N = 0; N < NumCounters; ++N) {
+ CurPtr = strchr(++CurPtr, '\n');
+ if (!CurPtr) {
+ ReportBadPGOData(CGM, "pgo data file is missing some counter info");
+ return;
+ }
+ }
+
+ // Skip over the blank line separating functions.
+ CurPtr += 2;
+
+ DataOffsets[MangledName] = FuncName - BufferStart;
+ }
+}
+
+bool PGOProfileData::getFunctionCounts(StringRef MangledName,
+ std::vector<uint64_t> &Counts) {
+ // Find the relevant section of the pgo-data file.
+ llvm::StringMap<unsigned>::const_iterator OffsetIter =
+ DataOffsets.find(MangledName);
+ if (OffsetIter == DataOffsets.end())
+ return true;
+ const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
+
+ // Skip over the function name.
+ CurPtr = strchr(CurPtr, ' ');
+ assert(CurPtr && "pgo-data has corrupted function entry");
+
+ // Read the number of counters.
+ char *EndPtr;
+ unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
+ assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
+ "pgo-data file has corrupted number of counters");
+ CurPtr = EndPtr;
+
+ Counts.reserve(NumCounters);
+
+ for (unsigned N = 0; N < NumCounters; ++N) {
+ // Read the count value.
+ uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
+ if (EndPtr == CurPtr || *EndPtr != '\n') {
+ ReportBadPGOData(CGM, "pgo-data file has bad count value");
+ return true;
+ }
+ Counts.push_back(Count);
+ CurPtr = EndPtr + 1;
+ }
+
+ // Make sure the number of counters matches up.
+ if (Counts.size() != NumCounters) {
+ ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
+ return true;
+ }
+
+ return false;
+}
+
+void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
+ if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+ return;
+
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+
+ llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
+ llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
+
+ llvm::Function *WriteoutF =
+ CGM.getModule().getFunction("__llvm_pgo_writeout");
+ if (!WriteoutF) {
+ llvm::FunctionType *WriteoutFTy =
+ llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
+ WriteoutF = llvm::Function::Create(WriteoutFTy,
+ llvm::GlobalValue::InternalLinkage,
+ "__llvm_pgo_writeout", &CGM.getModule());
+ }
+ WriteoutF->setUnnamedAddr(true);
+ WriteoutF->addFnAttr(llvm::Attribute::NoInline);
+ if (CGM.getCodeGenOpts().DisableRedZone)
+ WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);
+
+ llvm::BasicBlock *BB = WriteoutF->empty() ?
+ llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();
+
+ CGBuilderTy PGOBuilder(BB);
+
+ llvm::Instruction *I = BB->getTerminator();
+ if (!I)
+ I = PGOBuilder.CreateRetVoid();
+ PGOBuilder.SetInsertPoint(I);
+
+ llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
+ llvm::Type *Args[] = {
+ Int8PtrTy, // const char *MangledName
+ Int32Ty, // uint32_t NumCounters
+ Int64PtrTy // uint64_t *Counters
+ };
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
+ llvm::Constant *EmitFunc =
+ CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);
+
+ llvm::Constant *MangledName =
+ CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
+ MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
+ PGOBuilder.CreateCall3(EmitFunc, MangledName,
+ PGOBuilder.getInt32(NumRegionCounters),
+ PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
+}
+
+llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
+ llvm::Function *WriteoutF =
+ CGM.getModule().getFunction("__llvm_pgo_writeout");
+ if (!WriteoutF)
+ return NULL;
+
+ // Create a small bit of code that registers the "__llvm_pgo_writeout" to
+ // be executed at exit.
+ llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
+ if (F)
+ return NULL;
+
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
+ false);
+ F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
+ "__llvm_pgo_init", &CGM.getModule());
+ F->setUnnamedAddr(true);
+ F->setLinkage(llvm::GlobalValue::InternalLinkage);
+ F->addFnAttr(llvm::Attribute::NoInline);
+ if (CGM.getCodeGenOpts().DisableRedZone)
+ F->addFnAttr(llvm::Attribute::NoRedZone);
+
+ llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
+ CGBuilderTy PGOBuilder(BB);
+
+ FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
+ llvm::Type *Params[] = {
+ llvm::PointerType::get(FTy, 0)
+ };
+ FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);
+
+ // Inialize the environment and register the local writeout function.
+ llvm::Constant *PGOInit =
+ CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
+ PGOBuilder.CreateCall(PGOInit, WriteoutF);
+ PGOBuilder.CreateRetVoid();
+
+ return F;
+}
+
+namespace {
+ /// A StmtVisitor that fills a map of statements to PGO counters.
+ struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
+ /// The next counter value to assign.
+ unsigned NextCounter;
+ /// The map of statements to counters.
+ llvm::DenseMap<const Stmt*, unsigned> *CounterMap;
+
+ MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
+ NextCounter(0), CounterMap(CounterMap) {
+ }
+
+ void VisitChildren(const Stmt *S) {
+ for (Stmt::const_child_range I = S->children(); I; ++I)
+ if (*I)
+ this->Visit(*I);
+ }
+ void VisitStmt(const Stmt *S) { VisitChildren(S); }
+
+ /// Assign a counter to track entry to the function body
+ void VisitFunctionDecl(const FunctionDecl *S) {
+ (*CounterMap)[S->getBody()] = NextCounter++;
+ Visit(S->getBody());
+ }
+ /// Assign a counter to track the block following a label
+ void VisitLabelStmt(const LabelStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getSubStmt());
+ }
+ /// Assign three counters - one for the body of the loop, one for breaks
+ /// from the loop, and one for continues.
+ ///
+ /// The break and continue counters cover all such statements in this loop,
+ /// and are used in calculations to find the number of times the condition
+ /// and exit of the loop occur. They are needed so we can differentiate
+ /// these statements from non-local exits like return and goto.
+ void VisitWhileStmt(const WhileStmt *S) {
+ (*CounterMap)[S] = NextCounter;
+ NextCounter += 3;
+ Visit(S->getCond());
+ Visit(S->getBody());
+ }
+ /// Assign counters for the body of the loop, and for breaks and
+ /// continues. See VisitWhileStmt.
+ void VisitDoStmt(const DoStmt *S) {
+ (*CounterMap)[S] = NextCounter;
+ NextCounter += 3;
+ Visit(S->getBody());
+ Visit(S->getCond());
+ }
+ /// Assign counters for the body of the loop, and for breaks and
+ /// continues. See VisitWhileStmt.
+ void VisitForStmt(const ForStmt *S) {
+ (*CounterMap)[S] = NextCounter;
+ NextCounter += 3;
+ const Expr *E;
+ if ((E = S->getCond()))
+ Visit(E);
+ Visit(S->getBody());
+ if ((E = S->getInc()))
+ Visit(E);
+ }
+ /// Assign counters for the body of the loop, and for breaks and
+ /// continues. See VisitWhileStmt.
+ void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
+ (*CounterMap)[S] = NextCounter;
+ NextCounter += 3;
+ const Expr *E;
+ if ((E = S->getCond()))
+ Visit(E);
+ Visit(S->getBody());
+ if ((E = S->getInc()))
+ Visit(E);
+ }
+ /// Assign counters for the body of the loop, and for breaks and
+ /// continues. See VisitWhileStmt.
+ void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
+ (*CounterMap)[S] = NextCounter;
+ NextCounter += 3;
+ Visit(S->getElement());
+ Visit(S->getBody());
+ }
+ /// Assign a counter for the exit block of the switch statement.
+ void VisitSwitchStmt(const SwitchStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getCond());
+ Visit(S->getBody());
+ }
+ /// Assign a counter for a particular case in a switch. This counts jumps
+ /// from the switch header as well as fallthrough from the case before this
+ /// one.
+ void VisitCaseStmt(const CaseStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getSubStmt());
+ }
+ /// Assign a counter for the default case of a switch statement. The count
+ /// is the number of branches from the loop header to the default, and does
+ /// not include fallthrough from previous cases. If we have multiple
+ /// conditional branch blocks from the switch instruction to the default
+ /// block, as with large GNU case ranges, this is the counter for the last
+ /// edge in that series, rather than the first.
+ void VisitDefaultStmt(const DefaultStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getSubStmt());
+ }
+ /// Assign a counter for the "then" part of an if statement. The count for
+ /// the "else" part, if it exists, will be calculated from this counter.
+ void VisitIfStmt(const IfStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getCond());
+ Visit(S->getThen());
+ if (S->getElse())
+ Visit(S->getElse());
+ }
+ /// Assign a counter for the continuation block of a C++ try statement.
+ void VisitCXXTryStmt(const CXXTryStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getTryBlock());
+ for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
+ Visit(S->getHandler(I));
+ }
+ /// Assign a counter for a catch statement's handler block.
+ void VisitCXXCatchStmt(const CXXCatchStmt *S) {
+ (*CounterMap)[S] = NextCounter++;
+ Visit(S->getHandlerBlock());
+ }
+ /// Assign a counter for the "true" part of a conditional operator. The
+ /// count in the "false" part will be calculated from this counter.
+ void VisitConditionalOperator(const ConditionalOperator *E) {
+ (*CounterMap)[E] = NextCounter++;
+ Visit(E->getCond());
+ Visit(E->getTrueExpr());
+ Visit(E->getFalseExpr());
+ }
+ /// Assign a counter for the right hand side of a logical and operator.
+ void VisitBinLAnd(const BinaryOperator *E) {
+ (*CounterMap)[E] = NextCounter++;
+ Visit(E->getLHS());
+ Visit(E->getRHS());
+ }
+ /// Assign a counter for the right hand side of a logical or operator.
+ void VisitBinLOr(const BinaryOperator *E) {
+ (*CounterMap)[E] = NextCounter++;
+ Visit(E->getLHS());
+ Visit(E->getRHS());
+ }
+ };
+}
+
+void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
+ bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
+ PGOProfileData *PGOData = CGM.getPGOData();
+ if (!InstrumentRegions && !PGOData)
+ return;
+ const Decl *D = GD.getDecl();
+ if (!D)
+ return;
+ mapRegionCounters(D);
+ if (InstrumentRegions)
+ emitCounterVariables();
+ if (PGOData)
+ loadRegionCounts(GD, PGOData);
+}
+
+void CodeGenPGO::mapRegionCounters(const Decl *D) {
+ RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
+ MapRegionCounters Walker(RegionCounterMap);
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+ Walker.VisitFunctionDecl(FD);
+ NumRegionCounters = Walker.NextCounter;
+}
+
+void CodeGenPGO::emitCounterVariables() {
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
+ NumRegionCounters);
+ RegionCounters =
+ new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
+ llvm::GlobalVariable::PrivateLinkage,
+ llvm::Constant::getNullValue(CounterTy),
+ "__llvm_pgo_ctr");
+}
+
+void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
+ if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+ return;
+ llvm::Value *Addr =
+ Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
+ llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Addr);
+}
+
+void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
+ // For now, ignore the counts from the PGO data file only if the number of
+ // counters does not match. This could be tightened down in the future to
+ // ignore counts when the input changes in various ways, e.g., by comparing a
+ // hash value based on some characteristics of the input.
+ RegionCounts = new std::vector<uint64_t>();
+ if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
+ RegionCounts->size() != NumRegionCounters) {
+ delete RegionCounts;
+ RegionCounts = 0;
+ }
+}
+
+void CodeGenPGO::destroyRegionCounters() {
+ if (RegionCounterMap != 0)
+ delete RegionCounterMap;
+ if (RegionCounts != 0)
+ delete RegionCounts;
+}
+
+llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
+ uint64_t FalseCount) {
+ if (!TrueCount && !FalseCount)
+ return 0;
+
+ llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+ // TODO: need to scale down to 32-bits
+ // According to Laplace's Rule of Succession, it is better to compute the
+ // weight based on the count plus 1.
+ return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
+}
+
+llvm::MDNode *
+CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
+ llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+ // TODO: need to scale down to 32-bits, instead of just truncating.
+ // According to Laplace's Rule of Succession, it is better to compute the
+ // weight based on the count plus 1.
+ SmallVector<uint32_t, 16> ScaledWeights;
+ ScaledWeights.reserve(Weights.size());
+ for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
+ WI != WE; ++WI) {
+ ScaledWeights.push_back(*WI + 1);
+ }
+ return MDHelper.createBranchWeights(ScaledWeights);
+}
View
216 lib/CodeGen/CodeGenPGO.h
@@ -0,0 +1,216 @@
+//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instrumentation-based profile-guided optimization
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_CODEGEN_CODEGENPGO_H
+#define CLANG_CODEGEN_CODEGENPGO_H
+
+#include "CGBuilder.h"
+#include "CodeGenModule.h"
+#include "CodeGenTypes.h"
+#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace clang {
+namespace CodeGen {
+class RegionCounter;
+
+/// The raw counter data from an instrumented PGO binary
+class PGOProfileData {
+private:
+ /// The PGO data
+ llvm::OwningPtr<llvm::MemoryBuffer> DataBuffer;
+ /// Offsets into DataBuffer for each function's counters
+ llvm::StringMap<unsigned> DataOffsets;
+ CodeGenModule &CGM;
+public:
+ PGOProfileData(CodeGenModule &CGM, std::string Path);
+ /// Fill Counts with the profile data for the given function name. Returns
+ /// false on success.
+ bool getFunctionCounts(StringRef MangledName, std::vector<uint64_t> &Counts);
+};
+
+/// Per-function PGO state. This class should generally not be used directly,
+/// but instead through the CodeGenFunction and RegionCounter types.
+class CodeGenPGO {
+private:
+ CodeGenModule &CGM;
+
+ unsigned NumRegionCounters;
+ llvm::GlobalVariable *RegionCounters;
+ llvm::DenseMap<const Stmt*, unsigned> *RegionCounterMap;
+ std::vector<uint64_t> *RegionCounts;
+ uint64_t CurrentRegionCount;
+
+public:
+ CodeGenPGO(CodeGenModule &CGM)
+ : CGM(CGM), NumRegionCounters(0), RegionCounters(0), RegionCounterMap(0),
+ RegionCounts(0), CurrentRegionCount(0) {}
+ ~CodeGenPGO() {}
+
+ /// Whether or not we have PGO region data for the current function. This is
+ /// false both when we have no data at all and when our data has been
+ /// discarded.
+ bool haveRegionCounts() const { return RegionCounts != 0; }
+
+ /// Return the counter value of the current region.
+ uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
+ /// Return the counter value of the current region, or \p Min if it is larger.
+ uint64_t getCurrentRegionCountWithMin(uint64_t Min) {
+ return std::max(Min, CurrentRegionCount);
+ }
+ /// Set the counter value for the current region. This is used to keep track
+ /// of changes to the most recent counter from control flow and non-local
+ /// exits.
+ void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
+
+ /// Calculate branch weights appropriate for PGO data
+ llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
+ llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
+
+ /// Assign counters to regions and configure them for PGO of a given
+ /// function. Does nothing if instrumentation is not enabled and either
+ /// generates global variables or associates PGO data with each of the
+ /// counters depending on whether we are generating or using instrumentation.
+ void assignRegionCounters(GlobalDecl &GD);
+ /// Emit code to write counts for a given function to disk, if necessary.
+ void emitWriteoutFunction(GlobalDecl &GD);
+ /// Clean up region counter state. Must be called if assignRegionCounters is
+ /// used.
+ void destroyRegionCounters();
+ /// Emit the logic to register region counter write out functions. Returns a
+ /// function that implements this logic.
+ static llvm::Function *emitInitialization(CodeGenModule &CGM);
+
+private:
+ void mapRegionCounters(const Decl *D);
+ void loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData);
+ void emitCounterVariables();
+
+ /// Emit code to increment the counter at the given index
+ void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
+
+ /// Return the region counter for the given statement. This should only be
+ /// called on statements that have a dedicated counter.
+ unsigned getRegionCounter(const Stmt *S) {
+ if (RegionCounterMap == 0)
+ return 0;
+ return (*RegionCounterMap)[S];
+ }
+
+ /// Return the region count for the counter at the given index.
+ uint64_t getRegionCount(unsigned Counter) {
+ if (!haveRegionCounts())
+ return 0;
+ return (*RegionCounts)[Counter];
+ }
+
+ friend class RegionCounter;
+};
+
+/// A counter for a particular region. This is the primary interface through
+/// which clients manage PGO counters and their values.
+class RegionCounter {
+ CodeGenPGO *PGO;
+ unsigned Counter;
+ uint64_t Count;
+ uint64_t ParentCount;
+ uint64_t RegionCount;
+ int64_t Adjust;
+
+ RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
+ : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
+ ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
+
+public:
+ RegionCounter(CodeGenPGO &PGO, const Stmt *S)
+ : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
+ Count(PGO.getRegionCount(Counter)),
+ ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
+
+ /// Get the value of the counter. In most cases this is the number of times
+ /// the region of the counter was entered, but for switch labels it's the
+ /// number of direct jumps to that label.
+ uint64_t getCount() const { return Count; }
+ /// Get the value of the counter with adjustments applied. Adjustments occur
+ /// when control enters or leaves the region abnormally, ie, if there is a
+ /// jump to a label within the region, or if the function can return from
+ /// within the region. The adjusted count, then, is the value of the counter
+ /// at the end of the region.
+ uint64_t getAdjustedCount() const {
+ assert(Adjust > 0 || (uint64_t)(-Adjust) <= Count && "Negative count");
+ return Count + Adjust;
+ }
+ /// Get the value of the counter in this region's parent, ie, the region that
+ /// was active when this region began. This is useful for deriving counts in
+ /// implicitly counted regions, like the false case of a condition or the
+ /// normal exits of a loop.
+ uint64_t getParentCount() const { return ParentCount; }
+
+ /// Get the number of times the condition of a loop will evaluate false. This
+ /// is the number of times we enter the loop, adjusted by the difference
+ /// between entering and exiting the loop body normally, excepting that
+ /// 'continue' statements also bring us back here.
+ ///
+ /// Undefined if this counter is not counting a loop.
+ uint64_t getLoopExitCount() const {
+ return getParentCount() + getContinueCounter().getCount() +
+ getAdjustedCount() - getCount();
+ }
+ /// Get the associated break counter. Undefined if this counter is not
+ /// counting a loop.
+ RegionCounter getBreakCounter() const {
+ return RegionCounter(*PGO, Counter + 1);
+ }
+ /// Get the associated continue counter. Undefined if this counter is not
+ /// counting a loop.
+ RegionCounter getContinueCounter() const {
+ return RegionCounter(*PGO, Counter + 2);
+ }
+
+ /// Activate the counter by emitting an increment and starting to track
+ /// adjustments. If AddIncomingFallThrough is true, the current region count
+ /// will be added to the counter for the purposes of tracking the region.
+ void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
+ RegionCount = Count;
+ if (AddIncomingFallThrough)
+ RegionCount += PGO->getCurrentRegionCount();
+ PGO->setCurrentRegionCount(RegionCount);
+ PGO->emitCounterIncrement(Builder, Counter);
+ }
+ /// For counters on boolean branches, begins tracking adjustments for the
+ /// uncounted path.
+ void beginElseRegion() {
+ RegionCount = ParentCount - Count;
+ PGO->setCurrentRegionCount(RegionCount);
+ }
+
+ /// Control may either enter or leave the region, so the count at the end may
+ /// be different from the start. Call this to track that adjustment without
+ /// modifying the current count. Must not be called before one of beginRegion
+ /// or beginElseRegion.
+ void adjustFallThroughCount() {
+ Adjust += PGO->getCurrentRegionCount() - RegionCount;
+ }
+ /// Commit all adjustments to the current region. This should be called after
+ /// all blocks that adjust the fallthrough count have been emitted.
+ void applyAdjustmentsToRegion() {
+ PGO->setCurrentRegionCount(ParentCount + Adjust);
+ }
+};
+
+} // end namespace CodeGen
+} // end namespace clang
+
+#endif
View
127 test/CodeGen/Inputs/instr-profile.pgodata
@@ -0,0 +1,127 @@
+simple_loops 10
+1
+100
+0
+0
+100
+0
+0
+76
+0
+0
+
+conditionals 13
+1
+100
+0
+0
+50
+50
+33
+33
+16
+99
+100
+99
+100
+
+early_exits 13
+1
+0
+51
+1
+25
+1
+25
+1
+26
+0
+0
+1
+0
+
+jumps 30
+1
+1
+0
+0
+0
+1
+0
+0
+0
+0
+1
+0
+1
+2
+3
+2
+0
+0
+0
+3
+0
+1
+1
+1
+10
+0
+0
+0
+10
+9
+
+switches 21
+1
+1
+1
+15
+0
+7
+7
+1
+0
+3
+2
+3
+3
+4
+4
+0
+4
+4
+5
+1
+0
+
+big_switch 19
+1
+32
+0
+0
+32
+1
+0
+2
+1
+11
+11
+1
+1
+15
+15
+1
+1
+2
+2
+
+no_usable_data 5
+1
+1
+1
+1
+1
+
+main 1
+1
View
428 test/CodeGen/instr-profile.c
@@ -0,0 +1,428 @@
+// Test that instrumentation based profiling feeds branch prediction
+// correctly. This tests both generation of profile data and use of the same,
+// and the input file for the -fprofile-instr-use case is expected to be result
+// of running the program generated by the -fprofile-instr-generate case
+// (excepting no_usable_data). As such, main() should call every function in
+// this test.
+
+// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
+// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
+
+// PGOGEN: @[[SLC:__llvm_pgo_ctr[0-9]*]] = private global [10 x i64] zeroinitializer
+// PGOGEN: @[[IFC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
+// PGOGEN: @[[EEC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
+// PGOGEN: @[[JMC:__llvm_pgo_ctr[0-9]*]] = private global [30 x i64] zeroinitializer
+// PGOGEN: @[[SWC:__llvm_pgo_ctr[0-9]*]] = private global [21 x i64] zeroinitializer
+// PGOGEN: @[[BSC:__llvm_pgo_ctr[0-9]*]] = private global [19 x i64] zeroinitializer
+// PGOGEN: @[[NOC:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer
+// PGOGEN: @[[MAC:__llvm_pgo_ctr[0-9]*]] = private global [1 x i64] zeroinitializer
+
+// PGOGEN-LABEL: @simple_loops()
+// PGOUSE-LABEL: @simple_loops()
+// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 0
+void simple_loops() {
+ int i;
+ // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 1
+ // PGOUSE: br {{.*}} !prof ![[SL1:[0-9]+]]
+ for (i = 0; i < 100; ++i) {
+ }
+ // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 4
+ // PGOUSE: br {{.*}} !prof ![[SL2:[0-9]+]]
+ while (i > 0)
+ i--;
+ // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 7
+ // PGOUSE: br {{.*}} !prof ![[SL3:[0-9]+]]
+ do {} while (i++ < 75);
+
+ // PGOGEN-NOT: store {{.*}} @[[SLC]],
+ // PGOUSE-NOT: br {{.*}} !prof ![0-9]+
+}
+
+// PGOGEN-LABEL: @conditionals()
+// PGOUSE-LABEL: @conditionals()
+// PGOGEN: s