Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions bolt/include/bolt/Passes/InferNonStale.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===- bolt/Passes/InferNonStale.h - Infer non-stale profile ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the pass that runs the profile inference algorithm used
// by stale profile matching on functions with a valid, not yet inferred
// profile, in order to improve profile quality.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_INFERNONSTALE_H
#define BOLT_PASSES_INFERNONSTALE_H

#include "bolt/Passes/BinaryPasses.h"

namespace llvm {
namespace bolt {

/// Pass that applies profile inference to functions whose profile is valid
/// but has not been inferred yet, with the goal of improving edge count
/// estimates and overall profile quality.
class InferNonStale : public BinaryFunctionPass {
public:
  /// \p PrintPass is forwarded unchanged to the BinaryFunctionPass base.
  explicit InferNonStale(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  /// Pass entry point.
  Error runOnFunctions(BinaryContext &BC) override;

  /// Name under which the pass is reported.
  const char *getName() const override { return "infer-non-stale"; }

private:
  /// Per-function worker invoked from runOnFunctions().
  void runOnFunction(BinaryFunction &BF);
};

} // namespace bolt
} // namespace llvm

#endif
2 changes: 2 additions & 0 deletions bolt/lib/Passes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ add_llvm_library(LLVMBOLTPasses
Hugify.cpp
IdenticalCodeFolding.cpp
IndirectCallPromotion.cpp
InferNonStale.cpp
Inliner.cpp
Instrumentation.cpp
JTFootprintReduction.cpp
Expand Down Expand Up @@ -64,5 +65,6 @@ add_llvm_library(LLVMBOLTPasses
target_link_libraries(LLVMBOLTPasses
PRIVATE
LLVMBOLTCore
LLVMBOLTProfile
LLVMBOLTUtils
)
168 changes: 168 additions & 0 deletions bolt/lib/Passes/InferNonStale.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
//===- bolt/Passes/InferNonStale.cpp - Non-stale profile inference ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the InferNonStale pass, which runs the profile
// inference algorithm shared with stale profile matching on functions that
// have a valid, not yet inferred profile, in order to improve profile quality.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/InferNonStale.h"

#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include "llvm/Transforms/Utils/SampleProfileInference.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "infer-non-stale"

using namespace llvm;
using namespace bolt;

// Command-line options read by the InferNonStale pass.
namespace opts {

// Declared here only; the definitions live in other translation units.
extern cl::opt<bool> TimeRewrite;
extern cl::OptionCategory BoltOptCategory;

// Switch for the pass. It defaults to off (cl::init(false)) and is defined
// without `static` so that other files can refer to it via `extern`.
cl::opt<bool>
InferNonStaleProfile("infer-non-stale-profile",
cl::desc("Infer profile counts for functions with "
"non-stale profile using profi"),
cl::init(false), cl::cat(BoltOptCategory));

// Reuse existing stale matching parameters
// These are declarations only, so the definitions must have external linkage
// for this file to link. They are copied into ProfiParams in runOnFunction().
extern cl::opt<bool> StaleMatchingEvenFlowDistribution;
extern cl::opt<bool> StaleMatchingRebalanceUnknown;
extern cl::opt<bool> StaleMatchingJoinIslands;
extern cl::opt<unsigned> StaleMatchingCostBlockInc;
extern cl::opt<unsigned> StaleMatchingCostBlockDec;
extern cl::opt<unsigned> StaleMatchingCostJumpInc;
extern cl::opt<unsigned> StaleMatchingCostJumpDec;
extern cl::opt<unsigned> StaleMatchingCostBlockUnknownInc;
extern cl::opt<unsigned> StaleMatchingCostJumpUnknownInc;
extern cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc;

} // namespace opts

namespace llvm {
namespace bolt {

// Forward declarations of functions from StaleProfileMatching.cpp
// NOTE(review): no header is included for these, so the signatures below must
// be kept in sync with the definitions by hand; a shared header would let the
// compiler check them. The one-line summaries are taken from how this file
// uses each function, not from the definitions.

// Creates the FlowFunction wrapper for the blocks in BlockOrder.
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder);
// Adjusts Func by marking unreachable blocks as unlikely.
void preprocessUnreachableBlocks(FlowFunction &Func);
// Collects the inferred counts from Func and updates BF with them.
void assignProfile(BinaryFunction &BF,
const BinaryFunction::BasicBlockOrderType &BlockOrder,
FlowFunction &Func);

} // namespace bolt
} // namespace llvm

namespace llvm {
namespace bolt {

/// Run profile inference on a single function.
///
/// The existing block and edge counts of \p BF seed a FlowFunction, the
/// inference algorithm is applied with the stale-matching cost parameters, and
/// the resulting counts are written back to \p BF, which is then marked as
/// having an inferred profile.
///
/// \p BF must have a CFG and a valid profile that is not already inferred;
/// runOnFunctions() filters out everything else before calling this.
void InferNonStale::runOnFunction(BinaryFunction &BF) {
  assert(BF.hasCFG() && "Function must have CFG");

  // Only process functions with profile that are not already inferred
  assert(BF.hasValidProfile() && "Function must have valid profile");

  assert(!BF.hasInferredProfile() && "Function must not have inferred profile");

  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying non-stale profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes();

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference algorithm.
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Flow block 0 is a dummy entry and the last flow block is an artificial
  // sink, so layout block I corresponds to flow block I + 1.
  const uint64_t SinkIndex = Func.Blocks.size() - 1;

  // Assign existing profile counts to the flow function.
  // This differs from stale matching - we use existing counts directly.
  for (uint64_t I = 0; I < BlockOrder.size(); ++I) {
    BinaryBasicBlock *BB = BlockOrder[I];
    FlowBlock &Block = Func.Blocks[I + 1];

    // Set block weight from the existing execution count; a zero count is
    // treated as "unknown" so the solver is free to assign a value.
    Block.Weight = BB->getKnownExecutionCount();
    Block.HasUnknownWeight = (Block.Weight == 0);

    // Set jump weights from existing branch info.
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->Target == SinkIndex) // Skip artificial sink
        continue;

      BinaryBasicBlock *SuccBB = BlockOrder[Jump->Target - 1];
      // Jumps that do not correspond to a CFG successor edge carry no branch
      // info; leave their weights as created.
      if (!BB->getSuccessor(SuccBB->getLabel()))
        continue;

      // Treat the "no profile" sentinel like a missing count, the same way
      // getKnownExecutionCount() does for blocks, so that the sentinel value
      // is never handed to the solver as a real weight.
      const uint64_t Count = BB->getBranchInfo(*SuccBB).Count;
      Jump->Weight = (Count == BinaryBasicBlock::COUNT_NO_PROFILE) ? 0 : Count;
      Jump->HasUnknownWeight = (Jump->Weight == 0);
    }
  }

  // Adjust the flow function by marking unreachable blocks Unlikely
  preprocessUnreachableBlocks(Func);

  // If nothing jumps to the artificial sink, the function has no exit blocks.
  // Skip it and leave the existing profile untouched.
  // NOTE(review): this is meant to mirror the guard stale matching applies
  // before running inference - confirm against StaleProfileMatching.cpp.
  if (Func.Blocks.back().PredJumps.empty()) {
    LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping non-stale profile inference for "
                      << "\"" << BF.getPrintName()
                      << "\": function has no exit blocks\n");
    return;
  }

  // Set up inference parameters. The entry-block and fall-through costs reuse
  // the generic block/jump options because no dedicated options are declared.
  ProfiParams Params;
  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
  Params.JoinIslands = opts::StaleMatchingJoinIslands;

  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;

  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;

  // Apply the profile inference algorithm
  applyFlowInference(Params, Func);

  // Collect inferred counts and update function annotations
  assignProfile(BF, BlockOrder, Func);

  // Mark the function as having inferred profile
  BF.setHasInferredProfile(true);
}

/// Pass entry point: runs runOnFunction() on every function that has a CFG
/// and a valid profile that has not been inferred yet.
///
/// Always returns Error::success().
Error InferNonStale::runOnFunctions(BinaryContext &BC) {
  // Time the pass as a whole. The per-function workers below may run on
  // several threads, and they would all start and stop the same named timer
  // if the region were opened inside runOnFunction().
  NamedRegionTimer T("inferNonStale", "non-stale profile inference", "rewrite",
                     "Rewrite passes", opts::TimeRewrite);

  ParallelUtilities::WorkFuncTy WorkFun = [this](BinaryFunction &BF) {
    runOnFunction(BF);
  };

  // Must stay in sync with the assertions at the top of runOnFunction().
  ParallelUtilities::PredicateTy SkipFunc = [](const BinaryFunction &BF) {
    return !BF.hasValidProfile() || BF.hasInferredProfile() || !BF.hasCFG();
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, WorkFun,
      SkipFunc, "InferNonStale");

  return Error::success();
}

} // namespace bolt
} // namespace llvm
24 changes: 12 additions & 12 deletions bolt/lib/Profile/StaleProfileMatching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,66 +52,66 @@ cl::opt<bool>
cl::desc("Infer counts from stale profile data."),
cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingMinMatchedBlock(
cl::opt<unsigned> StaleMatchingMinMatchedBlock(
"stale-matching-min-matched-block",
cl::desc("Percentage threshold of matched basic blocks at which stale "
"profile inference is executed."),
cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingMaxFuncSize(
cl::opt<unsigned> StaleMatchingMaxFuncSize(
"stale-matching-max-func-size",
cl::desc("The maximum size of a function to consider for inference."),
cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));

// Parameters of the profile inference algorithm. The default values are tuned
// on several benchmarks.
static cl::opt<bool> StaleMatchingEvenFlowDistribution(
cl::opt<bool> StaleMatchingEvenFlowDistribution(
"stale-matching-even-flow-distribution",
cl::desc("Try to evenly distribute flow when there are multiple equally "
"likely options."),
cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<bool> StaleMatchingRebalanceUnknown(
cl::opt<bool> StaleMatchingRebalanceUnknown(
"stale-matching-rebalance-unknown",
cl::desc("Evenly re-distribute flow among unknown subgraphs."),
cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<bool> StaleMatchingJoinIslands(
cl::opt<bool> StaleMatchingJoinIslands(
"stale-matching-join-islands",
cl::desc("Join isolated components having positive flow."), cl::init(true),
cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostBlockInc(
cl::opt<unsigned> StaleMatchingCostBlockInc(
"stale-matching-cost-block-inc",
cl::desc("The cost of increasing a block count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostBlockDec(
cl::opt<unsigned> StaleMatchingCostBlockDec(
"stale-matching-cost-block-dec",
cl::desc("The cost of decreasing a block count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostJumpInc(
cl::opt<unsigned> StaleMatchingCostJumpInc(
"stale-matching-cost-jump-inc",
cl::desc("The cost of increasing a jump count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostJumpDec(
cl::opt<unsigned> StaleMatchingCostJumpDec(
"stale-matching-cost-jump-dec",
cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
"stale-matching-cost-block-unknown-inc",
cl::desc("The cost of increasing an unknown block count by one."),
cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
"stale-matching-cost-jump-unknown-inc",
cl::desc("The cost of increasing an unknown jump count by one."),
cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
"stale-matching-cost-jump-unknown-ft-inc",
cl::desc(
"The cost of increasing an unknown fall-through jump count by one."),
Expand Down
16 changes: 16 additions & 0 deletions bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "bolt/Passes/Hugify.h"
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
#include "bolt/Passes/InferNonStale.h"
#include "bolt/Passes/Inliner.h"
#include "bolt/Passes/Instrumentation.h"
#include "bolt/Passes/JTFootprintReduction.h"
Expand Down Expand Up @@ -58,6 +59,7 @@ extern cl::opt<bolt::PLTCall::OptType> PLT;
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
llvm::bolt::DeprecatedICFNumericOptionParser>
ICF;
extern cl::opt<bool> InferNonStaleProfile;

static cl::opt<bool>
DynoStatsAll("dyno-stats-all",
Expand Down Expand Up @@ -98,6 +100,11 @@ static cl::opt<bool> PrintEstimateEdgeCounts(
cl::desc("print function after edge counts are set for no-LBR profile"),
cl::Hidden, cl::cat(BoltOptCategory));

// Hidden option passed to the InferNonStale pass as its print flag.
static cl::opt<bool> PrintInferNonStale(
"print-infer-non-stale",
cl::desc("print function after non-stale profile inference"), cl::Hidden,
cl::cat(BoltOptCategory));

cl::opt<bool>
PrintFinalized("print-finalized",
cl::desc("print function after CFG is finalized"),
Expand Down Expand Up @@ -384,6 +391,15 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {

Manager.registerPass(std::make_unique<PrintProfileQualityStats>(NeverPrint));

// Optionally run profile inference on non-stale profiles
if (opts::InferNonStaleProfile) {
Manager.registerPass(std::make_unique<InferNonStale>(PrintInferNonStale));

// Print profile quality stats after inference to show improvement
Manager.registerPass(
std::make_unique<PrintProfileQualityStats>(NeverPrint));
}

Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));

Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
Expand Down
5 changes: 5 additions & 0 deletions bolt/test/X86/profile-quality-reporting.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)

## Check profile quality with infer-non-stale-profile option
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
RUN: --infer-non-stale-profile | FileCheck %s --check-prefix CHECK-INFER
CHECK-INFER: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)
Loading