Skip to content

Commit

Permalink
[NFC][Inliner] Add cl::opt<int> to tune InstrCost
Browse files Browse the repository at this point in the history
The plan is to tune this for sanitizers.

Differential Revision: https://reviews.llvm.org/D131123
  • Loading branch information
vitalybuka committed Aug 4, 2022
1 parent 8dc4b2e commit a2aa680
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 41 deletions.
2 changes: 1 addition & 1 deletion llvm/include/llvm/Analysis/InlineCost.h
Expand Up @@ -44,7 +44,7 @@ const int OptMinSizeThreshold = 5;
const int OptAggressiveThreshold = 250;

// Various magic constants used to adjust heuristics.
const int InstrCost = 5;
int getInstrCost();
const int IndirectCallThreshold = 100;
const int LoopPenalty = 25;
const int LastCallToStaticBonus = 15000;
Expand Down
76 changes: 40 additions & 36 deletions llvm/lib/Analysis/InlineCost.cpp
Expand Up @@ -122,6 +122,10 @@ static cl::opt<int> HotCallSiteRelFreq(
"entry frequency, for a callsite to be hot in the absence of "
"profile information."));

// Cost charged for a single instruction by the inline cost computations in
// this file. Hidden command-line option `-inline-instr-cost`; the default of 5
// matches the previous hard-coded InlineConstants::InstrCost value.
static cl::opt<int>
InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
cl::desc("Cost of a single instruction when inlining"));

static cl::opt<int> CallPenalty(
"inline-call-penalty", cl::Hidden, cl::init(25),
cl::desc("Call penalty that is applied per callsite when inlining"));
Expand Down Expand Up @@ -160,6 +164,12 @@ Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
return None;
return AttrValue;
}

namespace InlineConstants {
// Returns the current value of the file-local `inline-instr-cost` option
// (InstrCost) so that code outside this translation unit can read it.
int getInstrCost() { return InstrCost; }

} // namespace InlineConstants

} // namespace llvm

namespace {
Expand Down Expand Up @@ -618,16 +628,16 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
void onCallArgumentSetup(const CallBase &Call) override {
// Pay the price of the argument setup. We account for the average 1
// instruction per call argument setup here.
addCost(Call.arg_size() * InlineConstants::InstrCost);
addCost(Call.arg_size() * InstrCost);
}
void onLoadRelativeIntrinsic() override {
// This is normally lowered to 4 LLVM instructions.
addCost(3 * InlineConstants::InstrCost);
addCost(3 * InstrCost);
}
void onLoweredCall(Function *F, CallBase &Call,
bool IsIndirectCall) override {
// We account for the average 1 instruction per call argument setup here.
addCost(Call.arg_size() * InlineConstants::InstrCost);
addCost(Call.arg_size() * InstrCost);

// If we have a constant that we are calling as a function, we can peer
// through it and see the function target. This happens not infrequently
Expand Down Expand Up @@ -659,29 +669,25 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Maximum valid cost increased in this function.
if (JumpTableSize) {
int64_t JTCost =
static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
4 * InlineConstants::InstrCost;
static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;

addCost(JTCost);
return;
}

if (NumCaseCluster <= 3) {
// Suppose a comparison includes one compare and one conditional branch.
addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
addCost(NumCaseCluster * 2 * InstrCost);
return;
}

int64_t ExpectedNumberOfCompare =
getExpectedNumberOfCompare(NumCaseCluster);
int64_t SwitchCost =
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

addCost(SwitchCost);
}
void onMissedSimplification() override {
addCost(InlineConstants::InstrCost);
}
void onMissedSimplification() override { addCost(InstrCost); }

void onInitializeSROAArg(AllocaInst *Arg) override {
assert(Arg != nullptr &&
Expand All @@ -693,8 +699,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
auto CostIt = SROAArgCosts.find(SROAArg);
assert(CostIt != SROAArgCosts.end() &&
"expected this argument to have a cost");
CostIt->second += InlineConstants::InstrCost;
SROACostSavings += InlineConstants::InstrCost;
CostIt->second += InstrCost;
SROACostSavings += InstrCost;
}

void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
Expand Down Expand Up @@ -801,7 +807,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
assert(CalleeBFI);

// The cycle savings expressed as the sum of InlineConstants::InstrCost
// The cycle savings expressed as the sum of InstrCost
// multiplied by the estimated dynamic count of each instruction we can
// avoid. Savings come from the call site cost, such as argument setup and
// the call instruction, as well as the instructions that are folded.
Expand All @@ -821,12 +827,12 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
if (BI->isConditional() &&
isa_and_nonnull<ConstantInt>(
SimplifiedValues.lookup(BI->getCondition()))) {
CurrentSavings += InlineConstants::InstrCost;
CurrentSavings += InstrCost;
}
} else if (Value *V = dyn_cast<Value>(&I)) {
// Count an instruction as savings if we can fold it.
if (SimplifiedValues.count(V)) {
CurrentSavings += InlineConstants::InstrCost;
CurrentSavings += InstrCost;
}
}
}
Expand Down Expand Up @@ -945,7 +951,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
}

void onLoadEliminationOpportunity() override {
LoadEliminationCost += InlineConstants::InstrCost;
LoadEliminationCost += InstrCost;
}

InlineResult onAnalysisStart() override {
Expand Down Expand Up @@ -1086,18 +1092,17 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {

void onCallArgumentSetup(const CallBase &Call) override {
increment(InlineCostFeatureIndex::CallArgumentSetup,
Call.arg_size() * InlineConstants::InstrCost);
Call.arg_size() * InstrCost);
}

void onLoadRelativeIntrinsic() override {
increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
3 * InlineConstants::InstrCost);
increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InstrCost);
}

void onLoweredCall(Function *F, CallBase &Call,
bool IsIndirectCall) override {
increment(InlineCostFeatureIndex::LoweredCallArgSetup,
Call.arg_size() * InlineConstants::InstrCost);
Call.arg_size() * InstrCost);

if (IsIndirectCall) {
InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
Expand Down Expand Up @@ -1130,37 +1135,35 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
unsigned NumCaseCluster) override {

if (JumpTableSize) {
int64_t JTCost =
static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
JTCostMultiplier * InlineConstants::InstrCost;
int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
JTCostMultiplier * InstrCost;
increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
return;
}

if (NumCaseCluster <= 3) {
increment(InlineCostFeatureIndex::CaseClusterPenalty,
NumCaseCluster * CaseClusterCostMultiplier *
InlineConstants::InstrCost);
NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
return;
}

int64_t ExpectedNumberOfCompare =
getExpectedNumberOfCompare(NumCaseCluster);

int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
InlineConstants::InstrCost;
int64_t SwitchCost =
ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
}

void onMissedSimplification() override {
increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
InlineConstants::InstrCost);
InstrCost);
}

void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
void onAggregateSROAUse(AllocaInst *Arg) override {
SROACosts.find(Arg)->second += InlineConstants::InstrCost;
SROACostSavingOpportunities += InlineConstants::InstrCost;
SROACosts.find(Arg)->second += InstrCost;
SROACostSavingOpportunities += InstrCost;
}

void onBlockAnalyzed(const BasicBlock *BB) override {
Expand Down Expand Up @@ -2746,7 +2749,7 @@ static bool functionsHaveCompatibleAttributes(
}

int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
int Cost = 0;
int64_t Cost = 0;
for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
if (Call.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
Expand All @@ -2766,16 +2769,17 @@ int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
// DataLayout.
NumStores = std::min(NumStores, 8U);

Cost += 2 * NumStores * InlineConstants::InstrCost;
Cost += 2 * NumStores * InstrCost;
} else {
// For non-byval arguments subtract off one instruction per call
// argument.
Cost += InlineConstants::InstrCost;
Cost += InstrCost;
}
}
// The call instruction also disappears after inlining.
Cost += InlineConstants::InstrCost + CallPenalty;
return Cost;
Cost += InstrCost;
Cost += CallPenalty;
return std::min<int64_t>(Cost, INT_MAX);
}

InlineCost llvm::getInlineCost(
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Expand Up @@ -561,7 +561,7 @@ class FunctionSpecializer {
// Otherwise, set the specialization cost to be the cost of all the
// instructions in the function and penalty for specializing more functions.
unsigned Penalty = NbFunctionsSpecialized + 1;
return Metrics.NumInsts * InlineConstants::InstrCost * Penalty;
return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty;
}

InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/IPO/PartialInlining.cpp
Expand Up @@ -854,6 +854,7 @@ PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
TargetTransformInfo *TTI) {
InstructionCost InlineCost = 0;
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
int InstrCost = InlineConstants::getInstrCost();
for (Instruction &I : BB->instructionsWithoutDebug()) {
// Skip free instructions.
switch (I.getOpcode()) {
Expand Down Expand Up @@ -900,10 +901,10 @@ PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
}

if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
InlineCost += (SI->getNumCases() + 1) * InstrCost;
continue;
}
InlineCost += InlineConstants::InstrCost;
InlineCost += InstrCost;
}

return InlineCost;
Expand Down Expand Up @@ -932,7 +933,7 @@ PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
// additional unconditional branches. Those branches will be eliminated
// later with bb layout. The cost should be adjusted accordingly:
OutlinedFunctionCost -=
2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();

InstructionCost OutliningRuntimeOverhead =
OutliningFuncCallCost +
Expand Down

0 comments on commit a2aa680

Please sign in to comment.