Skip to content

Commit

Permalink
[FuncSpec] Rework the discardment logic for unprofitable specializati…
Browse files Browse the repository at this point in the history
…ons.

Currently we make an arbitrary comparison between codesize and latency
in order to decide whether to keep a specialization or not. Sometimes
the latency savings are biased in favor of loops because of imprecise
block frequencies, therefore this metric contains a lot of noise. This
patch tries to address the problem as follows:

* Reject specializations whose codesize savings are less than X% of
  the original function size.
* Reject specializations whose latency savings are less than Y% of
  the original function size.
* Reject specializations whose inlining bonus is less than Z% of
  the original function size.

I am not saying this is super precise, but at least X, Y and Z are
configurable, allowing us to tweak the cost model. Moreover, it lets
us prioritize codesize, which is a less noisy metric, over latency.

I am also increasing the minimum size a function should have to be
considered a candidate for specialization. Initially the cost of
a function was calculated as

  CodeMetrics::NumInsts * InlineConstants::getInstrCost()

which was later changed in D150464 to just CodeMetrics::NumInsts, since
the metric is supposed to model TargetTransformInfo::TCK_CodeSize.
However, we neglected to adjust MinFunctionSize in that commit.

Differential Revision: https://reviews.llvm.org/D157123
  • Loading branch information
labrinea committed Aug 9, 2023
1 parent 7ec88f0 commit d1b376f
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 57 deletions.
18 changes: 9 additions & 9 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,7 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
}

Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
Constant *C = nullptr);
Bonus getSpecializationBonus(Argument *A, Constant *C);

Bonus getBonusFromPendingPHIs();

Expand All @@ -187,6 +186,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
DenseSet<BasicBlock *> &DeadBlocks);

Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
Constant *C = nullptr);

Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);
Expand Down Expand Up @@ -244,10 +246,6 @@ class FunctionSpecializer {
return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
}

/// Compute a bonus for replacing argument \p A with constant \p C.
Bonus getSpecializationBonus(Argument *A, Constant *C,
InstCostVisitor &Visitor);

private:
Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);

Expand All @@ -268,14 +266,16 @@ class FunctionSpecializer {

/// @brief Find potential specialization opportunities.
/// @param F Function to specialize
/// @param SpecCost Cost of specializing a function. Final score is benefit
/// minus this cost.
/// @param FuncSize Size of the function in instructions; the profitability
/// thresholds are expressed as percentages of this size.
/// @param AllSpecs A vector to add potential specializations to.
/// @param SM A map for a function's specialisation range
/// @return True, if any potential specializations were found
bool findSpecializations(Function *F, unsigned SpecCost,
bool findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);

/// Compute the inlining bonus for replacing argument \p A with constant \p C.
unsigned getInliningBonus(Argument *A, Constant *C);

bool isCandidateFunction(Function *F);

/// @brief Create a specialization of \p F and prime the SCCPSolver
Expand Down
105 changes: 70 additions & 35 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,25 @@ static cl::opt<unsigned> MaxBlockPredecessors(
"considered during the estimation of dead code"));

static cl::opt<unsigned> MinFunctionSize(
"funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
"funcspec-min-function-size", cl::init(300), cl::Hidden, cl::desc(
"Don't specialize functions that have less than this number of "
"instructions"));

// Threshold on code-size savings, expressed as a percentage of the original
// function size. Note the trailing space before the literal break: adjacent
// string literals are concatenated verbatim.
static cl::opt<unsigned> MinCodeSizeSavings(
    "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
    "Reject specializations whose codesize savings are less than this "
    "much percent of the original function size"));

// Threshold on latency savings, expressed as a percentage of the original
// function size. Note the trailing space before the literal break: adjacent
// string literals are concatenated verbatim.
static cl::opt<unsigned> MinLatencySavings(
    "funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
    "Reject specializations whose latency savings are less than this "
    "much percent of the original function size"));

// Threshold on the inlining bonus, expressed as a percentage of the original
// function size. Note the trailing space before the literal break: adjacent
// string literals are concatenated verbatim.
static cl::opt<unsigned> MinInliningBonus(
    "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
    "Reject specializations whose inlining bonus is less than this "
    "much percent of the original function size"));

static cl::opt<bool> SpecializeOnAddress(
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
"Enable function specialization on the address of global values"));
Expand Down Expand Up @@ -180,6 +195,22 @@ Bonus InstCostVisitor::getBonusFromPendingPHIs() {
return B;
}

/// Compute a bonus for replacing argument \p A with constant \p C.
///
/// Accumulates the result of getUserBonus() over every user of \p A that is
/// an instruction residing in a block for which isBlockExecutable() holds.
Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                    << C->getNameOrAsOperand() << "\n");

  Bonus Total;
  for (auto *U : A->users()) {
    auto *UserInst = dyn_cast<Instruction>(U);
    // Skip non-instruction users and users in non-executable blocks.
    if (!UserInst || !isBlockExecutable(UserInst->getParent()))
      continue;
    Total += getUserBonus(UserInst, A, C);
  }

  LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
                    << Total.CodeSize << ", Latency = " << Total.Latency
                    << "} for argument " << *A << "\n");
  return Total;
}

Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
// We have already propagated a constant for this user.
if (KnownConstants.contains(User))
Expand Down Expand Up @@ -589,15 +620,15 @@ bool FunctionSpecializer::run() {
int64_t Sz = *Metrics.NumInsts.getValue();
assert(Sz > 0 && "CodeSize should be positive");
// It is safe to down cast from int64_t, NumInsts is always positive.
unsigned SpecCost = static_cast<unsigned>(Sz);
unsigned FuncSize = static_cast<unsigned>(Sz);

LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
<< F.getName() << " is " << SpecCost << "\n");
<< F.getName() << " is " << FuncSize << "\n");

if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);

if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) {
if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
Expand Down Expand Up @@ -732,7 +763,7 @@ static Function *cloneCandidateFunction(Function *F) {
return Clone;
}

bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
// A mapping from a specialisation signature to the index of the respective
Expand Down Expand Up @@ -799,21 +830,42 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
} else {
// Calculate the specialisation gain.
Bonus B;
unsigned Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args)
B += getSpecializationBonus(A.Formal, A.Actual, Visitor);
for (ArgInfo &A : S.Args) {
B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
Score += getInliningBonus(A.Formal, A.Actual);
}
B += Visitor.getBonusFromPendingPHIs();

LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = "

LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
<< B.CodeSize << ", Latency = " << B.Latency
<< "}\n");
<< ", Inlining = " << Score << "}\n");

auto IsProfitable = [&FuncSize](Bonus &B, unsigned Score) -> bool {
// No check required.
if (ForceSpecialization)
return true;
// Minimum inlining bonus.
if (Score > MinInliningBonus * FuncSize / 100)
return true;
// Minimum codesize savings.
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
return false;
// Minimum latency savings.
if (B.Latency < MinLatencySavings * FuncSize / 100)
return false;
return true;
};

// Discard unprofitable specialisations.
if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize)
if (!IsProfitable(B, Score))
continue;

// Create a new specialisation entry.
auto &Spec = AllSpecs.emplace_back(F, S, B.Latency);
Score += std::max(B.CodeSize, B.Latency);
auto &Spec = AllSpecs.emplace_back(F, S, Score);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
const unsigned Index = AllSpecs.size() - 1;
Expand Down Expand Up @@ -879,31 +931,14 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
return Clone;
}

/// Compute a bonus for replacing argument \p A with constant \p C.
Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
InstCostVisitor &Visitor) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
<< C->getNameOrAsOperand() << "\n");

Bonus B;
for (auto *U : A->users())
if (auto *UI = dyn_cast<Instruction>(U))
if (Visitor.isBlockExecutable(UI->getParent()))
B += Visitor.getUserBonus(UI, A, C);

LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
<< B.CodeSize << ", Latency = " << B.Latency
<< "} for argument " << *A << "\n");

// The below heuristic is only concerned with exposing inlining
// opportunities via indirect call promotion. If the argument is not a
// (potentially casted) function pointer, give up.
//
// TODO: Perhaps we should consider checking such inlining opportunities
// while traversing the users of the specialization arguments ?
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
/// The below heuristic is only concerned with exposing inlining
/// opportunities via indirect call promotion. If the argument is not a
/// (potentially casted) function pointer, give up.
unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
if (!CalledFunction)
return B;
return 0;

// Get TTI for the called function (used for the inline cost).
auto &CalleeTTI = (GetTTI)(*CalledFunction);
Expand Down Expand Up @@ -948,7 +983,7 @@ Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
<< " for user " << *U << "\n");
}

return B += {0, InliningBonus};
return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;
}

/// Determine if it is possible to specialise the function for constant values
Expand Down
26 changes: 13 additions & 13 deletions llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,13 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {

// mul
Bonus Ref = getInstCost(Mul);
Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

// and + or + add
Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(1), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

Expand All @@ -183,7 +183,7 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
getInstCost(Sdiv, /*SizeOnly =*/ true) +
getInstCost(BrBB2, /*SizeOnly =*/ true) +
getInstCost(BrLoop, /*SizeOnly =*/ true);
Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(2), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
}
Expand Down Expand Up @@ -235,13 +235,13 @@ TEST_F(FunctionSpecializationTest, BranchInst) {

// mul
Bonus Ref = getInstCost(Mul);
Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

// add
Ref = getInstCost(Add);
Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(1), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

Expand All @@ -252,7 +252,7 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
getInstCost(Sdiv, /*SizeOnly =*/ true) +
getInstCost(BrBB2, /*SizeOnly =*/ true) +
getInstCost(BrLoop, /*SizeOnly =*/ true);
Test = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(2), False);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
}
Expand Down Expand Up @@ -301,24 +301,24 @@ TEST_F(FunctionSpecializationTest, Misc) {

// icmp + zext
Bonus Ref = getInstCost(Icmp) + getInstCost(Zext);
Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

// select
Ref = getInstCost(Select);
Test = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(1), True);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

// gep + load + freeze + smax
Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) +
getInstCost(Smax);
Test = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(2), GV);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

Test = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(3), Undef);
EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
}

Expand Down Expand Up @@ -369,17 +369,17 @@ TEST_F(FunctionSpecializationTest, PhiNode) {
Instruction &Icmp = *++BB.begin();
Instruction &Branch = BB.back();

Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One);
EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);

Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(1), One);
EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);

// switch + phi + br
Bonus Ref = getInstCost(Switch) +
getInstCost(PhiCase2, /*SizeOnly =*/ true) +
getInstCost(BrBB, /*SizeOnly =*/ true);
Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
Test = Visitor.getSpecializationBonus(F->getArg(2), One);
EXPECT_EQ(Test, Ref);
EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);

Expand Down

0 comments on commit d1b376f

Please sign in to comment.