Skip to content

Commit

Permalink
[Partial Inliner] Compute intrinsic cost through TTI
Browse files Browse the repository at this point in the history
https://bugs.llvm.org/show_bug.cgi?id=45932

The assertion assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && "Outlined function cost should be no less than the outlined region") is triggered in computeOutliningCosts, which sums the per-block costs returned by computeBBInlineCost.

Intrinsics like "assume" are considered regular function calls while computing costs.
This patch enables computeBBInlineCost to query TTI for the cost of intrinsic calls.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D87132
  • Loading branch information
Dangeti Tharun kumar authored and fhahn committed Sep 16, 2020
1 parent 855ec51 commit 01e2b39
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 19 deletions.
64 changes: 45 additions & 19 deletions llvm/lib/Transforms/IPO/PartialInlining.cpp
Expand Up @@ -226,10 +226,13 @@ struct PartialInlinerImpl {
// multi-region outlining.
FunctionCloner(Function *F, FunctionOutliningInfo *OI,
OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC);
function_ref<AssumptionCache *(Function &)> LookupAC,
function_ref<TargetTransformInfo &(Function &)> GetTTI);
FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC);
function_ref<AssumptionCache *(Function &)> LookupAC,
function_ref<TargetTransformInfo &(Function &)> GetTTI);

~FunctionCloner();

// Prepare for function outlining: making sure there is only
Expand Down Expand Up @@ -266,6 +269,7 @@ struct PartialInlinerImpl {
std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
OptimizationRemarkEmitter &ORE;
function_ref<AssumptionCache *(Function &)> LookupAC;
function_ref<TargetTransformInfo &(Function &)> GetTTI;
};

private:
Expand Down Expand Up @@ -334,7 +338,7 @@ struct PartialInlinerImpl {
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
// approximate both the size and runtime cost (Note that in the current
// inline cost analysis, there is no clear distinction there either).
static int computeBBInlineCost(BasicBlock *BB);
static int computeBBInlineCost(BasicBlock *BB, TargetTransformInfo *TTI);

std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
std::unique_ptr<FunctionOutliningMultiRegionInfo>
Expand Down Expand Up @@ -448,9 +452,10 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,

// Use the same computeBBInlineCost function to compute the cost savings of
// outlining the candidate region.
TargetTransformInfo *FTTI = &GetTTI(*F);
int OverallFunctionCost = 0;
for (auto &BB : *F)
OverallFunctionCost += computeBBInlineCost(&BB);
OverallFunctionCost += computeBBInlineCost(&BB, FTTI);

#ifndef NDEBUG
if (TracePartialInlining)
Expand Down Expand Up @@ -509,7 +514,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
continue;
int OutlineRegionCost = 0;
for (auto *BB : DominateVector)
OutlineRegionCost += computeBBInlineCost(BB);
OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));

#ifndef NDEBUG
if (TracePartialInlining)
Expand Down Expand Up @@ -843,7 +848,8 @@ bool PartialInlinerImpl::shouldPartialInline(
// TODO: Ideally we should share Inliner's InlineCost Analysis code.
// For now use a simplified version. The returned 'InlineCost' will be used
// to estimate the size cost as well as runtime cost of the BB.
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
TargetTransformInfo *TTI) {
int InlineCost = 0;
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
for (Instruction &I : BB->instructionsWithoutDebug()) {
Expand All @@ -866,6 +872,21 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
if (I.isLifetimeStartOrEnd())
continue;

if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
Intrinsic::ID IID = II->getIntrinsicID();
SmallVector<Type *, 4> Tys;
FastMathFlags FMF;
for (Value *Val : II->args())
Tys.push_back(Val->getType());

if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();

IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
continue;
}

if (CallInst *CI = dyn_cast<CallInst>(&I)) {
InlineCost += getCallsiteCost(*CI, DL);
continue;
Expand Down Expand Up @@ -893,11 +914,13 @@ PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
BasicBlock* OutliningCallBB = FuncBBPair.second;
// Now compute the cost of the call sequence to the outlined function
// 'OutlinedFunction' in BB 'OutliningCallBB':
OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
OutliningFuncCallCost +=
computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);

// Now compute the cost of the extracted/outlined function itself:
for (BasicBlock &BB : *OutlinedFunc)
OutlinedFunctionCost += computeBBInlineCost(&BB);
OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
}
assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
Expand Down Expand Up @@ -962,8 +985,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(

PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
function_ref<AssumptionCache *(Function &)> LookupAC,
function_ref<TargetTransformInfo &(Function &)> GetTTI)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
ClonedOI = std::make_unique<FunctionOutliningInfo>();

// Clone the function, so that we can hack away on it.
Expand All @@ -987,8 +1011,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningMultiRegionInfo *OI,
OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
function_ref<AssumptionCache *(Function &)> LookupAC,
function_ref<TargetTransformInfo &(Function &)> GetTTI)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();

// Clone the function, so that we can hack away on it.
Expand Down Expand Up @@ -1099,10 +1124,10 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {

bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {

auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
int Cost = 0;
for (BasicBlock* BB : Region)
Cost += computeBBInlineCost(BB);
Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
return Cost;
};

Expand Down Expand Up @@ -1196,17 +1221,18 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {

// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
ToExtract.push_back(ClonedOI->NonReturnBlock);
OutlinedRegionCost +=
PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
ClonedOI->NonReturnBlock, ClonedFuncTTI);
for (BasicBlock &BB : *ClonedFunc)
if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
ToExtract.push_back(&BB);
// FIXME: the code extractor may hoist/sink more code
// into the outlined function which may make the outlining
// overhead (the difference of the outlined function cost
// and OutliningRegionCost) look larger.
OutlinedRegionCost += computeBBInlineCost(&BB);
OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
}

// Extract the body of the if.
Expand Down Expand Up @@ -1276,7 +1302,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F, ORE);
if (OMRI) {
FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);

#ifndef NDEBUG
if (TracePartialInlining) {
Expand Down Expand Up @@ -1309,7 +1335,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (!OI)
return {false, nullptr};

FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
Cloner.NormalizeReturnBlock();

Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
Expand Down
55 changes: 55 additions & 0 deletions llvm/test/Transforms/PartialInlining/intrinsic-call-cost.ll
@@ -0,0 +1,55 @@
; RUN: opt -partial-inliner -S < %s | FileCheck %s

; Checks that valid costs are computed for intrinsic calls.
; https://bugs.llvm.org/show_bug.cgi?id=45932


@emit_notes = external global i8, align 2

; CHECK: var_reg_delete
; CHECK-NEXT: bb
; CHECK-NEXT: tail call void @delete_variable_part()
; CHECK-NEXT: ret void

; Caller whose only work is a tail call to @delete_variable_part. The
; FileCheck expectations above match this body exactly as written here.
define void @var_reg_delete() {
bb:
tail call void @delete_variable_part()
ret void
}

; CHECK: delete_variable_part
; CHECK-NEXT: bb
; CHECK-NEXT: %tmp1.i = tail call i32 @find_variable_location_part()
; CHECK-NEXT: %tmp3.i = icmp sgt i32 %tmp1.i, -1
; CHECK-NEXT: br i1 %tmp3.i, label %bb4.i, label %delete_slot_part.exit

; CHECK: bb4.i
; CHECK-NEXT: %tmp.i.i = load i8, i8* @emit_notes
; CHECK-NEXT: %tmp1.i.i = icmp ne i8 %tmp.i.i, 0
; CHECK-NEXT: tail call void @llvm.assume(i1 %tmp1.i.i)
; CHECK-NEXT: unreachable

; CHECK: delete_slot_part.exit
; CHECK-NEXT: ret void

; Callee that contains the @llvm.assume intrinsic call; the file header says
; this test checks that valid costs are computed for intrinsic calls.
define void @delete_variable_part() {
bb:
; Branch to bb4.i when the call result is greater than -1, otherwise fall
; through to the plain return block.
%tmp1.i = tail call i32 @find_variable_location_part()
%tmp3.i = icmp sgt i32 %tmp1.i, -1
br i1 %tmp3.i, label %bb4.i, label %delete_slot_part.exit

bb4.i:
; Loads @emit_notes, passes "value != 0" to llvm.assume, and ends in
; unreachable, so this block never returns to the caller.
%tmp.i.i = load i8, i8* @emit_notes, align 2
%tmp1.i.i = icmp ne i8 %tmp.i.i, 0
tail call void @llvm.assume(i1 %tmp1.i.i)
unreachable

delete_slot_part.exit:
; Reached when the call result is -1 or less.
ret void
}

; CHECK: declare i32 @find_variable_location_part
declare i32 @find_variable_location_part()

; CHECK: declare void @llvm.assume(i1 noundef)
declare void @llvm.assume(i1 noundef)

0 comments on commit 01e2b39

Please sign in to comment.