Skip to content

Commit

Permalink
[PartialInlining] Fix Crash from holding a reference to a destructed …
Browse files Browse the repository at this point in the history
…ORE.

The callback used to create an ORE for the legacy PI pass caches the allocated
object in a unique_ptr in the runOnModule function, and returns a reference to
that object. Under certian circumstances we can end up holding onto that
reference after the OREs destruction. Rather then allowing the new and legacy
passes to create ORE object in diffrent ways, create the ORE at the point of
use.

Differential Revision: https://reviews.llvm.org/D43219

llvm-svn: 330473
  • Loading branch information
Sean Fertile committed Apr 20, 2018
1 parent 5061b37 commit 18f1733
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 37 deletions.
57 changes: 20 additions & 37 deletions llvm/lib/Transforms/IPO/PartialInlining.cpp
Expand Up @@ -202,10 +202,8 @@ struct PartialInlinerImpl {
std::function<AssumptionCache &(Function &)> *GetAC,
std::function<TargetTransformInfo &(Function &)> *GTTI,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
ProfileSummaryInfo *ProfSI,
std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
: GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI),
GetORE(GORE) {}
ProfileSummaryInfo *ProfSI)
: GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}

bool run(Module &M);
// Main part of the transformation that calls helper functions to find
Expand Down Expand Up @@ -271,7 +269,6 @@ struct PartialInlinerImpl {
std::function<TargetTransformInfo &(Function &)> *GetTTI;
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
ProfileSummaryInfo *PSI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;

// Return the frequency of the OutlininingBB relative to F's entry point.
// The result is no larger than 1 and is represented using BP.
Expand All @@ -282,7 +279,8 @@ struct PartialInlinerImpl {
// Return true if the callee of CS should be partially inlined with
// profit.
bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
BlockFrequency WeightedOutliningRcost);
BlockFrequency WeightedOutliningRcost,
OptimizationRemarkEmitter &ORE);

// Try to inline DuplicateFunction (cloned from F with call to
// the OutlinedFunction into its callers. Return true
Expand Down Expand Up @@ -337,7 +335,7 @@ struct PartialInlinerImpl {

std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function *F);
computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
};

struct PartialInlinerLegacyPass : public ModulePass {
Expand All @@ -362,7 +360,6 @@ struct PartialInlinerLegacyPass : public ModulePass {
&getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::unique_ptr<OptimizationRemarkEmitter> UPORE;

std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
Expand All @@ -374,22 +371,16 @@ struct PartialInlinerLegacyPass : public ModulePass {
return TTIWP->getTTI(F);
};

std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&UPORE](Function &F) -> OptimizationRemarkEmitter & {
UPORE.reset(new OptimizationRemarkEmitter(&F));
return *UPORE.get();
};

return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI,
&GetORE)
return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
.run(M);
}
};

} // end anonymous namespace

std::unique_ptr<FunctionOutliningMultiRegionInfo>
PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
OptimizationRemarkEmitter &ORE) {
BasicBlock *EntryBlock = &F->front();

DominatorTree DT(*F);
Expand All @@ -403,8 +394,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
} else
BFI = &(*GetBFI)(*F);

auto &ORE = (*GetORE)(*F);

// Return if we don't have profiling information.
if (!PSI->hasInstrumentationProfile())
return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
Expand Down Expand Up @@ -766,7 +755,8 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {

bool PartialInlinerImpl::shouldPartialInline(
CallSite CS, FunctionCloner &Cloner,
BlockFrequency WeightedOutliningRcost) {
BlockFrequency WeightedOutliningRcost,
OptimizationRemarkEmitter &ORE) {
using namespace ore;

Instruction *Call = CS.getInstruction();
Expand All @@ -778,7 +768,6 @@ bool PartialInlinerImpl::shouldPartialInline(

Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
auto &ORE = (*GetORE)(*Caller);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
*GetAssumptionCache, GetBFI, PSI, &ORE);

Expand Down Expand Up @@ -1270,14 +1259,14 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (F->user_begin() == F->user_end())
return {false, nullptr};

auto &ORE = (*GetORE)(*F);
OptimizationRemarkEmitter ORE(F);

// Only try to outline cold regions if we have a profile summary, which
// implies we have profiling information.
if (PSI->hasProfileSummary() && F->hasProfileData() &&
!DisableMultiRegionPartialInline) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F);
computeOutliningColdRegionsInfo(F, ORE);
if (OMRI) {
FunctionCloner Cloner(F, OMRI.get(), ORE);

Expand Down Expand Up @@ -1357,11 +1346,11 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
// inlining the function with outlining (The inliner uses the size increase to
// model the cost of inlining a callee).
if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
auto &ORE = (*GetORE)(*Cloner.OrigFunc);
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
DebugLoc DLoc;
BasicBlock *Block;
std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
ORE.emit([&]() {
OrigFuncORE.emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
DLoc, Block)
<< ore::NV("Function", Cloner.OrigFunc)
Expand Down Expand Up @@ -1394,11 +1383,10 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (IsLimitReached())
continue;


if (!shouldPartialInline(CS, Cloner, WeightedRcost))
OptimizationRemarkEmitter CallerORE(CS.getCaller());
if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
continue;

auto &ORE = (*GetORE)(*CS.getCaller());
// Construct remark before doing the inlining, as after successful inlining
// the callsite is removed.
OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
Expand All @@ -1413,7 +1401,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
: nullptr)))
continue;

ORE.emit(OR);
CallerORE.emit(OR);

// Now update the entry count:
if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
Expand All @@ -1436,8 +1424,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (CalleeEntryCount)
Cloner.OrigFunc->setEntryCount(
CalleeEntryCount.setCount(CalleeEntryCountV));
auto &ORE = (*GetORE)(*Cloner.OrigFunc);
ORE.emit([&]() {
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
OrigFuncORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
<< "Partially inlined into at least one caller";
});
Expand Down Expand Up @@ -1519,14 +1507,9 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
return FAM.getResult<TargetIRAnalysis>(F);
};

std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&FAM](Function &F) -> OptimizationRemarkEmitter & {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
};

ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE)
if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
.run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
Expand Down
170 changes: 170 additions & 0 deletions llvm/test/Transforms/CodeExtractor/PartialInlineORECrash.ll
@@ -0,0 +1,170 @@
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -inline-threshold=0 -disable-output

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

%0 = type { i32 (...)**, %1, %1, %3, %3, %3, i8, float, %4*, %5*, %5*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
%1 = type { %2, %3 }
%2 = type { [3 x %3] }
%3 = type { [4 x float] }
%4 = type <{ i8*, i16, i16, [4 x i8], i8*, i32, %3, %3, [4 x i8] }>
%5 = type { i32 (...)**, i32, i8* }
%6 = type <{ %7, [4 x i8], %19*, %20*, %30, %35, %3, float, i8, i8, i8, i8, %37, i32, [4 x i8] }>
%7 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17 }>
%8 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8 }>
%9 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %0**, i8, [7 x i8] }>
%11 = type { i32 (...)** }
%12 = type { float, i32, i32, float, i8, %15*, i8, i8, i8, float, i8, float, %13* }
%13 = type opaque
%14 = type { i32 (...)** }
%15 = type { i32 (...)** }
%16 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17, [4 x i8] }>
%17 = type { %18 }
%18 = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
%19 = type { i32 (...)** }
%20 = type <{ i32 (...)**, %21, %25, %9, i8, [7 x i8] }>
%21 = type { %22 }
%22 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %24*, i8, [7 x i8] }>
%24 = type { i32, i32 }
%25 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %27**, i8, [7 x i8] }>
%27 = type { i32, [4 x i8], [4 x %29], i8*, i8*, i32, float, float, i32 }
%29 = type <{ %3, %3, %3, %3, %3, float, float, float, i32, i32, i32, i32, [4 x i8], i8*, float, i8, [3 x i8], float, float, i32, %3, %3, [4 x i8] }>
%30 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %32**, i8, [7 x i8] }>
%32 = type { i32 (...)**, i32, i32, i32, i8, %33*, %33*, float, float, %3, %3, %3 }
%33 = type <{ %0, %2, %3, %3, float, %3, %3, %3, %3, %3, %3, %3, float, float, i8, [3 x i8], float, float, float, float, float, float, %34*, %30, i32, i32, i32, [4 x i8] }>
%34 = type { i32 (...)** }
%35 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %33**, i8, [7 x i8] }>
%37 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %39**, i8, [7 x i8] }>
%39 = type { i32 (...)** }
%40 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8, [7 x i8] }>

@gDisableDeactivation = external local_unnamed_addr global i8, align 1
@0 = external dso_local unnamed_addr constant [29 x i8], align 1
@1 = external dso_local unnamed_addr constant [14 x i8], align 1
@2 = external dso_local unnamed_addr constant [22 x i8], align 1
@gDeactivationTime = external local_unnamed_addr global float, align 4

declare void @_ZN15CProfileManager12Stop_ProfileEv() local_unnamed_addr

declare void @_ZN15CProfileManager13Start_ProfileEPKc(i8*) local_unnamed_addr

declare void @_ZN17btCollisionObject18setActivationStateEi(%0*, i32 signext) local_unnamed_addr

declare hidden void @__clang_call_terminate(i8*) local_unnamed_addr

declare i32 @__gxx_personality_v0(...)

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0

define void @_ZN23btDiscreteDynamicsWorld28internalSingleStepSimulationEf(%6*, float) unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !27 {
invoke void null(%6* nonnull %0, float %1)
to label %5 unwind label %3

; <label>:3: ; preds = %2
%4 = landingpad { i8*, i32 }
cleanup
br label %16

; <label>:5: ; preds = %2
%6 = invoke %15* null(%40* null)
to label %11 unwind label %13

; <label>:7: ; preds = %5
invoke void null(%40* null)
to label %8 unwind label %13

; <label>:8: ; preds = %7
invoke void null(%6* nonnull %0)
to label %9 unwind label %13

; <label>:9: ; preds = %8
invoke void null(%6* nonnull %0, %17* nonnull dereferenceable(68) null)
to label %10 unwind label %13

; <label>:10: ; preds = %9
invoke void null(%6* nonnull %0, float %1)
to label %11 unwind label %13

; <label>:11:
invoke void @_ZN23btDiscreteDynamicsWorld21updateActivationStateEf(%6* nonnull %0, float %1)
to label %12 unwind label %13

; <label>:12:
ret void

; <label>:13:
%14 = landingpad { i8*, i32 }
cleanup
%15 = extractvalue { i8*, i32 } %14, 0
br label %16


; <label>:16:
call void @_ZN15CProfileManager12Stop_ProfileEv()
resume { i8*, i32 } zeroinitializer
}

define void @_ZN23btDiscreteDynamicsWorld21updateActivationStateEf(%6* nocapture readonly, float) local_unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !27 {
%3 = icmp sgt i32 0, 0
br i1 %3, label %4, label %5, !prof !29

; <label>:4: ; preds = %2
br i1 false, label %5, label %6, !prof !30

; <label>:5: ; preds = %7, %4, %2
ret void

; <label>:6: ; preds = %4
invoke void @_ZN17btCollisionObject18setActivationStateEi(%0* nonnull null, i32 signext 0)
to label %7 unwind label %8

; <label>:7: ; preds = %6
invoke void @_ZN17btCollisionObject18setActivationStateEi(%0* nonnull null, i32 signext 1)
to label %5 unwind label %8

; <label>:8: ; preds = %7, %6
%9 = landingpad { i8*, i32 }
cleanup
resume { i8*, i32 } %9
}

; Function Attrs: noreturn nounwind
declare void @llvm.trap() #1

attributes #0 = { argmemonly nounwind }
attributes #1 = { noreturn nounwind }

!llvm.module.flags = !{!0}

!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 6540578580}
!4 = !{!"MaxCount", i64 629805108}
!5 = !{!"MaxInternalCount", i64 40670372}
!6 = !{!"MaxFunctionCount", i64 629805108}
!7 = !{!"NumCounts", i64 8554}
!8 = !{!"NumFunctions", i64 3836}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13, !14, !15, !16, !16, !17, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26}
!11 = !{i32 10000, i64 629805108, i32 1}
!12 = !{i32 100000, i64 366853677, i32 2}
!13 = !{i32 200000, i64 196816893, i32 4}
!14 = !{i32 300000, i64 192575561, i32 7}
!15 = !{i32 400000, i64 130688163, i32 11}
!16 = !{i32 500000, i64 74857169, i32 19}
!17 = !{i32 600000, i64 48184151, i32 30}
!18 = !{i32 700000, i64 21298588, i32 49}
!19 = !{i32 800000, i64 10721033, i32 90}
!20 = !{i32 900000, i64 3301634, i32 202}
!21 = !{i32 950000, i64 1454952, i32 362}
!22 = !{i32 990000, i64 343872, i32 675}
!23 = !{i32 999000, i64 46009, i32 1112}
!24 = !{i32 999900, i64 6067, i32 1435}
!25 = !{i32 999990, i64 700, i32 1721}
!26 = !{i32 999999, i64 72, i32 1955}
!27 = !{!"function_entry_count", i64 700}
!28 = !{!"branch_weights", i32 701, i32 1}
!29 = !{!"branch_weights", i32 954001, i32 701}
!30 = !{!"branch_weights", i32 1, i32 954001}

0 comments on commit 18f1733

Please sign in to comment.