Skip to content

Commit

Permalink
[PartialInlining] Shrinkwrap allocas with live range contained in out…
Browse files Browse the repository at this point in the history
…line region.

Differential Revision: http://reviews.llvm.org/D33618

llvm-svn: 304245
  • Loading branch information
david-xl committed May 30, 2017
1 parent 73141d5 commit 74480ad
Show file tree
Hide file tree
Showing 6 changed files with 355 additions and 8 deletions.
13 changes: 12 additions & 1 deletion llvm/include/llvm/Transforms/Utils/CodeExtractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ template <typename T> class ArrayRef;
class BranchProbabilityInfo;
class DominatorTree;
class Function;
class Instruction;
class Loop;
class Module;
class RegionNode;
Expand Down Expand Up @@ -103,7 +104,17 @@ template <typename T> class ArrayRef;
/// a code sequence, that sequence is modified, including changing these
/// sets, before extraction occurs. These modifications won't have any
/// significant impact on the cost however.
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const;
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
const ValueSet &Allocas) const;
/// Find the set of allocas whose live ranges are contained within the
/// outlined region.
///
/// Allocas whose lifetime markers are contained in the outlined region
/// should be pushed into the outlined function. The address bitcasts that
/// are used by those lifetime markers are also candidates for
/// shrink-wrapping. The instructions that need to be sunk are collected in
/// 'Allocas'.
void findAllocas(ValueSet &Allocas) const;

private:
void severSplitPHINodes(BasicBlock *&Header);
Expand Down
83 changes: 76 additions & 7 deletions llvm/lib/Transforms/Utils/CodeExtractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
Expand Down Expand Up @@ -141,16 +142,77 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
return false;
}

void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
ValueSet &Outputs) const {
/// Collect in \p SinkCands the allocas, and the pointer casts of them, that
/// can be sunk into the outlined function.
///
/// Only blocks of the enclosing function that are not part of the region
/// ('Blocks') are scanned. An alloca found there becomes a candidate when:
///  - all of its users are inside the region and include both a
///    lifetime.start and a lifetime.end marker; or
///  - at least one pointer cast of it satisfies that same condition, and every
///    remaining user of the alloca is inside the region.
///
/// In the second case every qualifying cast that is itself defined outside
/// the region is recorded as well, ahead of the alloca. Recording only one of
/// several such casts would leave the others behind in the caller, still
/// using an alloca that is being moved into a different function.
void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
  // Returns true if every user of Addr is inside the region and matching
  // lifetime markers (both a start and an end) are found among those users.
  auto GetLifeTimeMarkers = [&](Instruction *Addr) {
    Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
    for (User *U : Addr->users()) {
      if (!definedInRegion(Blocks, U))
        return false;

      IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
      if (IntrInst) {
        if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
          LifeStart = IntrInst;
        if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
          LifeEnd = IntrInst;
      }
    }
    return LifeStart && LifeEnd;
  };

  Function *Func = (*Blocks.begin())->getParent();
  for (BasicBlock &BB : *Func) {
    // Instructions already inside the region need no sinking.
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {
      auto *AI = dyn_cast<AllocaInst>(&II);
      if (!AI)
        continue;

      // Simple case: the lifetime markers use the alloca directly.
      if (GetLifeTimeMarkers(AI)) {
        SinkCands.insert(AI);
        continue;
      }

      // Follow the pointer casts: the markers may take a cast of the alloca
      // as their address. Remember every cast that qualifies, not just the
      // last one visited.
      ValueSet MarkerAddrs;
      for (User *U : AI->users()) {
        if (U->stripPointerCasts() == AI) {
          Instruction *Bitcast = cast<Instruction>(U);
          if (GetLifeTimeMarkers(Bitcast)) {
            MarkerAddrs.insert(Bitcast);
            continue;
          }
        }
        // Any other user outside the region pins the alloca in the caller.
        if (!definedInRegion(Blocks, U)) {
          MarkerAddrs.clear();
          break;
        }
      }
      if (MarkerAddrs.empty())
        continue;

      // Casts that already live inside the region do not need to move.
      for (Value *Addr : MarkerAddrs)
        if (!definedInRegion(Blocks, Addr))
          SinkCands.insert(Addr);
      SinkCands.insert(AI);
    }
  }
}

void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
const ValueSet &SinkCands) const {

for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
++OI)
if (definedInCaller(Blocks, *OI))
Inputs.insert(*OI);
++OI) {
Value *V = *OI;
if (!SinkCands.count(V) && definedInCaller(Blocks, V))
Inputs.insert(V);
}

for (User *U : II.users())
if (!definedInRegion(Blocks, U)) {
Expand Down Expand Up @@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;

ValueSet inputs, outputs;
ValueSet inputs, outputs, SinkingCands;

// Assumption: this is a single-entry code region, and the header is the first
// block in the region.
Expand Down Expand Up @@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegion() {
"newFuncRoot");
newFuncRoot->getInstList().push_back(BranchInst::Create(header));

findAllocas(SinkingCands);

// Find inputs to, outputs from the code region.
findInputsOutputs(inputs, outputs);
findInputsOutputs(inputs, outputs, SinkingCands);

// Now sink all instructions which only have non-phi uses inside the region
for (auto *II : SinkingCands)
cast<Instruction>(II)->moveBefore(*newFuncRoot,
newFuncRoot->getFirstInsertionPt());

// Calculate the exit blocks for the extracted region and the total exit
// weights for each of those blocks.
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s

%"class.base" = type { %"struct.base"* }
%"struct.base" = type opaque

@g = external local_unnamed_addr global i32, align 4

; Function Attrs: nounwind uwtable
; Callee whose alloca is only reached through casts used in %bb5.
;
; %tmp (the alloca) and %tmp1 (its i8* cast) are both created in the entry
; block. %tmp1 is used only by the lifetime.start/lifetime.end calls in %bb5,
; and the only other use of %tmp is the second cast %tmp11, also in %bb5.
; The FileCheck directives below look in the '.N'-suffixed copy of this
; function (presumably the outlined one - confirm against the pass output) for
; an alloca immediately followed by a bitcast, and then a lifetime call.
define i32 @callee_sinkable_bitcast(i32 %arg) local_unnamed_addr #0 {
; CHECK-LABEL:define{{.*}}@callee_sinkable_bitcast.{{[0-9]}}
; CHECK: alloca
; CHECK-NEXT: bitcast
; CHECK: call void @llvm.lifetime
bb:
%tmp = alloca %"class.base", align 4
%tmp1 = bitcast %"class.base"* %tmp to i8*
%tmp2 = load i32, i32* @g, align 4, !tbaa !2
%tmp3 = add nsw i32 %tmp2, 1
%tmp4 = icmp slt i32 %arg, 0
; Negative %arg skips %bb5 and goes straight to the return block.
br i1 %tmp4, label %bb6, label %bb5

bb5: ; preds = %bb
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
; Second cast of the alloca, created inside %bb5 itself.
%tmp11 = bitcast %"class.base"* %tmp to i32*
store i32 %tmp3, i32* %tmp11, align 4, !tbaa !2
store i32 %tmp3, i32* @g, align 4, !tbaa !2
call void @bar(i32* nonnull %tmp11) #2
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
br label %bb6

bb6: ; preds = %bb5, %bb
; Result is 1 when %bb5 ran and 0 otherwise.
%tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
ret i32 %tmp7
}

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1

declare void @bar(i32*) local_unnamed_addr #2
declare void @bar2(i32*, i32*) local_unnamed_addr #1


; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1

; Function Attrs: nounwind uwtable
; Call site for @callee_sinkable_bitcast: forwards %arg and returns the
; callee's result unchanged.
define i32 @caller(i32 %arg) local_unnamed_addr #0 {
bb:
%tmp = tail call i32 @callee_sinkable_bitcast(i32 %arg)
ret i32 %tmp
}

attributes #0 = { nounwind uwtable}
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303574)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}


65 changes: 65 additions & 0 deletions llvm/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s

%"class.base" = type { %"struct.base"* }
%"struct.base" = type opaque

@g = external local_unnamed_addr global i32, align 4

; Callee whose lifetime markers take the alloca directly (no cast needed,
; since %tmp is already an i8*).
;
; Every use of %tmp - both lifetime calls and the cast %tmp11 - is in %bb5.
; The FileCheck directives below look in the '.N'-suffixed copy of this
; function (presumably the outlined one - confirm against the pass output) for
; an alloca followed by a lifetime call.
define i32 @callee_no_bitcast(i32 %arg) local_unnamed_addr #0 {
; CHECK-LABEL:define{{.*}}@callee_no_bitcast.{{[0-9]}}
; CHECK: alloca
; CHECK: call void @llvm.lifetime
bb:
%tmp = alloca i8, align 4
%tmp2 = load i32, i32* @g, align 4, !tbaa !2
%tmp3 = add nsw i32 %tmp2, 1
%tmp4 = icmp slt i32 %arg, 0
; Negative %arg skips %bb5 and goes straight to the return block.
br i1 %tmp4, label %bb6, label %bb5

bb5: ; preds = %bb
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
store i32 %tmp3, i32* @g, align 4, !tbaa !2
; Cast of the alloca created inside %bb5; its only user is the call to @bar.
%tmp11 = bitcast i8 * %tmp to i32*
call void @bar(i32* nonnull %tmp11) #2
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
br label %bb6

bb6: ; preds = %bb5, %bb
; Result is 1 when %bb5 ran and 0 otherwise.
%tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
ret i32 %tmp7
}

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1

declare void @bar(i32*) local_unnamed_addr #2
declare void @bar2(i32*, i32*) local_unnamed_addr #1


; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1

; Function Attrs: nounwind uwtable
; Call site for @callee_no_bitcast: forwards %arg and returns the callee's
; result unchanged.
define i32 @caller(i32 %arg) local_unnamed_addr #0 {
bb:
%tmp = tail call i32 @callee_no_bitcast(i32 %arg)
ret i32 %tmp
}

attributes #0 = { nounwind uwtable}
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303574)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}



67 changes: 67 additions & 0 deletions llvm/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s

%"class.base" = type { %"struct.base"* }
%"struct.base" = type opaque

@g = external local_unnamed_addr global i32, align 4

; Negative test: the alloca has a use outside %bb5.
;
; The lifetime markers and the cast %tmp11 are in %bb5, but %bb6 also casts
; %tmp (%tmp1). The FileCheck directives below require that no alloca appears
; between the start of the '.N'-suffixed copy of this function (presumably the
; outlined one - confirm against the pass output) and its first lifetime call.
define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 {
; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
; CHECK-NOT: alloca
; CHECK: call void @llvm.lifetime
bb:
%tmp = alloca i8, align 4
%tmp2 = load i32, i32* @g, align 4, !tbaa !2
%tmp3 = add nsw i32 %tmp2, 1
%tmp4 = icmp slt i32 %arg, 0
; Negative %arg skips %bb5 and goes straight to the return block.
br i1 %tmp4, label %bb6, label %bb5

bb5: ; preds = %bb
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
store i32 %tmp3, i32* @g, align 4, !tbaa !2
%tmp11 = bitcast i8* %tmp to i32*
call void @bar(i32* nonnull %tmp11) #2
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
br label %bb6

bb6: ; preds = %bb5, %bb
%tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
; The "unknown use": a cast of the alloca located outside %bb5. Its result is
; not used by any instruction in this function.
%tmp1 = bitcast i8* %tmp to i32*
ret i32 %tmp7
}


; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1

declare void @bar(i32*) local_unnamed_addr #2
declare void @bar2(i32*, i32*) local_unnamed_addr #1


; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1

; Function Attrs: nounwind uwtable
; Call site for @callee_unknown_use1: forwards %arg and returns the callee's
; result unchanged.
define i32 @caller(i32 %arg) local_unnamed_addr #0 {
bb:
%tmp = tail call i32 @callee_unknown_use1(i32 %arg)
ret i32 %tmp
}

attributes #0 = { nounwind uwtable}
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303574)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}



0 comments on commit 74480ad

Please sign in to comment.