Skip to content

Commit

Permalink
Recommit r325001: [CallSiteSplitting] Support splitting of blocks wit…
Browse files Browse the repository at this point in the history
…h instrs before call.

For basic blocks with instructions between the beginning of the block
and a call we have to duplicate the instructions before the call in all
split blocks and add PHI nodes for uses of the duplicated instructions
after the call.

Currently, the threshold for the CodeSize cost of the instructions before a
call is quite low, to keep the impact on binary size low.

Reviewers: junbuml, mcrosier, davidxl, davide

Reviewed By: junbuml

Differential Revision: https://reviews.llvm.org/D41860

llvm-svn: 325126
  • Loading branch information
fhahn committed Feb 14, 2018
1 parent de300e6 commit b4e3bad
Show file tree
Hide file tree
Showing 7 changed files with 429 additions and 151 deletions.
99 changes: 77 additions & 22 deletions llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
Expand Up @@ -59,11 +59,13 @@
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
Expand All @@ -73,6 +75,15 @@ using namespace PatternMatch;

STATISTIC(NumCallSiteSplit, "Number of call-site split");

/// Only allow instructions before a call, if their CodeSize cost is below
/// DuplicationThreshold. Those instructions need to be duplicated in all
/// split blocks.
static cl::opt<unsigned>
DuplicationThreshold("callsite-splitting-duplication-threshold", cl::Hidden,
cl::desc("Only allow instructions before a call, if "
"their cost is below DuplicationThreshold"),
cl::init(5));

static void addNonNullAttribute(CallSite CS, Value *Op) {
unsigned ArgNo = 0;
for (auto &I : CS.args()) {
Expand Down Expand Up @@ -168,20 +179,26 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
return Preds;
}

static bool canSplitCallSite(CallSite CS) {
static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
// FIXME: As of now we handle only CallInst. InvokeInst could be handled
// without too much effort.
Instruction *Instr = CS.getInstruction();
if (!isa<CallInst>(Instr))
return false;

// Allow splitting a call-site only when there is no instruction before the
// call-site in the basic block. Based on this constraint, we only clone the
// call instruction, and we do not move a call-site across any other
// instruction.
BasicBlock *CallSiteBB = Instr->getParent();
if (Instr != CallSiteBB->getFirstNonPHIOrDbg())
return false;
// Allow splitting a call-site only when the CodeSize cost of the
// instructions before the call is less than DuplicationThreshold. The
// instructions before the call will be duplicated in the split blocks and
// corresponding uses will be updated.
unsigned Cost = 0;
for (auto &InstBeforeCall :
llvm::make_range(CallSiteBB->begin(), Instr->getIterator())) {
Cost += TTI.getInstructionCost(&InstBeforeCall,
TargetTransformInfo::TCK_CodeSize);
if (Cost >= DuplicationThreshold)
return false;
}

// Need 2 predecessors and cannot split an edge from an IndirectBrInst.
SmallVector<BasicBlock *, 2> Preds(predecessors(CallSiteBB));
Expand Down Expand Up @@ -246,16 +263,21 @@ static void splitCallSite(
CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call");

DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");
for (const auto &P : Preds) {
BasicBlock *PredBB = P.first;
BasicBlock *SplitBlock =
SplitBlockPredecessors(TailBB, PredBB, ".predBB.split");

assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2.");
// ValueToValueMapTy is neither copyable nor movable, so we use a simple array
// here.
ValueToValueMapTy ValueToValueMaps[2];
for (unsigned i = 0; i < Preds.size(); i++) {
BasicBlock *PredBB = Preds[i].first;
BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i]);
assert(SplitBlock && "Unexpected new basic block split.");

Instruction *NewCI = Instr->clone();
Instruction *NewCI =
&*std::prev(SplitBlock->getTerminator()->getIterator());
CallSite NewCS(NewCI);
addConditions(NewCS, P.second);
NewCI->insertBefore(&*SplitBlock->getFirstInsertionPt());
addConditions(NewCS, Preds[i].second);

// Handle PHIs used as arguments in the call-site.
for (PHINode &PN : TailBB->phis()) {
Expand All @@ -273,13 +295,41 @@ static void splitCallSite(
CallPN->addIncoming(NewCI, SplitBlock);
}

auto *OriginalBegin = &*TailBB->begin();
// Replace users of the original call with a PHI merging the split call-sites.
if (CallPN) {
CallPN->insertBefore(TailBB->getFirstNonPHI());
CallPN->insertBefore(OriginalBegin);
Instr->replaceAllUsesWith(CallPN);
}

Instr->eraseFromParent();
// Remove instructions moved to split blocks from TailBB, from the duplicated
// call instruction to the beginning of the basic block. If an instruction
// has any uses, add a new PHI node to combine the values coming from the
// split blocks. The new PHI nodes are placed before the first original
// instruction, so we do not end up deleting them. By using reverse-order, we
// do not introduce unnecessary PHI nodes for def-use chains from the call
// instruction to the beginning of the block.
auto I = Instr->getReverseIterator();
while (I != TailBB->rend()) {
Instruction *CurrentI = &*I++;
if (!CurrentI->use_empty()) {
// If an existing PHI has users after the call, there is no need to create
// a new one.
if (isa<PHINode>(CurrentI))
continue;
PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
for (auto &Mapping : ValueToValueMaps)
NewPN->addIncoming(Mapping[CurrentI],
cast<Instruction>(Mapping[CurrentI])->getParent());
NewPN->insertBefore(&*TailBB->begin());
CurrentI->replaceAllUsesWith(NewPN);
}
CurrentI->eraseFromParent();
// We are done once we handled the first original instruction in TailBB.
if (CurrentI == OriginalBegin)
break;
}

NumCallSiteSplit++;
}

Expand Down Expand Up @@ -344,14 +394,15 @@ static bool tryToSplitOnPredicatedArgument(CallSite CS) {
return true;
}

static bool tryToSplitCallSite(CallSite CS) {
if (!CS.arg_size() || !canSplitCallSite(CS))
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
return tryToSplitOnPredicatedArgument(CS) ||
tryToSplitOnPHIPredicatedArgument(CS);
}

static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
TargetTransformInfo &TTI) {
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
Expand All @@ -364,7 +415,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
Function *Callee = CS.getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
Changed |= tryToSplitCallSite(CS);
Changed |= tryToSplitCallSite(CS, TTI);
}
}
return Changed;
Expand All @@ -379,6 +430,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}

Expand All @@ -387,7 +439,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
return false;

auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
return doCallSiteSplitting(F, TLI);
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
return doCallSiteSplitting(F, TLI, TTI);
}
};
} // namespace
Expand All @@ -396,6 +449,7 @@ char CallSiteSplittingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
Expand All @@ -405,8 +459,9 @@ FunctionPass *llvm::createCallSiteSplittingPass() {
PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);

if (!doCallSiteSplitting(F, TLI))
if (!doCallSiteSplitting(F, TLI, TTI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
return PA;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Other/new-pm-lto-defaults.ll
Expand Up @@ -35,6 +35,7 @@
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo
; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo
; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
; CHECK-O2-NEXT: PGOIndirectCallPromotion
; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis
Expand Down Expand Up @@ -79,7 +80,6 @@
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass>
Expand Down

0 comments on commit b4e3bad

Please sign in to comment.