Skip to content

Commit

Permalink
[PGO] Handle cases of failing to split critical edges
Browse files Browse the repository at this point in the history
Fix PR41279 where critical edges to EHPad are not split.
The fix is to not instrument those critical edges. We used to be able to know
the size of counters right after MST is computed. With this, we have to
pre-collect the instrument BBs to know the size, and then instrument them.

Differential Revision: https://reviews.llvm.org/D62439

llvm-svn: 361882
  • Loading branch information
xur-llvm committed May 28, 2019
1 parent 5b32f60 commit e88173a
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 44 deletions.
100 changes: 56 additions & 44 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Expand Up @@ -573,6 +573,10 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
// The Minimum Spanning Tree of function CFG.
CFGMST<Edge, BBInfo> MST;

// Collect all the BBs that will be instrumented, and store them in
// InstrumentBBs.
void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);

// Give an edge, find the BB that will be instrumented.
// Return nullptr if there is no BB to be instrumented.
BasicBlock *getInstrBB(Edge *E);
Expand Down Expand Up @@ -629,16 +633,6 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
if (CreateGlobalVar)
FuncNameVar = createPGOFuncNameVar(F, FuncName);
}

// Return the number of profile counters needed for the function.
unsigned getNumCounters() {
unsigned NumCounters = 0;
for (auto &E : this->MST.AllEdges) {
if (!E->InMST && !E->Removed)
NumCounters++;
}
return NumCounters + SIVisitor.getNumOfSelectInsts();
}
};

} // end anonymous namespace
Expand Down Expand Up @@ -753,6 +747,24 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
}
}

// Collect all the BBs that will be instruments and return them in
// InstrumentBBs.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
std::vector<BasicBlock *> &InstrumentBBs) {
// Use a worklist as we will update the vector during the iteration.
std::vector<Edge *> EdgeList;
EdgeList.reserve(MST.AllEdges.size());
for (auto &E : MST.AllEdges)
EdgeList.push_back(E.get());

for (auto &E : EdgeList) {
BasicBlock *InstrBB = getInstrBB(E);
if (InstrBB)
InstrumentBBs.push_back(InstrBB);
}
}

// Given a CFG E to be instrumented, find which BB to place the instrumented
// code. The function will split the critical edge if necessary.
template <class Edge, class BBInfo>
Expand Down Expand Up @@ -783,9 +795,18 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
<< " --> " << getBBInfo(DestBB).Index << "\n");
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
assert(InstrBB && "Critical edge is not split");

if (!InstrBB) {
LLVM_DEBUG(
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
return nullptr;
}
// Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
MST.addEdge(SrcBB, InstrBB, 0);
// Second one: Add new edge of InstrBB->DestBB.
Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
NewEdge1.InMST = true;
E->Removed = true;

return InstrBB;
}

Expand All @@ -801,15 +822,14 @@ static void instrumentOneFunc(

FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
BFI, IsCS);
unsigned NumCounters = FuncInfo.getNumCounters();
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();

uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto &E : FuncInfo.MST.AllEdges) {
BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
if (!InstrBB)
continue;

for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
Expand Down Expand Up @@ -1039,39 +1059,31 @@ class PGOUseFunc {
// edges and the BB. Return false on error.
bool PGOUseFunc::setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile) {

std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
// The number of counters here should match the number of counters
// in profile. Return if they mismatch.
if (FuncInfo.getNumCounters() != CountFromProfile.size()) {
if (NumCounters != CountFromProfile.size()) {
return false;
}
// Use a worklist as we will update the vector during the iteration.
std::vector<PGOUseEdge *> WorkList;
for (auto &E : FuncInfo.MST.AllEdges)
WorkList.push_back(E.get());

uint32_t I = 0;
for (auto &E : WorkList) {
BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
if (!InstrBB)
continue;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
if (!E->Removed) {
getBBInfo(InstrBB).setBBInfoCount(CountValue);
E->setEdgeCount(CountValue);
continue;
UseBBInfo &Info = getBBInfo(InstrBB);
Info.setBBInfoCount(CountValue);
// If only one in-edge, the edge profile count should be the same as BB
// profile count.
if (Info.InEdges.size() == 1) {
Info.InEdges[0]->setEdgeCount(CountValue);
}
// If only one out-edge, the edge profile count should be the same as BB
// profile count.
if (Info.OutEdges.size() == 1) {
Info.OutEdges[0]->setEdgeCount(CountValue);
}

// Need to add two new edges.
BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
// Add new edge of SrcBB->InstrBB.
PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
NewEdge.setEdgeCount(CountValue);
// Add new edge of InstrBB->DestBB.
PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
NewEdge1.setEdgeCount(CountValue);
NewEdge1.InMST = true;
getBBInfo(InstrBB).setBBInfoCount(CountValue);
}
ProfileCountSize = CountFromProfile.size();
CountPosition = I;
Expand Down
67 changes: 67 additions & 0 deletions llvm/test/Transforms/PGOProfile/PR41279.ll
@@ -0,0 +1,67 @@
; Test that instrumentaiton works fine for the case of failing the split critical edges.
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN

declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
declare { i8*, i64 } @f0({ i8*, i64 }*)
declare i64 @f1()
declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64)
declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
declare i32 @__CxxFrameHandler3(...)

define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
%3 = alloca i8, align 1
store i8 0, i8* %3, align 1
%4 = call i64 @f1()
%5 = icmp ult i64 %4, 32
br i1 %5, label %7, label %13

6:
cleanupret from %17 unwind to caller
; GEN: 6:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)

7:
store i8 1, i8* %3, align 1
%8 = call { i8*, i64 } @f0({ i8*, i64 }* %0)
%9 = extractvalue { i8*, i64 } %8, 0
%10 = extractvalue { i8*, i64 } %8, 1
invoke void @invok1({ i8*, i64 }* %1, { i8*, i64 }* %0, i64 1)
to label %11 unwind label %16
; GEN: 7:
; GEN-NOT: call void @llvm.instrprof.increment

11:
store i8 0, i8* %3, align 1
invoke void @invok2({ i8*, i64 }* %1, i8* noalias readonly align 1 %9, i64 %10)
to label %12 unwind label %16
; GEN: 11:
; GEN-NOT: call void @llvm.instrprof.increment

12:
store i8 0, i8* %3, align 1
br label %14
; GEN: 12:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)

13:
call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
br label %14
; GEN: 13:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)

14:
ret void

15:
store i8 0, i8* %3, align 1
br label %6
; GEN: 15:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)

16:
%17 = cleanuppad within none []
%18 = load i8, i8* %3, align 1
%19 = trunc i8 %18 to i1
br i1 %19, label %15, label %6
}

0 comments on commit e88173a

Please sign in to comment.