923 changes: 324 additions & 599 deletions llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1308,7 +1308,8 @@ void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {

// Used by INSTR_PROF_VTABLE_DATA MACRO
Constant *VTableAddr = getVTableAddrForProfData(GV);
const std::string PGOVTableName = getPGOName(*GV);
const std::string PGOVTableName =
getIRPGOObjectName(*GV, false /* InLTO */, nullptr /* PGONameMetadata */);
// Record the length of the vtable. This is needed since vtable pointers
// loaded from C++ objects might be from the middle of a vtable definition.
uint32_t VTableSizeVal =
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2194,7 +2194,9 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
// consistent names as annotated (in the form of md5hash(name)) in the
// value profiles.
if (!Types.empty())
createPGOVTableNameMetadata(G, getPGOName(G, false /* InLTO*/));
createPGOVTableNameMetadata(
G, getIRPGOObjectName(G, false /* InLTO */,
nullptr /* PGONameMetadata */));
}
}

Expand Down
228 changes: 127 additions & 101 deletions llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ static bool HasUseInNonElseBlock(Instruction &I, BasicBlock *ElseBlock,

// Given the following block,
// Sink the instructions that are used by function-based comparisons.
static void SinkVFuncOnlyInstructionsIfSafe(BasicBlock *SrcBlock,
static void SinkIndirectCallInstructions(BasicBlock *SrcBlock,
BasicBlock *DestBlock,
Instruction *VTableInstr,
const CallBase &CB) {
Expand Down Expand Up @@ -471,6 +471,87 @@ Value *getOrResult(std::vector<Value *> &ICmps, IRBuilder<> &Builder) {
return getOrResult(OrResults, Builder);
}

// Promotes an indirect musttail call into a guarded direct call, where the
// guard `Cond` is computed by the caller before `CB`.
//
// Input shape:
//   bb:
//     vptr = load
//     func-addr = gep vptr
//     funcptr = load func-addr
//     res = musttail call funcptr
//     ret res
//
// Output shape:
//   bb:
//     vptr = load
//     ... instructions that compute Cond ...
//     br Cond, if.then.direct_tail_call, if.else.orig_indirect_call
//   if.then.direct_tail_call:
//     res1 = musttail call TargetFunction
//     ret res1
//   if.else.orig_indirect_call:
//     func-addr = gep vptr
//     funcptr = load func-addr
//     res2 = musttail call funcptr
//     ret res2
//
// There is no merge block: each arm keeps its own (optional bitcast and)
// return. Returns the newly created direct call.
CallBase &promoteIndirectTailCallWithVTableInfo(CallBase &CB,
                                                Function *TargetFunction,
                                                Instruction *VTableInstr,
                                                Value *Cond,
                                                MDNode *BranchWeights) {
  // Remember the block that holds CB before the split; after the split CB
  // lives in the tail block, which becomes the "else" arm.
  BasicBlock *HeadBB = CB.getParent();
  Instruction *DirectArmTerm = SplitBlockAndInsertIfThen(
      Cond, &CB, /*Unreachable=*/false, BranchWeights);
  BasicBlock *DirectBB = DirectArmTerm->getParent();
  BasicBlock *IndirectBB = CB.getParent();
  DirectBB->setName("if.then.direct_tail_call");
  IndirectBB->setName("if.else.orig_indirect_call");

  // Clone the call into the "then" arm and retarget the clone at the direct
  // callee.
  CallBase *DirectCall = cast<CallBase>(CB.clone());
  DirectCall->insertBefore(DirectArmTerm);
  DirectCall->setCalledOperand(TargetFunction);

  // At this point the IR looks like:
  //   bb:
  //     vptr = load
  //     func-addr = gep vptr        <-- 0
  //     funcptr = load func-addr    <-- 1
  //     ... instructions that compute Cond ...
  //     br Cond, if.then.direct_tail_call, if.else.orig_indirect_call
  //   if.then.direct_tail_call:
  //     res_clone = musttail call TargetFunction
  //     br if.else.orig_indirect_call
  //   if.else.orig_indirect_call:
  //     res = musttail call funcptr
  //     ret res
  //
  // Instructions 0 and 1 are only needed by the original indirect call, so
  // try to sink them from the head block into the "else" arm.
  SinkIndirectCallInstructions(HeadBB, IndirectBB, VTableInstr, CB);

  // Replicate whatever follows the original call (an optional bitcast of the
  // call result, then the ret) after the direct call.
  Value *ReturnedVal = DirectCall;
  Instruction *Cursor = CB.getNextNode();
  if (auto *OrigCast = dyn_cast_or_null<BitCastInst>(Cursor)) {
    assert(OrigCast->getOperand(0) == &CB &&
           "bitcast following musttail call must use the call");
    Instruction *ClonedCast = OrigCast->clone();
    ClonedCast->replaceUsesOfWith(&CB, DirectCall);
    ClonedCast->insertBefore(DirectArmTerm);
    ReturnedVal = ClonedCast;
    Cursor = OrigCast->getNextNode();
  }

  // Clone the return so that the "then" arm returns the direct call's result
  // (or the cloned bitcast of it).
  ReturnInst *OrigRet = dyn_cast_or_null<ReturnInst>(Cursor);
  assert(OrigRet &&
         "musttail call must precede a ret with an optional bitcast");
  Instruction *ClonedRet = OrigRet->clone();
  if (Value *OrigRetVal = OrigRet->getReturnValue())
    ClonedRet->replaceUsesOfWith(OrigRetVal, ReturnedVal);
  ClonedRet->insertBefore(DirectArmTerm);

  // The cloned ret now terminates the "then" arm, so the branch created by the
  // split is redundant.
  DirectArmTerm->eraseFromParent();
  return *DirectCall;
}

// From
// bb:
// vptr = load
Expand All @@ -494,124 +575,66 @@ Value *getOrResult(std::vector<Value *> &ICmps, IRBuilder<> &Builder) {
// bb.merge:
// res = phi [res1, if.then] [res2, if.else]
CallBase &llvm::promoteIndirectCallWithVTableInfo(
CallBase &CB, Function *TargetFunction,
const SmallVector<VTableCandidate> &VTable2Candidate,
CallBase &CB, Function *TargetFunction, Instruction *VPtr,
const SmallVector<VTableCandidateInfo> &VTable2Candidate,
const std::vector<int> &VTableIndices,
const std::unordered_map<int, Value *> &VTableOffsetToValueMap,
uint64_t &SumPromotedVTableCount, MDNode *BranchWeights) {
SumPromotedVTableCount = 0;
// It might be a global variable, or alias

IRBuilder<> Builder(&CB);
CallBase *OrigIndirectCall = &CB;

// The VTableInstr that's being instrumented. It should remain the same across
// all candidates.
Instruction *VTableInstr = nullptr;

// The VTableInstr that's being instrumented.
Instruction *VTableInstr = VPtr;

// If a function candidate comes from multiple vtables, create one icmp for
// each vtable and OR the icmp results together.
//
// For example, given input IR
// vptr = load obj ... !prof !0
// func-addr = getelementptr vptr
// funcptr = load func-addr ... !prof !1
// call funcptr
//
// Output IR will be
// addr = sub vptr, offset
// res1 = icmp addr, vtable-addr1
// res2 = icmp addr, vtable-addr2
// res3 = icmp addr, vtable-addr3
// or1 = or res1, res2
// or2 = or or1, res3
// br or2 if.then, if.else
//
// if.then:
// call direct_callee

// if.else;
// func-addr = getelementptr vptr
// funcptr = load func-addr ...
// call funcptr
std::vector<Value *> ICmps;
for (auto Index : VTableIndices) {
SumPromotedVTableCount += VTable2Candidate[Index].VTableValCount;
const auto &VTableCandidateInfo = VTable2Candidate[Index];
if (VTableInstr == nullptr) {
VTableInstr = VTableCandidateInfo.VTableInstr;
}
assert(VTableCandidateInfo.VTableInstr == VTableInstr &&
"VTableInstr should remain the same across all candidate");
const auto &VTableInfoInfo = VTable2Candidate[Index];

Value *VTableVar = Builder.CreatePtrToInt(
VTableCandidateInfo.VTableVariable, Builder.getInt64Ty());
assert(
VTableOffsetToValueMap.find(VTableCandidateInfo.AddressPointOffset) !=
VTableOffsetToValueMap.end() &&
"Didn't find a value for offset");
Value *VTableVar = Builder.CreatePtrToInt(VTableInfoInfo.VTableVariable,
Builder.getInt64Ty());
assert(VTableOffsetToValueMap.find(VTableInfoInfo.AddressPointOffset) !=
VTableOffsetToValueMap.end() &&
"Didn't find a value for offset");

Value *OffsetVar =
VTableOffsetToValueMap.at(VTableCandidateInfo.AddressPointOffset);
VTableOffsetToValueMap.at(VTableInfoInfo.AddressPointOffset);
Value *ICmp = Builder.CreateICmpEQ(VTableVar, OffsetVar);
ICmps.push_back(ICmp);
}

Value *Cond = getOrResult(ICmps, Builder);

// FIXME:
// This should be optimized to the comparison with a newly-created (alias of?)
// vtable variable, and only frequently accessed vtables are created.

if (CB.isMustTailCall()) {
// From:
// bb:
// vptr = load
// func-addr = gep vptr
// funcptr = load
// res = tail call funcptr
// ret res
//
// To:
// bb:
// vptr = load
// minus = sub vptr, vtable
// cond = icmp minus, constant-offset
// br cond true if.then, false if.else
// if.then:
// %res1 = musttail call direct_callee
// ret %res1
// if.else:
// func-addr = gep vptr
// funcptr = load
// res2 = musttail call funcptr
// ret res2
BasicBlock *OrigBlock = CB.getParent();
Instruction *ThenTerm =
SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
BasicBlock *ElseBlock = CB.getParent();
ThenBlock->setName("if.then.direct_call");
CallBase *NewInst = cast<CallBase>(CB.clone());
NewInst->insertBefore(ThenTerm);
NewInst->setCalledOperand(TargetFunction);
// right now it becomes
// bb:
// vptr = load
// func-addr = gep vptr <-- 0
// funcptr = load <-- 1
// minus = sub vptr, vtablevar
// cond = icmp minus, constant-offset
// br cond true, if.true, if.end
// if.true:
// res_clone = tail call funcptr
// br if.end
// if.end:
// res = tail call funcptr
// ret res

// Now sink instructions 0 and 1 if possible to `if.end` block.
SinkVFuncOnlyInstructionsIfSafe(OrigBlock, ElseBlock, VTableInstr, CB);

Value *NewRetVal = NewInst;
auto Next = CB.getNextNode();
if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
assert(BitCast->getOperand(0) == &CB &&
"bitcast following musttail call must use the call");
auto NewBitCast = BitCast->clone();
NewBitCast->replaceUsesOfWith(&CB, NewInst);
NewBitCast->insertBefore(ThenTerm);
NewRetVal = NewBitCast;
Next = BitCast->getNextNode();
}

// Place a clone of the return instruction after the new call site.
ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
assert(Ret && "musttail call must precede a ret with an optional bitcast");
auto NewRet = Ret->clone();
if (Ret->getReturnValue()) {
NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
}
NewRet->insertBefore(ThenTerm);
// A return instruction is terminating, so we don't need the terminating
// instruction just created.
ThenTerm->eraseFromParent();
return *NewInst;
} // end if for musttail call.
if (CB.isMustTailCall())
return promoteIndirectTailCallWithVTableInfo(
CB, TargetFunction, VTableInstr, Cond, BranchWeights);

// Create an if-then-else structure. The original instruction is moved into
// the else block, and vfunc-only instructions are sunk into the else block.
Expand All @@ -622,16 +645,19 @@ CallBase &llvm::promoteIndirectCallWithVTableInfo(
BasicBlock *OrigBlock = CB.getParent();
SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
ThenBlock->setName("if.then.direct_call");
BasicBlock *ElseBlock = ElseTerm->getParent();
ElseBlock->setName("if.else.orig_indirect");
BasicBlock *MergeBlock = OrigIndirectCall->getParent();
MergeBlock->setName("if.end.icp");

CallBase *NewInst = cast<CallBase>(OrigIndirectCall->clone());
OrigIndirectCall->moveBefore(ElseTerm);
NewInst->insertBefore(ThenTerm);
// Rewrite NewInst to use direct callee to sink the vfunc-only instructions.
NewInst->setCalledOperand(TargetFunction);
// Now proceed to sink those instructions.
SinkVFuncOnlyInstructionsIfSafe(OrigBlock, ElseBlock, VTableInstr, CB);
SinkIndirectCallInstructions(OrigBlock, ElseBlock, VTableInstr, CB);

if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigIndirectCall)) {
auto *NewInvoke = cast<InvokeInst>(NewInst);
Expand Down
154 changes: 72 additions & 82 deletions llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll

Large diffs are not rendered by default.

258 changes: 258 additions & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_invoke_test.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-cmp -icp-vtable-cmp-inst-threshold=4 -icp-vtable-cmp-inst-last-candidate-threshold=4 -icp-vtable-cmp-total-inst-threshold=4 -S | FileCheck %s --check-prefix=ICALL-VTABLE

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%class.Error = type { i8 }

@_ZTI5Error = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr null, i64 2), ptr null }
@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1
@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3

@.str = private unnamed_addr constant [15 x i8] c"out of tickets\00"

; Test input: loads a vtable pointer from %b, loads a function pointer through
; it, and calls that pointer with an `invoke` that unwinds to %lpad. The vtable
; load carries `!prof !4` and the invoke carries `!prof !5`; each is a "VP"
; record with a total of 1600 and two profiled values counted 900 and 700.
;
; ICALL-FUNC expects the promotion to compare the loaded function pointer
; against @_ZN7Derived10get_ticketEv and then @_ZN4Base10get_ticketEv.
; ICALL-VTABLE expects the comparison to be made on the vtable address instead
; (vtable pointer minus 16, against @_ZTV7Derived and then @_ZTV4Base), with the
; function-pointer load appearing only in the final fallback block. In both
; modes every promoted call stays an `invoke` unwinding to the original %lpad.
define i32 @_Z4testP4Base(ptr %b) personality ptr @__gxx_personality_v0 {
; ICALL-FUNC-LABEL: define i32 @_Z4testP4Base(
; ICALL-FUNC-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN7Derived10get_ticketEv
; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
; ICALL-FUNC: if.true.direct_targ:
; ICALL-FUNC-NEXT: [[TMP3:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]])
; ICALL-FUNC-NEXT: to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
; ICALL-FUNC: if.false.orig_indirect:
; ICALL-FUNC-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base10get_ticketEv
; ICALL-FUNC-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-FUNC: if.true.direct_targ1:
; ICALL-FUNC-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]])
; ICALL-FUNC-NEXT: to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
; ICALL-FUNC: if.false.orig_indirect2:
; ICALL-FUNC-NEXT: [[CALL:%.*]] = invoke i32 [[TMP1]](ptr [[B]])
; ICALL-FUNC-NEXT: to label [[IF_END_ICP3]] unwind label [[LPAD]]
; ICALL-FUNC: if.end.icp3:
; ICALL-FUNC-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
; ICALL-FUNC: if.end.icp:
; ICALL-FUNC-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-FUNC-NEXT: br label [[TRY_CONT:%.*]]
; ICALL-FUNC: lpad:
; ICALL-FUNC-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 }
; ICALL-FUNC-NEXT: cleanup
; ICALL-FUNC-NEXT: catch ptr @_ZTI5Error
; ICALL-FUNC-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 1
; ICALL-FUNC-NEXT: [[TMP10:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
; ICALL-FUNC-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP9]], [[TMP10]]
; ICALL-FUNC-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
; ICALL-FUNC: catch:
; ICALL-FUNC-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 0
; ICALL-FUNC-NEXT: [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1) [[E]])
; ICALL-FUNC-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
; ICALL-FUNC: invoke.cont2:
; ICALL-FUNC-NEXT: call void @__cxa_end_catch()
; ICALL-FUNC-NEXT: br label [[TRY_CONT]]
; ICALL-FUNC: try.cont:
; ICALL-FUNC-NEXT: [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[TMP7]], [[IF_END_ICP]] ]
; ICALL-FUNC-NEXT: ret i32 [[RET_0]]
; ICALL-FUNC: lpad1:
; ICALL-FUNC-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 }
; ICALL-FUNC-NEXT: cleanup
; ICALL-FUNC-NEXT: invoke void @__cxa_end_catch()
; ICALL-FUNC-NEXT: to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
; ICALL-FUNC: invoke.cont4:
; ICALL-FUNC-NEXT: br label [[EHCLEANUP]]
; ICALL-FUNC: ehcleanup:
; ICALL-FUNC-NEXT: [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP12]], [[INVOKE_CONT4]] ], [ [[TMP8]], [[LPAD]] ]
; ICALL-FUNC-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
; ICALL-FUNC: terminate.lpad:
; ICALL-FUNC-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 }
; ICALL-FUNC-NEXT: catch ptr null
; ICALL-FUNC-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0
; ICALL-FUNC-NEXT: unreachable
;
; ICALL-VTABLE-LABEL: define i32 @_Z4testP4Base(
; ICALL-VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-NEXT: [[OFFSET_VAR:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV7Derived to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-NEXT: br i1 [[TMP2]], label [[IF_THEN_DIRECT_CALL:%.*]], label [[IF_ELSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
; ICALL-VTABLE: if.then.direct_call:
; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]])
; ICALL-VTABLE-NEXT: to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
; ICALL-VTABLE: if.else.orig_indirect:
; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-NEXT: br i1 [[TMP4]], label [[IF_THEN_DIRECT_CALL1:%.*]], label [[IF_ELSE_ORIG_INDIRECT2:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-VTABLE: if.then.direct_call1:
; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]])
; ICALL-VTABLE-NEXT: to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
; ICALL-VTABLE: if.else.orig_indirect2:
; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VTABLE]], align 8
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = invoke i32 [[TMP6]](ptr [[B]])
; ICALL-VTABLE-NEXT: to label [[IF_END_ICP3]] unwind label [[LPAD]]
; ICALL-VTABLE: if.end.icp3:
; ICALL-VTABLE-NEXT: [[TMP7:%.*]] = phi i32 [ [[CALL]], [[IF_ELSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_THEN_DIRECT_CALL1]] ]
; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
; ICALL-VTABLE: if.end.icp:
; ICALL-VTABLE-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP7]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_THEN_DIRECT_CALL]] ]
; ICALL-VTABLE-NEXT: br label [[TRY_CONT:%.*]]
; ICALL-VTABLE: lpad:
; ICALL-VTABLE-NEXT: [[TMP9:%.*]] = landingpad { ptr, i32 }
; ICALL-VTABLE-NEXT: cleanup
; ICALL-VTABLE-NEXT: catch ptr @_ZTI5Error
; ICALL-VTABLE-NEXT: [[TMP10:%.*]] = extractvalue { ptr, i32 } [[TMP9]], 1
; ICALL-VTABLE-NEXT: [[TMP11:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
; ICALL-VTABLE-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]]
; ICALL-VTABLE-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
; ICALL-VTABLE: catch:
; ICALL-VTABLE-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP9]], 0
; ICALL-VTABLE-NEXT: [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1) [[E]])
; ICALL-VTABLE-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
; ICALL-VTABLE: invoke.cont2:
; ICALL-VTABLE-NEXT: call void @__cxa_end_catch()
; ICALL-VTABLE-NEXT: br label [[TRY_CONT]]
; ICALL-VTABLE: try.cont:
; ICALL-VTABLE-NEXT: [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[TMP8]], [[IF_END_ICP]] ]
; ICALL-VTABLE-NEXT: ret i32 [[RET_0]]
; ICALL-VTABLE: lpad1:
; ICALL-VTABLE-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 }
; ICALL-VTABLE-NEXT: cleanup
; ICALL-VTABLE-NEXT: invoke void @__cxa_end_catch()
; ICALL-VTABLE-NEXT: to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
; ICALL-VTABLE: invoke.cont4:
; ICALL-VTABLE-NEXT: br label [[EHCLEANUP]]
; ICALL-VTABLE: ehcleanup:
; ICALL-VTABLE-NEXT: [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP13]], [[INVOKE_CONT4]] ], [ [[TMP9]], [[LPAD]] ]
; ICALL-VTABLE-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
; ICALL-VTABLE: terminate.lpad:
; ICALL-VTABLE-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 }
; ICALL-VTABLE-NEXT: catch ptr null
; ICALL-VTABLE-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0
; ICALL-VTABLE-NEXT: unreachable
;
entry:
%e = alloca %class.Error
; Vtable pointer load; !4 is the value profile attached to it.
%vtable = load ptr, ptr %b, !prof !4
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
tail call void @llvm.assume(i1 %0)
%1 = load ptr, ptr %vtable
; The indirect call under test; !5 is the value profile attached to it.
%call = invoke i32 %1(ptr %b)
to label %try.cont unwind label %lpad, !prof !5

lpad:
%2 = landingpad { ptr, i32 }
cleanup
catch ptr @_ZTI5Error
%3 = extractvalue { ptr, i32 } %2, 1
%4 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
%matches = icmp eq i32 %3, %4
br i1 %matches, label %catch, label %ehcleanup

catch:
%5 = extractvalue { ptr, i32 } %2, 0

%call3 = invoke i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1) %e)
to label %invoke.cont2 unwind label %lpad1

invoke.cont2:
call void @__cxa_end_catch()
br label %try.cont

try.cont:
; Merges the normal result of the indirect call with the catch-path result.
%ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ]
ret i32 %ret.0

lpad1:
%6 = landingpad { ptr, i32 }
cleanup
invoke void @__cxa_end_catch()
to label %invoke.cont4 unwind label %terminate.lpad

invoke.cont4:
br label %ehcleanup

ehcleanup:
%lpad.val7.merged = phi { ptr, i32 } [ %6, %invoke.cont4 ], [ %2, %lpad ]
resume { ptr, i32 } %lpad.val7.merged

terminate.lpad:
%7 = landingpad { ptr, i32 }
catch ptr null
%8 = extractvalue { ptr, i32 } %7, 0
unreachable
}

declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.assume(i1 noundef)
declare i32 @__gxx_personality_v0(...)
declare i32 @llvm.eh.typeid.for(ptr)

declare i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1))

declare void @__cxa_end_catch()

; One of the two promotion candidates (referenced from @_ZTV4Base). Returns the
; value of @_Z13get_ticket_idv() unless that value is -1; in that case it
; allocates an exception object and invokes the Error constructor with the
; string @.str and the integer 1. The normal continuation of that invoke is
; `unreachable`; the unwind path runs a cleanup landing pad and resumes.
define i32 @_ZN4Base10get_ticketEv(ptr %this) align 2 personality ptr @__gxx_personality_v0 {
entry:
%call = tail call i32 @_Z13get_ticket_idv()
%cmp.not = icmp eq i32 %call, -1
br i1 %cmp.not, label %if.end, label %if.then

if.then:
ret i32 %call

if.end:
%exception = tail call ptr @__cxa_allocate_exception(i64 1)
invoke void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1) %exception, ptr nonnull @.str, i32 1)
to label %invoke.cont unwind label %lpad

invoke.cont:
unreachable

lpad:
%0 = landingpad { ptr, i32 }
cleanup
resume { ptr, i32 } %0
}

; The other promotion candidate (referenced from @_ZTV7Derived). Same shape as
; @_ZN4Base10get_ticketEv: returns the value of @_Z13get_ticket_idv() unless it
; is -1, otherwise allocates an exception object and invokes the Error
; constructor, here with the integer 2 instead of 1.
define i32 @_ZN7Derived10get_ticketEv(ptr %this) align 2 personality ptr @__gxx_personality_v0 {
entry:
%call = tail call i32 @_Z13get_ticket_idv()
%cmp.not = icmp eq i32 %call, -1
br i1 %cmp.not, label %if.end, label %if.then

if.then:
ret i32 %call

if.end:
%exception = tail call ptr @__cxa_allocate_exception(i64 1)
invoke void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1) %exception, ptr nonnull @.str, i32 2)
to label %invoke.cont unwind label %lpad

invoke.cont:
unreachable

lpad:
%0 = landingpad { ptr, i32 }
cleanup
resume { ptr, i32 } %0
}

declare i32 @_Z13get_ticket_idv()
declare ptr @__cxa_allocate_exception(i64)
declare void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1), ptr, i32)

!0 = !{i64 16, !"_ZTS4Base"}
!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"}
!2 = !{i64 16, !"_ZTS7Derived"}
!3 = !{i64 16, !"_ZTSM7DerivedFivE.virtual"}
!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}
!5 = !{!"VP", i32 0, i64 1600, i64 14811317294552474744, i64 900, i64 9261744921105590125, i64 700}
95 changes: 95 additions & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_offsets.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-cmp -S | FileCheck %s --check-prefix=ICALL-PROM
; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-cmp -S -icp-vtable-cmp-inst-threshold=5 -icp-vtable-cmp-inst-last-candidate-threshold=5 -icp-vtable-cmp-total-inst-threshold=5 | FileCheck %s --check-prefix=ICALL-VTABLE

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZTV5Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !0
@_ZTV8Derived1 = constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev], [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !1, !type !2, !type !3
@_ZTV5Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !2
@_ZTV8Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base25func2Ev], [4 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !4, !type !5, !type !6, !type !7
@_ZTV5Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev] }, !type !6

; The indirect call has one function candidate (@_ZN5Base15func1Ev). The vtable
; profile on the vtable load (!8) lists three values, and the three vtables the
; checks compare against use two different address-point offsets.
;
; ICALL-PROM (function-based comparison, also expected when -enable-vtable-cmp
; is passed without the threshold overrides) compares the loaded function
; pointer against @_ZN5Base15func1Ev.
; ICALL-VTABLE expects two offset values to be computed from the vtable pointer
; (minus 16 and minus 64), one icmp per vtable (@_ZTV8Derived1 and @_ZTV5Base1
; at offset 16, @_ZTV8Derived2 at offset 64), the three icmp results OR-ed
; together into a single branch condition, and the getelementptr/load of the
; function pointer placed in the fallback block only.
define i32 @test_one_function_two_offsets_three_vtables(ptr %d) {
; ICALL-PROM-LABEL: define i32 @test_one_function_two_offsets_three_vtables(
; ICALL-PROM-SAME: ptr [[D:%.*]]) {
; ICALL-PROM-NEXT: entry:
; ICALL-PROM-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
; ICALL-PROM-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS5Base1")
; ICALL-PROM-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-PROM-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-PROM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-PROM-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN5Base15func1Ev
; ICALL-PROM-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF7:![0-9]+]]
; ICALL-PROM: if.true.direct_targ:
; ICALL-PROM-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN5Base15func1Ev(ptr [[D]])
; ICALL-PROM-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-PROM: if.false.orig_indirect:
; ICALL-PROM-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-PROM-NEXT: br label [[IF_END_ICP]]
; ICALL-PROM: if.end.icp:
; ICALL-PROM-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-PROM-NEXT: ret i32 [[TMP4]]
;
; ICALL-VTABLE-LABEL: define i32 @test_one_function_two_offsets_three_vtables(
; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-NEXT: [[OFFSET_VAR:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS5Base1")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-NEXT: [[OFFSET_VAR1:%.*]] = sub nuw i64 [[TMP0]], 64
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[OFFSET_VAR1]]
; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV5Base1 to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-NEXT: [[ICMP_OR:%.*]] = or i1 [[TMP2]], [[TMP3]]
; ICALL-VTABLE-NEXT: [[ICMP_OR2:%.*]] = or i1 [[ICMP_OR]], [[TMP4]]
; ICALL-VTABLE-NEXT: br i1 [[ICMP_OR2]], label [[IF_THEN_DIRECT_CALL:%.*]], label [[IF_ELSE_ORIG_INDIRECT:%.*]], !prof [[PROF7:![0-9]+]]
; ICALL-VTABLE: if.then.direct_call:
; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = tail call i32 @_ZN5Base15func1Ev(ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-VTABLE: if.else.orig_indirect:
; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP6]](ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
; ICALL-VTABLE: if.end.icp:
; ICALL-VTABLE-NEXT: [[TMP7:%.*]] = phi i32 [ [[CALL]], [[IF_ELSE_ORIG_INDIRECT]] ], [ [[TMP5]], [[IF_THEN_DIRECT_CALL]] ]
; ICALL-VTABLE-NEXT: ret i32 [[TMP7]]
;
entry:
; Vtable pointer load; !8 is the value profile attached to it.
%vtable = load ptr, ptr %d, !prof !8
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS5Base1")
tail call void @llvm.assume(i1 %0)
; Second pointer-sized slot counted from the loaded vtable pointer.
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn
; The indirect call under test; !9 is the value profile attached to it.
%call = tail call i32 %1(ptr %d), !prof !9
ret i32 %call
}

; The only function in this file with a body besides the test function; it is
; the direct-call target named in the checks above. Ignores %this and returns
; the constant 2.
define i32 @_ZN5Base15func1Ev(ptr %this) {
entry:
ret i32 2
}


declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.assume(i1)
declare i32 @_ZN5Base25func2Ev(ptr)
declare i32 @_ZN5Base15func0Ev(ptr)
declare void @_ZN5Base35func3Ev(ptr)

!0 = !{i64 16, !"_ZTS5Base1"}
!1 = !{i64 16, !"_ZTS5Base1"}
!2 = !{i64 48, !"_ZTS5Base2"}
!3 = !{i64 16, !"_ZTS8Derived1"}
!4 = !{i64 64, !"_ZTS5Base1"}
!5 = !{i64 40, !"_ZTS5Base2"}
!6 = !{i64 16, !"_ZTS5Base3"}
!7 = !{i64 16, !"_ZTS8Derived2"}
!8 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 800, i64 5035968517245772950, i64 500, i64 3215870116411581797, i64 300}
!9 = !{!"VP", i32 0, i64 1600, i64 6804820478065511155, i64 1600}
107 changes: 107 additions & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_offsets2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-cmp -icp-vtable-cmp-inst-threshold=0 -icp-vtable-cmp-inst-last-candidate-threshold=1 -icp-vtable-cmp-total-inst-threshold=1 -S | FileCheck %s --check-prefix=ICALL-VTABLE-PROM

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Vtable definitions. Each !type attachment refers to a (byte offset, type
; identifier) node defined at the end of the file.
; With 8-byte pointers, offset 16 is element 2 of a vtable's first array.
; @_ZTV8Derived2 has two arrays: !type !2 puts _ZTS5Base1 at byte 48, which is
; element 2 of the second array, so the slot one pointer past it holds
; @_ZThn8_N8Derived25func2Ev.
@_ZTV5Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei, ptr @_ZN5Base15func2Ev] }, !type !0
@_ZTV8Derived1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN8Derived15func2Ev] }, !type !0, !type !1
@_ZTV8Derived2 = constant { [4 x ptr], [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func3Ev, ptr @_ZN8Derived25func2Ev], [4 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base15func1Ei, ptr @_ZThn8_N8Derived25func2Ev] }, !type !2, !type !3, !type !4
@_ZTV5Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func3Ev] }, !type !3

; Function under test: loads the vtable pointer from %d, asserts its type with
; llvm.type.test + llvm.assume, loads the function pointer one slot past the
; vtable pointer (GEP index 1) and calls it indirectly.
; Value profiles: !5 is attached to the vtable load, !6 to the indirect call.
;
; Expectations for the default invocation: two guards that compare the loaded
; function pointer against @_ZN8Derived15func2Ev and then
; @_ZThn8_N8Derived25func2Ev, each followed by a direct call, with the original
; indirect call kept as the final fallback.
;
; Expectations for the vtable-comparison invocation: the vtable pointer is
; converted to an integer, reduced by 16 and compared with @_ZTV8Derived1, then
; reduced by 48 and compared with @_ZTV8Derived2. The GEP and function-pointer
; load appear only in the final fallback block.
define i32 @_Z4funcP5Base1(ptr %d) {
; ICALL-PROM-LABEL: define i32 @_Z4funcP5Base1(
; ICALL-PROM-SAME: ptr [[D:%.*]]) {
; ICALL-PROM-NEXT: entry:
; ICALL-PROM-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
; ICALL-PROM-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS5Base1")
; ICALL-PROM-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-PROM-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-PROM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-PROM-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN8Derived15func2Ev
; ICALL-PROM-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-PROM: if.true.direct_targ:
; ICALL-PROM-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN8Derived15func2Ev(ptr [[D]])
; ICALL-PROM-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-PROM: if.false.orig_indirect:
; ICALL-PROM-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @_ZThn8_N8Derived25func2Ev
; ICALL-PROM-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF6:![0-9]+]]
; ICALL-PROM: if.true.direct_targ1:
; ICALL-PROM-NEXT: [[TMP5:%.*]] = tail call i32 @_ZThn8_N8Derived25func2Ev(ptr [[D]])
; ICALL-PROM-NEXT: br label [[IF_END_ICP3:%.*]]
; ICALL-PROM: if.false.orig_indirect2:
; ICALL-PROM-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-PROM-NEXT: br label [[IF_END_ICP3]]
; ICALL-PROM: if.end.icp3:
; ICALL-PROM-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
; ICALL-PROM-NEXT: br label [[IF_END_ICP]]
; ICALL-PROM: if.end.icp:
; ICALL-PROM-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-PROM-NEXT: ret i32 [[TMP7]]
;
; ICALL-VTABLE-PROM-LABEL: define i32 @_Z4funcP5Base1(
; ICALL-VTABLE-PROM-SAME: ptr [[D:%.*]]) {
; ICALL-VTABLE-PROM-NEXT: entry:
; ICALL-VTABLE-PROM-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
; ICALL-VTABLE-PROM-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-PROM-NEXT: [[OFFSET_VAR:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-PROM-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS5Base1")
; ICALL-VTABLE-PROM-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-PROM-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-PROM-NEXT: br i1 [[TMP2]], label [[IF_THEN_DIRECT_CALL:%.*]], label [[IF_ELSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-VTABLE-PROM: if.then.direct_call:
; ICALL-VTABLE-PROM-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN8Derived15func2Ev(ptr [[D]])
; ICALL-VTABLE-PROM-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-VTABLE-PROM: if.else.orig_indirect:
; ICALL-VTABLE-PROM-NEXT: [[OFFSET_VAR1:%.*]] = sub nuw i64 [[TMP0]], 48
; ICALL-VTABLE-PROM-NEXT: [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[OFFSET_VAR1]]
; ICALL-VTABLE-PROM-NEXT: br i1 [[TMP4]], label [[IF_THEN_DIRECT_CALL2:%.*]], label [[IF_ELSE_ORIG_INDIRECT3:%.*]], !prof [[PROF6:![0-9]+]]
; ICALL-VTABLE-PROM: if.then.direct_call2:
; ICALL-VTABLE-PROM-NEXT: [[TMP5:%.*]] = tail call i32 @_ZThn8_N8Derived25func2Ev(ptr [[D]])
; ICALL-VTABLE-PROM-NEXT: br label [[IF_END_ICP4:%.*]]
; ICALL-VTABLE-PROM: if.else.orig_indirect3:
; ICALL-VTABLE-PROM-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-VTABLE-PROM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-VTABLE-PROM-NEXT: [[CALL:%.*]] = tail call i32 [[TMP6]](ptr [[D]])
; ICALL-VTABLE-PROM-NEXT: br label [[IF_END_ICP4]]
; ICALL-VTABLE-PROM: if.end.icp4:
; ICALL-VTABLE-PROM-NEXT: [[TMP7:%.*]] = phi i32 [ [[CALL]], [[IF_ELSE_ORIG_INDIRECT3]] ], [ [[TMP5]], [[IF_THEN_DIRECT_CALL2]] ]
; ICALL-VTABLE-PROM-NEXT: br label [[IF_END_ICP]]
; ICALL-VTABLE-PROM: if.end.icp:
; ICALL-VTABLE-PROM-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP7]], [[IF_END_ICP4]] ], [ [[TMP3]], [[IF_THEN_DIRECT_CALL]] ]
; ICALL-VTABLE-PROM-NEXT: ret i32 [[TMP8]]
;
entry:
%vtable = load ptr, ptr %d, !prof !5
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS5Base1")
tail call void @llvm.assume(i1 %0)
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn, align 8
%call = tail call i32 %1(ptr %d), !prof !6
ret i32 %call
}

; Intrinsics used by the type-checked vtable load in @_Z4funcP5Base1.
declare i1 @llvm.type.test(ptr, metadata)

declare void @llvm.assume(i1 noundef)
; Functions referenced from the vtable initializers that have no body in this
; module.
declare i32 @_ZN8Derived15func1Ei(ptr, i32)
declare i32 @_ZN5Base15func1Ei(ptr, i32)
declare i32 @_ZN5Base15func2Ev(ptr)
declare i32 @_ZN8Derived25func2Ev(ptr)
declare i32 @_ZN5Base25func3Ev(ptr)

; Stored in the last slot of the second array of @_ZTV8Derived2. The
; expectations for @_Z4funcP5Base1 call it directly as the second promoted
; target. Returns the constant 1.
define i32 @_ZThn8_N8Derived25func2Ev(ptr %this) {
ret i32 1
}

; Stored in the last slot of @_ZTV8Derived1. The expectations for
; @_Z4funcP5Base1 call it directly as the first promoted target. Returns the
; constant 2.
define i32 @_ZN8Derived15func2Ev(ptr %this) {
ret i32 2
}

; !0-!4: type metadata nodes (byte offset, type identifier) referenced by the
; vtable globals. Only !2 uses offset 48; it is the _ZTS5Base1 entry attached
; to the two-array vtable @_ZTV8Derived2.
!0 = !{i64 16, !"_ZTS5Base1"}
!1 = !{i64 16, !"_ZTS8Derived1"}
!2 = !{i64 48, !"_ZTS5Base1"}
!3 = !{i64 16, !"_ZTS5Base2"}
!4 = !{i64 16, !"_ZTS8Derived2"}
; !5: "VP" node of kind 2 attached to the vtable load; total 1600 split into
; two candidates with count 800 each.
; !6: "VP" node of kind 0 attached to the indirect call; total 1600 split into
; two candidates with count 800 each.
!5 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 800, i64 5035968517245772950, i64 800}
!6 = !{!"VP", i32 0, i64 1600, i64 8283424862230071372, i64 800, i64 -7571493466221013720, i64 800}
88 changes: 88 additions & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
; Exercises promotion of a musttail indirect virtual call. Both invocations
; enable -pass-remarks=pgo-icall-prom and merge stderr into stdout (2>&1), so
; the remark text and the printed IR are verified together.
; The first invocation uses default options; the second adds -enable-vtable-cmp
; with all three -icp-vtable-cmp-*-threshold options set to 4.
; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks=pgo-icall-prom -S 2>&1 | FileCheck %s --check-prefixes=REMARK,ICALL-FUNC
; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks=pgo-icall-prom -enable-vtable-cmp -icp-vtable-cmp-inst-threshold=4 -icp-vtable-cmp-inst-last-candidate-threshold=4 -icp-vtable-cmp-total-inst-threshold=4 -S 2>&1 | FileCheck %s --check-prefixes=REMARK,ICALL-VTABLE

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Remark text expected from both invocations. The total in the second remark
; (700) equals 1600 - 900, matching the count left once the first candidate has
; been promoted.
; REMARK: Promote indirect call to _ZN7Derived5func1Eii with count 900 out of 1600
; REMARK: Promote indirect call to _ZN4Base5func1Eii with count 700 out of 700

; Two single-array vtables. Their only function pointer is element 2, which is
; byte offset 16 with 8-byte pointers, the offset used by every !type node in
; this file.
@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, align 8, !type !0, !type !1, !type !2, !type !3
@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, align 8, !type !0, !type !1

; Function under test: loads the vtable pointer from %ptr, asserts its type,
; loads the function pointer directly from the vtable pointer (no GEP) and
; makes a musttail indirect call that is immediately returned.
; Value profiles: !4 is attached to the vtable load, !5 to the indirect call.
;
; In both sets of expectations every promoted direct call is itself musttail
; and is followed directly by its own ret, so there is no merge block and no
; phi.
; In the vtable-comparison expectations both guards reuse the same offset
; variable (vtable address minus 16), and the function-pointer load appears
; only in the final fallback block.
define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) {
; ICALL-FUNC-LABEL: define i32 @test_tail_call(
; ICALL-FUNC-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8
; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN7Derived5func1Eii
; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]]
; ICALL-FUNC: if.true.direct_targ:
; ICALL-FUNC-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
; ICALL-FUNC-NEXT: ret i32 [[TMP3]]
; ICALL-FUNC: 4:
; ICALL-FUNC-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func1Eii
; ICALL-FUNC-NEXT: br i1 [[TMP5]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-FUNC: if.true.direct_targ1:
; ICALL-FUNC-NEXT: [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
; ICALL-FUNC-NEXT: ret i32 [[TMP6]]
; ICALL-FUNC: 7:
; ICALL-FUNC-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[PTR]], i32 [[A]], i32 [[B]])
; ICALL-FUNC-NEXT: ret i32 [[CALL]]
;
; ICALL-VTABLE-LABEL: define i32 @test_tail_call(
; ICALL-VTABLE-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-NEXT: [[OFFSET_VAR:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV7Derived to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-NEXT: br i1 [[TMP2]], label [[IF_THEN_DIRECT_TAIL_CALL:%.*]], label [[IF_ELSE_ORIG_INDIRECT_CALL:%.*]], !prof [[PROF4:![0-9]+]]
; ICALL-VTABLE: if.then.direct_tail_call:
; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
; ICALL-VTABLE-NEXT: ret i32 [[TMP3]]
; ICALL-VTABLE: if.else.orig_indirect_call:
; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[OFFSET_VAR]]
; ICALL-VTABLE-NEXT: br i1 [[TMP4]], label [[IF_THEN_DIRECT_TAIL_CALL1:%.*]], label [[IF_ELSE_ORIG_INDIRECT_CALL2:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-VTABLE: if.then.direct_tail_call1:
; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
; ICALL-VTABLE-NEXT: ret i32 [[TMP5]]
; ICALL-VTABLE: if.else.orig_indirect_call2:
; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VTABLE]], align 8
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP6]](ptr [[PTR]], i32 [[A]], i32 [[B]])
; ICALL-VTABLE-NEXT: ret i32 [[CALL]]
;
entry:
%vtable = load ptr, ptr %ptr, !prof !4
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
tail call void @llvm.assume(i1 %0)
%1 = load ptr, ptr %vtable
%call = musttail call i32 %1(ptr %ptr, i32 %a, i32 %b), !prof !5
ret i32 %call
}

; Intrinsics used by the type-checked vtable load in @test_tail_call.
declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.assume(i1)
; The function slot of @_ZTV7Derived and the first promoted target in the
; expectations for @test_tail_call. Its signature matches the musttail call
; site; it returns %a - %b.
define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) {
entry:
%sub = sub nsw i32 %a, %b
ret i32 %sub
}

; The function slot of @_ZTV4Base and the second promoted target in the
; expectations for @test_tail_call. Its signature matches the musttail call
; site; it returns %b + %a.
define i32 @_ZN4Base5func1Eii(ptr %this, i32 %a, i32 %b) {
entry:
%add = add nsw i32 %b, %a
ret i32 %add
}

; !0-!3: type metadata nodes (byte offset, type identifier), all at offset 16.
; @_ZTV7Derived references all four; @_ZTV4Base references only !0 and !1.
!0 = !{i64 16, !"_ZTS4Base"}
!1 = !{i64 16, !"_ZTSM4BaseFiiiE.virtual"}
!2 = !{i64 16, !"_ZTS7Derived"}
!3 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
; !4: "VP" node of kind 2 attached to the vtable load; total 1600 split 900/700.
; !5: "VP" node of kind 0 attached to the indirect call; total 1600 split
; 900/700, the same counts that appear in the expected remarks.
!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}
!5 = !{!"VP", i32 0, i64 1600, i64 7889036118036845314, i64 900, i64 10495086226207060333, i64 700}