835 changes: 814 additions & 21 deletions llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -333,6 +333,9 @@ extern cl::opt<bool> DebugInfoCorrelate;
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
extern cl::opt<bool> EnableVTableValueProfiling;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
// Command line option to enable vtable-based comparison in pass
// `pgo-icall-prom`.
extern cl::opt<bool> EnableVTableCmp;
} // namespace llvm

// Return a string describing the branch condition that can be
@@ -2178,6 +2181,23 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,

auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);

if (EnableVTableCmp) {
SmallVector<MDNode *, 2> Types;
for (GlobalVariable &G : M.globals()) {
if (!G.hasName())
continue;
Types.clear();
G.getMetadata(LLVMContext::MD_type, Types);

// Attach the vtable's PGO name as metadata here in the prelink
// optimizer pipeline, so the postlink optimizer pipeline can use
// names consistent with those annotated (in the form of md5hash(name)) in
// the value profiles.
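// For example, a vtable such as
//   @_ZTV4Base = constant { [4 x ptr] } { ... }, !type !0
// (with !0 = !{i64 16, !"_ZTS4Base"}) carries !type metadata, so its PGO name
// is attached here and the postlink pipeline can match it against the
// md5-hashed names recorded in the vtable value profiles.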
if (!Types.empty())
createPGOVTableNameMetadata(G, getPGOName(G, false /* InLTO */));
}
}

if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
return PreservedAnalyses::all();
295 changes: 289 additions & 6 deletions llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -380,6 +380,285 @@ CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
return *NewInst;
}

// Returns true if instruction `I` should NOT be sunk into `ElseBlock`.
// `IntendedInstr` is an instruction that is not in `ElseBlock` yet but would
// be sunk there.
static bool HasUseInNonElseBlock(Instruction &I, BasicBlock *ElseBlock,
Instruction *IntendedInstr) {
// Conservatively return true so the instruction won't be sunk into other BBs.
// For instance, an `llvm.assume` call may appear unused.
if (I.use_empty())
return true;

for (Use &U : I.uses()) {
Instruction *User = dyn_cast<Instruction>(U.getUser());

// Conservatively returns true if there is a non-instruction user.
if (!User)
return true;

// `I` has a user that is neither `IntendedInstr` nor located in `ElseBlock`.
if (User != IntendedInstr && User->getParent() != ElseBlock)
return true;
}
return false;
}

// Sink the instructions in `SrcBlock` that are only used by the indirect call
// `CB` (the vfunc-ptr load and, if safe, its address computation) into
// `DestBlock`. Iteration stops once `VTableInstr` is reached.
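//
// For example, given IR shaped like (an illustrative sketch)
// SrcBlock:
//   vptr = load ...
//   func-addr = gep vptr, ...   ; only used by the funcptr load
//   funcptr = load func-addr    ; only used by the indirect call `CB`
//   br ...
// DestBlock:
//   res = call funcptr
// the gep and the funcptr load are moved to the front of `DestBlock`, while
// any instruction with a use outside `DestBlock` is left where it is.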
static void SinkVFuncOnlyInstructionsIfSafe(BasicBlock *SrcBlock,
BasicBlock *DestBlock,
Instruction *VTableInstr,
const CallBase &CB) {
// Collect all sinkable instructions and move them all together after
// instruction iteration completes. Moving instructions in the middle of
// iteration might invalidate iterators.
std::vector<Instruction *> SinkInsts;
// Iterate in reverse from the end of the block, skipping the terminator
// instruction.
for (Instruction &Inst :
make_range(++SrcBlock->getTerminator()->getReverseIterator(),
SrcBlock->rend())) {
if (Inst.isDebugOrPseudoInst())
continue;

// Exit loop if vtable instr is seen.
if (&Inst == VTableInstr)
break;

// Don't sink instructions to DestBlock if `Inst` has uses in other blocks.
if (HasUseInNonElseBlock(Inst, DestBlock, nullptr))
continue;

LoadInst *LI = dyn_cast<LoadInst>(&Inst);
if (LI) {
Value *V = cast<Value>(LI);
// This load instruction loads the vfunc-ptr for indirect call 'CB',
// and it doesn't have other users.
if (V == CB.getCalledOperand()) {
SinkInsts.push_back(&Inst);
Value *Addr = LI->getPointerOperand();

if (Instruction *AddrInst = dyn_cast<Instruction>(Addr)) {
if (!HasUseInNonElseBlock(*AddrInst, DestBlock, &Inst)) {
SinkInsts.push_back(AddrInst);
}
}
break;
}
}
}
for (Instruction *Inst : SinkInsts) {
Inst->moveBefore(&*DestBlock->getFirstInsertionPt());
}
}

// Returns the OR of all values in `ICmps`, combining them pairwise into a
// tree of `or` instructions rather than a linear chain.
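// For example, with five values [a, b, c, d, e] the pairwise reduction yields
// [a|b, c|d, e], then [(a|b)|(c|d), e], and finally ((a|b)|(c|d))|e, which is
// returned.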
Value *getOrResult(std::vector<Value *> &ICmps, IRBuilder<> &Builder) {
assert(!ICmps.empty() && "Must have at least one icmp instruction");
if (ICmps.size() == 1)
return ICmps[0];

std::vector<Value *> OrResults;
int i = 0, NumICmp = ICmps.size();
for (i = 0; i + 1 < NumICmp; i += 2) {
Value *Or = Builder.CreateOr(ICmps[i], ICmps[i + 1], "icmp-or");
OrResults.push_back(Or);
}
if (i < NumICmp) {
OrResults.push_back(ICmps[i]);
}
return getOrResult(OrResults, Builder);
}

// From
// bb:
// vptr = load
// func-addr = gep vptr
// funcptr = load func-addr
// res = call funcptr
//
// To
// bb:
// vptr = load
// cond = icmp (vptr - address-point-offset), vtable-address
// br cond, if.then, if.else
//
// if.then:
// res1 = call direct-func
// if.else:
// func-addr = gep vptr
// funcptr = load func-addr
// res2 = call funcptr
//
// bb.merge:
// res = phi [res1, if.then] [res2, if.else]
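//
// For example (taken from the one-vtable case in the icp_vtable_cmp.ll test
// below; value and block names are illustrative), the emitted guard is
// roughly:
//   %0 = ptrtoint ptr %vtable to i64
//   %offset.var = sub nuw i64 %0, 16  ; subtract the address-point offset
//   %cmp = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), %offset.var
//   br i1 %cmp, label %if.then, label %if.else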
CallBase &llvm::promoteIndirectCallWithVTableInfo(
CallBase &CB, Function *TargetFunction,
const SmallVector<VTableCandidate> &VTable2Candidate,
const std::vector<int> &VTableIndices,
const std::unordered_map<int, Value *> &VTableOffsetToValueMap,
uint64_t &SumPromotedVTableCount, MDNode *BranchWeights) {
SumPromotedVTableCount = 0;
// Each candidate's vtable variable might be a global variable or an alias.
IRBuilder<> Builder(&CB);
CallBase *OrigIndirectCall = &CB;

// The VTableInstr that's being instrumented. It should remain the same across
// all candidates.
Instruction *VTableInstr = nullptr;

std::vector<Value *> ICmps;
for (auto Index : VTableIndices) {
SumPromotedVTableCount += VTable2Candidate[Index].VTableValCount;
const auto &VTableCandidateInfo = VTable2Candidate[Index];
if (VTableInstr == nullptr) {
VTableInstr = VTableCandidateInfo.VTableInstr;
}
assert(VTableCandidateInfo.VTableInstr == VTableInstr &&
"VTableInstr should remain the same across all candidates");

Value *VTableVar = Builder.CreatePtrToInt(
VTableCandidateInfo.VTableVariable, Builder.getInt64Ty());
assert(
VTableOffsetToValueMap.find(VTableCandidateInfo.AddressPointOffset) !=
VTableOffsetToValueMap.end() &&
"Didn't find a value for offset");

Value *OffsetVar =
VTableOffsetToValueMap.at(VTableCandidateInfo.AddressPointOffset);
Value *ICmp = Builder.CreateICmpEQ(VTableVar, OffsetVar);
ICmps.push_back(ICmp);
}

Value *Cond = getOrResult(ICmps, Builder);

// FIXME:
// This could be optimized to compare against a newly-created vtable variable
// (or an alias of it), created only for frequently accessed vtables.

if (CB.isMustTailCall()) {
// From:
// bb:
// vptr = load
// func-addr = gep vptr
// funcptr = load
// res = tail call funcptr
// ret res
//
// To:
// bb:
// vptr = load
// minus = sub vptr, vtable
// cond = icmp minus, constant-offset
// br cond true if.then, false if.else
// if.then:
// %res1 = musttail call direct_callee
// ret %res1
// if.else:
// func-addr = gep vptr
// funcptr = load
// res2 = musttail call funcptr
// ret res2
BasicBlock *OrigBlock = CB.getParent();
Instruction *ThenTerm =
SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
BasicBlock *ElseBlock = CB.getParent();
ThenBlock->setName("if.then.direct_call");
CallBase *NewInst = cast<CallBase>(CB.clone());
NewInst->insertBefore(ThenTerm);
NewInst->setCalledOperand(TargetFunction);
// Right now the IR looks like:
// bb:
// vptr = load
// func-addr = gep vptr <-- 0
// funcptr = load func-addr <-- 1
// minus = sub vptr, vtablevar
// cond = icmp minus, constant-offset
// br cond, if.then, if.else
// if.then:
// res_clone = musttail call direct_callee
// br if.else
// if.else:
// res = musttail call funcptr
// ret res

// Now sink instructions 0 and 1, if possible, into the `if.else` block.
SinkVFuncOnlyInstructionsIfSafe(OrigBlock, ElseBlock, VTableInstr, CB);

Value *NewRetVal = NewInst;
auto Next = CB.getNextNode();
if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
assert(BitCast->getOperand(0) == &CB &&
"bitcast following musttail call must use the call");
auto NewBitCast = BitCast->clone();
NewBitCast->replaceUsesOfWith(&CB, NewInst);
NewBitCast->insertBefore(ThenTerm);
NewRetVal = NewBitCast;
Next = BitCast->getNextNode();
}

// Place a clone of the return instruction after the new call site.
ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
assert(Ret && "musttail call must precede a ret with an optional bitcast");
auto NewRet = Ret->clone();
if (Ret->getReturnValue()) {
NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
}
NewRet->insertBefore(ThenTerm);
// A return instruction is terminating, so we don't need the terminating
// instruction just created.
ThenTerm->eraseFromParent();
return *NewInst;
} // end if for musttail call.

// Create an if-then-else structure. The original indirect call is moved into
// the else block and the vfunc-only instructions are sunk there; a clone of
// the original call (rewritten to the direct callee) goes into the then block.
Instruction *ThenTerm = nullptr;
Instruction *ElseTerm = nullptr;

BasicBlock *OrigBlock = CB.getParent();
SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
BasicBlock *ElseBlock = ElseTerm->getParent();
BasicBlock *MergeBlock = OrigIndirectCall->getParent();

CallBase *NewInst = cast<CallBase>(OrigIndirectCall->clone());
OrigIndirectCall->moveBefore(ElseTerm);
NewInst->insertBefore(ThenTerm);
// Rewrite NewInst to call the direct callee so the vfunc-only instructions
// can be sunk into the else block.
NewInst->setCalledOperand(TargetFunction);
// Now proceed to sink those instructions.
SinkVFuncOnlyInstructionsIfSafe(OrigBlock, ElseBlock, VTableInstr, CB);

if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigIndirectCall)) {
auto *NewInvoke = cast<InvokeInst>(NewInst);

// Invoke instructions are terminating themselves.
ThenTerm->eraseFromParent();
ElseTerm->eraseFromParent();

// Branch from the "merge" block to the original normal destination.
Builder.SetInsertPoint(MergeBlock);
Builder.CreateBr(OrigInvoke->getNormalDest());

// Fix-up phi nodes in the original invoke's normal and unwind destinations.
fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);

// Now set the normal destinations of the invoke instructions to be the
// "merge" block.
OrigInvoke->setNormalDest(MergeBlock);
NewInvoke->setNormalDest(MergeBlock);
}

// Create a phi node for the return value.
createRetPHINode(OrigIndirectCall, NewInst, MergeBlock, Builder);
return *NewInst;
}

bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
const char **FailureReason) {
assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
@@ -467,12 +746,16 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
}

CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
CastInst **RetBitCast) {
assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");

// Set the called function of the call site to be the given callee (but don't
// change the type).
CB.setCalledOperand(Callee);
CastInst **RetBitCast,
bool DirectCalleeAlreadySet) {
if (!DirectCalleeAlreadySet) {
assert(!CB.getCalledFunction() &&
"Only indirect call sites can be promoted");

// Set the called function of the call site to be the given callee (but
// don't change the type).
CB.setCalledOperand(Callee);
}
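
// Note: when `DirectCalleeAlreadySet` is true, the operand rewrite and the
// indirect-call assertion above are skipped; presumably this is for callers
// that have already installed the direct callee (as
// promoteIndirectCallWithVTableInfo does before sinking the vfunc-only
// instructions).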

// Since the call site will no longer be direct, we must clear metadata that
// is only appropriate for indirect calls. This includes !prof and !callees
261 changes: 261 additions & 0 deletions llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -0,0 +1,261 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-cmp -vtable-prom-max-num-additional-op-for-one-function=4 -S | FileCheck %s --check-prefixes=ICALL-VTABLE

; All tests promote one target function at one vtable address-point offset.
; With the given options, vtable-based comparison is applied for at most three
; vtables; the four-vtable test falls back to function-based comparison.

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZTV4Base = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
@_ZTV8Derived1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
@_ZTV8Derived2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
@_ZTV8Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived35func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !3

define i32 @test_one_function_one_offset_one_vtable(ptr %d) {
; ICALL-FUNC-LABEL: define i32 @test_one_function_one_offset_one_vtable(
; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF4:![0-9]+]]
; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-FUNC: if.true.direct_targ:
; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-FUNC: if.false.orig_indirect:
; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
; ICALL-FUNC: if.end.icp:
; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
;
; ICALL-VTABLE-LABEL: define i32 @test_one_function_one_offset_one_vtable(
; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF4:![0-9]+]]
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-NEXT: [[OFFSETVAR1:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[OFFSETVAR1]]
; ICALL-VTABLE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]], !prof [[PROF5:![0-9]+]]
; ICALL-VTABLE: 3:
; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[TMP7:%.*]]
; ICALL-VTABLE: 5:
; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP6]](ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[TMP7]]
; ICALL-VTABLE: 7:
; ICALL-VTABLE-NEXT: [[TMP8:%.*]] = phi i32 [ [[CALL]], [[TMP5]] ], [ [[TMP4]], [[TMP3]] ]
; ICALL-VTABLE-NEXT: ret i32 [[TMP8]]
;
entry:
%vtable = load ptr, ptr %d, !prof !7
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
tail call void @llvm.assume(i1 %0)
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn
%call = tail call i32 %1(ptr %d), !prof !5
ret i32 %call
}

define i32 @test_one_function_one_offset_two_vtables(ptr %d) {
; ICALL-FUNC-LABEL: define i32 @test_one_function_one_offset_two_vtables(
; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF6:![0-9]+]]
; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
; ICALL-FUNC: if.true.direct_targ:
; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-FUNC: if.false.orig_indirect:
; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
; ICALL-FUNC: if.end.icp:
; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
;
; ICALL-VTABLE-LABEL: define i32 @test_one_function_one_offset_two_vtables(
; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF6:![0-9]+]]
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-NEXT: [[OFFSETVAR1:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[OFFSETVAR1]]
; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[OFFSETVAR1]]
; ICALL-VTABLE-NEXT: [[VTABLE_CMP_OR:%.*]] = or i1 [[TMP2]], [[TMP3]]
; ICALL-VTABLE-NEXT: br i1 [[VTABLE_CMP_OR]], label [[TMP4:%.*]], label [[TMP6:%.*]], !prof [[PROF5]]
; ICALL-VTABLE: 4:
; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[TMP8:%.*]]
; ICALL-VTABLE: 6:
; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-VTABLE-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP7]](ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[TMP8]]
; ICALL-VTABLE: 8:
; ICALL-VTABLE-NEXT: [[TMP9:%.*]] = phi i32 [ [[CALL]], [[TMP6]] ], [ [[TMP5]], [[TMP4]] ]
; ICALL-VTABLE-NEXT: ret i32 [[TMP9]]
;
entry:
%vtable = load ptr, ptr %d, !prof !6
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
tail call void @llvm.assume(i1 %0)
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn
%call = tail call i32 %1(ptr %d), !prof !5
ret i32 %call
}

define i32 @test_one_function_one_offset_three_vtables(ptr %d) {
; ICALL-FUNC-LABEL: define i32 @test_one_function_one_offset_three_vtables(
; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF7:![0-9]+]]
; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
; ICALL-FUNC: if.true.direct_targ:
; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-FUNC: if.false.orig_indirect:
; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
; ICALL-FUNC: if.end.icp:
; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
;
; ICALL-VTABLE-LABEL: define i32 @test_one_function_one_offset_three_vtables(
; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF7:![0-9]+]]
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
; ICALL-VTABLE-NEXT: [[OFFSETVAR1:%.*]] = sub nuw i64 [[TMP0]], 16
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[OFFSETVAR1]]
; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[OFFSETVAR1]]
; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[OFFSETVAR1]]
; ICALL-VTABLE-NEXT: [[VTABLE_CMP_OR:%.*]] = or i1 [[TMP2]], [[TMP3]]
; ICALL-VTABLE-NEXT: [[VTABLE_CMP_OR1:%.*]] = or i1 [[VTABLE_CMP_OR]], [[TMP4]]
; ICALL-VTABLE-NEXT: br i1 [[VTABLE_CMP_OR1]], label [[TMP5:%.*]], label [[TMP7:%.*]], !prof [[PROF8:![0-9]+]]
; ICALL-VTABLE: 5:
; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[TMP9:%.*]]
; ICALL-VTABLE: 7:
; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-VTABLE-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP8]](ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[TMP9]]
; ICALL-VTABLE: 9:
; ICALL-VTABLE-NEXT: [[TMP10:%.*]] = phi i32 [ [[CALL]], [[TMP7]] ], [ [[TMP6]], [[TMP5]] ]
; ICALL-VTABLE-NEXT: ret i32 [[TMP10]]
;
entry:
%vtable = load ptr, ptr %d, !prof !4
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
tail call void @llvm.assume(i1 %0)
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn
%call = tail call i32 %1(ptr %d), !prof !5
ret i32 %call
}

define i32 @test_one_function_one_offset_four_vtables(ptr %d) {
; ICALL-FUNC-LABEL: define i32 @test_one_function_one_offset_four_vtables(
; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF8:![0-9]+]]
; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
; ICALL-FUNC: if.true.direct_targ:
; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-FUNC: if.false.orig_indirect:
; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
; ICALL-FUNC: if.end.icp:
; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
;
; ICALL-VTABLE-LABEL: define i32 @test_one_function_one_offset_four_vtables(
; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
; ICALL-VTABLE-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
; ICALL-VTABLE: if.true.direct_targ:
; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[IF_END_ICP:%.*]]
; ICALL-VTABLE: if.false.orig_indirect:
; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
; ICALL-VTABLE: if.end.icp:
; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
; ICALL-VTABLE-NEXT: ret i32 [[TMP4]]
;
entry:
%vtable = load ptr, ptr %d, !prof !8
%0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
tail call void @llvm.assume(i1 %0)
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn
%call = tail call i32 %1(ptr %d), !prof !5
ret i32 %call
}

declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.assume(i1 noundef)
declare i32 @_ZN4Base5func1Ei(ptr, i32)
declare i32 @_ZN8Derived15func1Ei(ptr, i32)
declare i32 @_ZN8Derived25func1Ei(ptr, i32)
declare i32 @_ZN8Derived35func1Ei(ptr, i32)

define i32 @_ZN4Base5func2Ev(ptr %this) {
; ICALL-FUNC-LABEL: define i32 @_ZN4Base5func2Ev(
; ICALL-FUNC-SAME: ptr [[THIS:%.*]]) {
; ICALL-FUNC-NEXT: entry:
; ICALL-FUNC-NEXT: ret i32 0
;
; ICALL-VTABLE-LABEL: define i32 @_ZN4Base5func2Ev(
; ICALL-VTABLE-SAME: ptr [[THIS:%.*]]) {
; ICALL-VTABLE-NEXT: entry:
; ICALL-VTABLE-NEXT: ret i32 0
;
entry:
ret i32 0
}

!0 = !{i64 16, !"_ZTS4Base"}
!1 = !{i64 16, !"_ZTS8Derived1"}
!2 = !{i64 16, !"_ZTS8Derived2"}
!3 = !{i64 16, !"_ZTS8Derived3"}
!4 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 600, i64 5035968517245772950, i64 550, i64 1960855528937986108, i64 500}
!5 = !{!"VP", i32 0, i64 1600, i64 -3104805163612457913, i64 1600}
!6 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 1000, i64 5035968517245772950, i64 600}
!7 = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
!8 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450, i64 1960855528937986108, i64 310, i64 -3121110164882083017, i64 290}