Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 142 additions & 36 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
Expand All @@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
Expand All @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
STATISTIC(
FunctionCloneDuplicatesThinBackend,
"Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
Expand Down Expand Up @@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
return Changed;
}

// Compute a SHA1 hash of the callsite and alloc version information of clone I
// in the summary, to use in detection of duplicate clones.
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
SHA1 Hasher;
// Update hash with any callsites that call non-default (non-zero) callee
// versions.
for (auto &SN : FS->callsites()) {
// In theory all callsites and allocs in this function should have the same
// number of clone entries, but handle any discrepancies gracefully below
// for NDEBUG builds.
assert(
SN.Clones.size() > I &&
"Callsite summary has fewer entries than other summaries in function");
if (SN.Clones.size() <= I || !SN.Clones[I])
continue;
uint8_t Data[sizeof(SN.Clones[I])];
support::endian::write32le(Data, SN.Clones[I]);
Hasher.update(Data);
}
// Update hash with any allocs that have non-default (non-None) hints.
for (auto &AN : FS->allocs()) {
// In theory all callsites and allocs in this function should have the same
// number of clone entries, but handle any discrepancies gracefully below
// for NDEBUG builds.
assert(AN.Versions.size() > I &&
"Alloc summary has fewer entries than other summaries in function");
if (AN.Versions.size() <= I ||
(AllocationType)AN.Versions[I] == AllocationType::None)
continue;
Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1));
}
return support::endian::read64le(Hasher.result().data());
}

static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
&FuncToAliasMap) {
&FuncToAliasMap,
FunctionSummary *FS) {
auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This lambda and the one below were refactored out of existing code to enable reuse

// We might have created this when adjusting callsite in another
// function. It should be a declaration.
assert(DeclGV->isDeclaration());
NewGV->takeName(DeclGV);
DeclGV->replaceAllUsesWith(NewGV);
DeclGV->eraseFromParent();
};

// Handle aliases to this function, and create analogous alias clones to the
// provided clone of this function.
auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
if (!FuncToAliasMap.count(&F))
return;
for (auto *A : FuncToAliasMap[&F]) {
std::string AliasName = getMemProfFuncName(A->getName(), I);
auto *PrevA = M.getNamedAlias(AliasName);
auto *NewA = GlobalAlias::create(A->getValueType(),
A->getType()->getPointerAddressSpace(),
A->getLinkage(), AliasName, NewF);
NewA->copyAttributesFrom(A);
if (PrevA)
TakeDeclNameAndReplace(PrevA, NewA);
}
};

// The first "clone" is the original copy, we should only call this if we
// needed to create new clones.
assert(NumClones > 1);
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(NumClones - 1);
FunctionsClonedThinBackend++;

// Map of hash of callsite/alloc versions to the instantiated function clone
// (possibly the original) implementing those calls. Used to avoid
// instantiating duplicate function clones.
// FIXME: Ideally the thin link would not generate such duplicate clones to
// start with, but right now it happens due to phase ordering in the function
// assignment and possible new clones that produces. We simply make each
// duplicate an alias to the matching instantiated clone recorded in the map
// (except for available_externally which are made declarations as they would
// be aliases in the prevailing module, and available_externally aliases are
// not well supported right now).
DenseMap<uint64_t, Function *> HashToFunc;

// Save the hash of the original function version.
HashToFunc[ComputeHash(FS, 0)] = &F;

for (unsigned I = 1; I < NumClones; I++) {
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
std::string Name = getMemProfFuncName(F.getName(), I);
auto Hash = ComputeHash(FS, I);
// If this clone would duplicate a previously seen clone, don't generate the
// duplicate clone body, just make an alias to satisfy any (potentially
// cross-module) references.
if (HashToFunc.contains(Hash)) {
FunctionCloneDuplicatesThinBackend++;
auto *Func = HashToFunc[Hash];
if (Func->hasAvailableExternallyLinkage()) {
// Skip these as EliminateAvailableExternallyPass does not handle
// available_externally aliases correctly and we end up with an
// available_externally alias to a declaration. Just create a
// declaration for now as we know we will have a definition in another
// module.
auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone decl " << ore::NV("Decl", Decl.getCallee()));
continue;
}
auto *PrevF = M.getFunction(Name);
auto *Alias = GlobalAlias::create(Name, Func);
if (PrevF)
TakeDeclNameAndReplace(PrevF, Alias);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone alias " << ore::NV("Alias", Alias));

// Now handle aliases to this function, and clone those as well.
CloneFuncAliases(Func, I);
continue;
}
auto *NewF = CloneFunction(&F, *VMaps.back());
HashToFunc[Hash] = NewF;
FunctionClonesThinBackend++;
// Strip memprof and callsite metadata from clone as they are no longer
// needed.
Expand All @@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
}
}
std::string Name = getMemProfFuncName(F.getName(), I);
auto *PrevF = M.getFunction(Name);
if (PrevF) {
// We might have created this when adjusting callsite in another
// function. It should be a declaration.
assert(PrevF->isDeclaration());
NewF->takeName(PrevF);
PrevF->replaceAllUsesWith(NewF);
PrevF->eraseFromParent();
} else
if (PrevF)
TakeDeclNameAndReplace(PrevF, NewF);
else
NewF->setName(Name);
updateSubprogramLinkageName(NewF, Name);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone " << ore::NV("NewFunction", NewF));

// Now handle aliases to this function, and clone those as well.
if (!FuncToAliasMap.count(&F))
continue;
for (auto *A : FuncToAliasMap[&F]) {
std::string Name = getMemProfFuncName(A->getName(), I);
auto *PrevA = M.getNamedAlias(Name);
auto *NewA = GlobalAlias::create(A->getValueType(),
A->getType()->getPointerAddressSpace(),
A->getLinkage(), Name, NewF);
NewA->copyAttributesFrom(A);
if (PrevA) {
// We might have created this when adjusting callsite in another
// function. It should be a declaration.
assert(PrevA->isDeclaration());
NewA->takeName(PrevA);
PrevA->replaceAllUsesWith(NewA);
PrevA->eraseFromParent();
}
}
CloneFuncAliases(NewF, I);
}
return VMaps;
}
Expand Down Expand Up @@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
bool ClonesCreated = false;
unsigned NumClonesCreated = 0;
auto CloneFuncIfNeeded = [&](unsigned NumClones) {
auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
// We should at least have version 0 which is the original copy.
assert(NumClones > 0);
// If only one copy needed use original.
Expand All @@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(NumClonesCreated == NumClones);
return;
}
VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
// The first "clone" is the original copy, which doesn't have a VMap.
assert(VMaps.size() == NumClones - 1);
Changed = true;
Expand All @@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
};

auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
Function *CalledFunction) {
Function *CalledFunction, FunctionSummary *FS) {
// Perform cloning if not yet done.
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);

assert(!isMemProfClone(*CalledFunction));

Expand All @@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// below.
auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
// If the VMap is empty, this clone was a duplicate of another and was
// created as an alias or a declaration.
if (J > 0 && VMaps[J - 1]->empty())
continue;
Comment on lines +5543 to +5544
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if this small block of code warrants a comment:

// If the VMap is empty, this clone was a duplicate of another and was created
// as an alias or a declaration.

The same applies to the three more instances of if statements below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// Do nothing if this version calls the original version of its
// callee.
if (!StackNode.Clones[J])
Expand Down Expand Up @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
#endif

// Perform cloning if not yet done.
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);

OrigAllocsThinBackend++;
AllocVersionsThinBackend += AllocNode.Versions.size();
Expand Down Expand Up @@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {

// Update the allocation types per the summary info.
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
// If the VMap is empty, this clone was a duplicate of another and
// was created as an alias or a declaration.
if (J > 0 && VMaps[J - 1]->empty())
continue;
// Ignore any that didn't get an assigned allocation type.
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
continue;
Expand Down Expand Up @@ -5671,7 +5769,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// we don't need to do ICP, but might need to clone this
// function as it is the target of other cloned calls.
if (NumClones)
CloneFuncIfNeeded(NumClones);
CloneFuncIfNeeded(NumClones, FS);
}

else {
Expand All @@ -5691,7 +5789,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif

CloneCallsite(StackNode, CB, CalledFunction);
CloneCallsite(StackNode, CB, CalledFunction, FS);
}
} else if (CB->isTailCall() && CalledFunction) {
// Locate the synthesized callsite info for the callee VI, if any was
Expand All @@ -5701,7 +5799,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
CloneCallsite(Callsite->second, CB, CalledFunction);
CloneCallsite(Callsite->second, CB, CalledFunction, FS);
}
}
}
Expand Down Expand Up @@ -5847,6 +5945,10 @@ void MemProfContextDisambiguation::performICP(
// check.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
// If the VMap is empty, this clone was a duplicate of another and was
// created as an alias or a declaration.
if (J > 0 && VMaps[J - 1]->empty())
continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
Expand Down Expand Up @@ -5892,6 +5994,10 @@ void MemProfContextDisambiguation::performICP(
// TotalCount and the number promoted.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
// If the VMap is empty, this clone was a duplicate of another and was
// created as an alias or a declaration.
if (J > 0 && VMaps[J - 1]->empty())
continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
Expand Down
Loading