From b96f7918c0f9f4f7c8eed3a7b5669545a7b7396e Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 1 Oct 2025 10:09:35 -0700 Subject: [PATCH 1/2] [MemProf] Suppress duplicate clones in the LTO backend In some cases due to phase ordering issues with re-cloning during function assignment, we may end up with duplicate clones in the summaries (calling the same set of callee clones and/or allocation hints). Ideally we would fix this in the thin link, but for now, detect and suppress these in the LTO backend. In order to satisfy possibly cross-module references, make each duplicate an alias to the first identical copy, which gets materialized. This reduces ThinLTO backend compile times. --- .../IPO/MemProfContextDisambiguation.cpp | 172 ++++++++++++++---- llvm/test/ThinLTO/X86/memprof-dups.ll | 138 ++++++++++++++ .../ThinLTO/X86/memprof_imported_internal.ll | 10 +- 3 files changed, 279 insertions(+), 41 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/memprof-dups.ll diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 15f4d76300bff..8e3348748cd23 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -40,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend, "Number of function clones created during ThinLTO backend"); STATISTIC(FunctionsClonedThinBackend, "Number of functions that had clones created during ThinLTO backend"); +STATISTIC( + FunctionCloneDuplicatesThinBackend, + "Number of function clone duplicates detected during ThinLTO backend"); STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " "cloned) during whole program analysis"); STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " @@ -5186,19 +5191,129 @@ bool CallsiteContextGraph::assignFunctions() { return Changed; } +// Compute a SHA1 hash of the callsite and alloc version information of clone I +// in the summary, to use in detection of duplicate clones. +std::string ComputeHash(StringMap &HashToFunc, FunctionSummary *FS, + unsigned I) { + SHA1 Hasher; + // Update hash with any callsites that call non-default (non-zero) callee + // versions. + for (auto &SN : FS->callsites()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert( + SN.Clones.size() > I && + "Callsite summary has fewer entries than other summaries in function"); + if (SN.Clones.size() <= I || !SN.Clones[I]) + continue; + uint8_t Data[4]; + support::endian::write32le(Data, SN.Clones[I]); + Hasher.update(Data); + } + // Update hash with any allocs that have non-default (non-None) hints. + for (auto &AN : FS->allocs()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert(AN.Versions.size() > I && + "Alloc summary has fewer entries than other summaries in function"); + if (AN.Versions.size() <= I || + (AllocationType)AN.Versions[I] == AllocationType::None) + continue; + Hasher.update(ArrayRef(&AN.Versions[I], 1)); + } + return toHex(Hasher.result()); +} + static SmallVector, 4> createFunctionClones( Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map> - &FuncToAliasMap) { + &FuncToAliasMap, + FunctionSummary *FS) { + auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) { + // We might have created this when adjusting callsite in another + // function. It should be a declaration. + assert(DeclGV->isDeclaration()); + NewGV->takeName(DeclGV); + DeclGV->replaceAllUsesWith(NewGV); + DeclGV->eraseFromParent(); + }; + + // Handle aliases to this function, and create analogous alias clones to the + // provided clone of this function. + auto CloneFuncAliases = [&](Function *NewF, unsigned I) { + if (!FuncToAliasMap.count(&F)) + return; + for (auto *A : FuncToAliasMap[&F]) { + std::string AliasName = getMemProfFuncName(A->getName(), I); + auto *PrevA = M.getNamedAlias(AliasName); + auto *NewA = GlobalAlias::create(A->getValueType(), + A->getType()->getPointerAddressSpace(), + A->getLinkage(), AliasName, NewF); + NewA->copyAttributesFrom(A); + if (PrevA) + TakeDeclNameAndReplace(PrevA, NewA); + } + }; + // The first "clone" is the original copy, we should only call this if we // needed to create new clones. assert(NumClones > 1); SmallVector, 4> VMaps; VMaps.reserve(NumClones - 1); FunctionsClonedThinBackend++; + + // Map of hash of callsite/alloc versions to the instantiated function clone + // (possibly the original) implementing those calls. Used to avoid + // instantiating duplicate function clones. + // FIXME: Ideally the thin link would not generate such duplicate clones to + // start with, but right now it happens due to phase ordering in the function + // assignment and possible new clones that produces. We simply make each + // duplicate an alias to the matching instantiated clone recorded in the map + // (except for available_externally which are made declarations as they would + // be aliases in the prevailing module, and available_externally aliases are + // not well supported right now). + StringMap HashToFunc; + + // Save the hash of the original function version. + auto Hash = ComputeHash(HashToFunc, FS, 0); + HashToFunc[Hash] = &F; + for (unsigned I = 1; I < NumClones; I++) { VMaps.emplace_back(std::make_unique()); + std::string Name = getMemProfFuncName(F.getName(), I); + auto Hash = ComputeHash(HashToFunc, FS, I); + // If this clone would duplicate a previously seen clone, don't generate the + // duplicate clone body, just make an alias to satisfy any (potentially + // cross-module) references. + if (HashToFunc.contains(Hash)) { + FunctionCloneDuplicatesThinBackend++; + auto *Func = HashToFunc[Hash]; + if (Func->hasAvailableExternallyLinkage()) { + // Skip these as EliminateAvailableExternallyPass does not handle + // available_externally aliases correctly and we end up with an + // available_externally alias to a declaration. Just create a + // declaration for now as we know we will have a definition in another + // module. + auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone decl " << ore::NV("Decl", Decl.getCallee())); + continue; + } + auto *PrevF = M.getFunction(Name); + auto *Alias = GlobalAlias::create(Name, Func); + if (PrevF) + TakeDeclNameAndReplace(PrevF, Alias); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone alias " << ore::NV("Alias", Alias)); + + // Now handle aliases to this function, and clone those as well. + CloneFuncAliases(Func, I); + continue; + } auto *NewF = CloneFunction(&F, *VMaps.back()); + HashToFunc[Hash] = NewF; FunctionClonesThinBackend++; // Strip memprof and callsite metadata from clone as they are no longer // needed. @@ -5208,40 +5323,17 @@ static SmallVector, 4> createFunctionClones( Inst.setMetadata(LLVMContext::MD_callsite, nullptr); } } - std::string Name = getMemProfFuncName(F.getName(), I); auto *PrevF = M.getFunction(Name); - if (PrevF) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevF->isDeclaration()); - NewF->takeName(PrevF); - PrevF->replaceAllUsesWith(NewF); - PrevF->eraseFromParent(); - } else + if (PrevF) + TakeDeclNameAndReplace(PrevF, NewF); + else NewF->setName(Name); updateSubprogramLinkageName(NewF, Name); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) << "created clone " << ore::NV("NewFunction", NewF)); // Now handle aliases to this function, and clone those as well. - if (!FuncToAliasMap.count(&F)) - continue; - for (auto *A : FuncToAliasMap[&F]) { - std::string Name = getMemProfFuncName(A->getName(), I); - auto *PrevA = M.getNamedAlias(Name); - auto *NewA = GlobalAlias::create(A->getValueType(), - A->getType()->getPointerAddressSpace(), - A->getLinkage(), Name, NewF); - NewA->copyAttributesFrom(A); - if (PrevA) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevA->isDeclaration()); - NewA->takeName(PrevA); - PrevA->replaceAllUsesWith(NewA); - PrevA->eraseFromParent(); - } - } + CloneFuncAliases(NewF, I); } return VMaps; } @@ -5401,7 +5493,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { SmallVector, 4> VMaps; bool ClonesCreated = false; unsigned NumClonesCreated = 0; - auto CloneFuncIfNeeded = [&](unsigned NumClones) { + auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) { // We should at least have version 0 which is the original copy. assert(NumClones > 0); // If only one copy needed use original. @@ -5415,7 +5507,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { assert(NumClonesCreated == NumClones); return; } - VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); + VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS); // The first "clone" is the original copy, which doesn't have a VMap. assert(VMaps.size() == NumClones - 1); Changed = true; @@ -5424,9 +5516,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { }; auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, - Function *CalledFunction) { + Function *CalledFunction, FunctionSummary *FS) { // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS); assert(!isMemProfClone(*CalledFunction)); @@ -5448,6 +5540,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // below. auto CalleeOrigName = CalledFunction->getName(); for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Do nothing if this version calls the original version of its // callee. if (!StackNode.Clones[J]) @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { #endif // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); + CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS); OrigAllocsThinBackend++; AllocVersionsThinBackend += AllocNode.Versions.size(); @@ -5624,6 +5718,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Update the allocation types per the summary info. for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Ignore any that didn't get an assigned allocation type. if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) continue; @@ -5671,7 +5767,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // we don't need to do ICP, but might need to clone this // function as it is the target of other cloned calls. if (NumClones) - CloneFuncIfNeeded(NumClones); + CloneFuncIfNeeded(NumClones, FS); } else { @@ -5691,7 +5787,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - CloneCallsite(StackNode, CB, CalledFunction); + CloneCallsite(StackNode, CB, CalledFunction, FS); } } else if (CB->isTailCall() && CalledFunction) { // Locate the synthesized callsite info for the callee VI, if any was @@ -5701,7 +5797,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); assert(Callsite != MapTailCallCalleeVIToCallsite.end()); - CloneCallsite(Callsite->second, CB, CalledFunction); + CloneCallsite(Callsite->second, CB, CalledFunction, FS); } } } @@ -5847,6 +5943,8 @@ void MemProfContextDisambiguation::performICP( // check. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast((*VMaps[J - 1])[CB]); @@ -5892,6 +5990,8 @@ void MemProfContextDisambiguation::performICP( // TotalCount and the number promoted. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast((*VMaps[J - 1])[CB]); diff --git a/llvm/test/ThinLTO/X86/memprof-dups.ll b/llvm/test/ThinLTO/X86/memprof-dups.ll new file mode 100644 index 0000000000000..8accc836456ed --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-dups.ll @@ -0,0 +1,138 @@ +;; Check that duplicate spurious duplicate (identical) clones are simply +;; created as aliases to the first identical copy, rather than creating +;; multiple clones that call the same callee clones or have the same +;; allocation types. This currently happens in some cases due to additional +;; cloning performed during function assignment. +;; +;; The ThinLTO combined summary was manually modified as described there +;; to force multiple identical copies of various functions. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: llvm-as src.ll -o src.o +; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc +; RUN: opt -passes=memprof-context-disambiguation -stats \ +; RUN: -memprof-import-summary=src.o.thinlto.bc \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: src.o -S 2>&1 | FileCheck %s + +; CHECK: created clone bar.memprof.1 +;; Duplicates of bar are created as declarations since bar is available_externally, +;; and the compiler does not well support available_externally aliases. +; CHECK: created clone decl bar.memprof.2 +; CHECK: created clone decl bar.memprof.3 +; CHECK: created clone _Z3foov.memprof.1 +;; Duplicates of _Z3foov are created as aliases to the appropriate materialized +;; clone of _Z3foov. +; CHECK: created clone alias _Z3foov.memprof.2 +; CHECK: created clone alias _Z3foov.memprof.3 + +;--- src.ll +source_filename = "memprof-distrib-alias.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_Z8fooAliasv = alias ptr (...), ptr @_Z3foov + +;; Original alias is unchanged. +; CHECK: @_Z8fooAliasv = alias ptr (...), ptr @_Z3foov{{$}} +;; We create an equivalent alias for the cloned def @_Z3foov.memprof.1. +; CHECK: @_Z8fooAliasv.memprof.1 = alias ptr (...), ptr @_Z3foov.memprof.1 + +;; We should also create aliases for the duplicate clones of _Z3foov +;; (_Z3foov.memprof.2 and _Z3foov.memprof.3) to the versions they are duplicates +;; of, and ditto for the associated @_Z8fooAliasv clones. +;; +;; _Z3foov.memprof.2 is a duplicate of original _Z3foov, and thus so is _Z8fooAliasv.memprof.2 +; CHECK: @_Z3foov.memprof.2 = alias ptr (), ptr @_Z3foov{{$}} +; CHECK: @_Z8fooAliasv.memprof.2 = alias ptr (...), ptr @_Z3foov{{$}} +;; _Z3foov.memprof.3 is a duplicate of _Z3foov.memprof.1, and thus so is _Z8fooAliasv.memprof.3 +; CHECK: @_Z3foov.memprof.3 = alias ptr (), ptr @_Z3foov.memprof.1 +; CHECK: @_Z8fooAliasv.memprof.3 = alias ptr (...), ptr @_Z3foov.memprof.1 + +; CHECK-LABEL: define i32 @main() +define i32 @main() #0 { +entry: + ;; The first call to bar does not allocate cold memory. It should call + ;; the original function, which eventually calls the original allocation + ;; decorated with a "notcold" attribute. + ; CHECK: call {{.*}} @bar() + %call = call ptr @bar(), !callsite !0 + ;; The second call to bar allocates cold memory. It should call the cloned + ;; function which eventually calls a cloned allocation decorated with a + ;; "cold" attribute. + ; CHECK: call {{.*}} @bar.memprof.1() + %call1 = call ptr @bar(), !callsite !1 + ret i32 0 +} + +; CHECK-LABEL: define available_externally i32 @bar() +define available_externally i32 @bar() #0 { +entry: + ; CHECK: call {{.*}} @_Z8fooAliasv() + %call = call ptr @_Z8fooAliasv(), !callsite !8 + ret i32 0 +} + +declare ptr @_Znam(i64) + +; CHECK-LABEL: define ptr @_Z3foov() +define ptr @_Z3foov() #0 { +entry: + ; CHECK: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]] + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +; We create actual clone for bar.memprof.1. +; CHECK: define available_externally i32 @bar.memprof.1() +; CHECK: call {{.*}} @_Z3foov.memprof.1() + +;; bar.memprof.2 and bar.memprof.3 are duplicates (of original bar and +;; bar.memprof.1, respectively). However, they are available externally, +;; so rather than create an alias we simply create a declaration, since the +;; compiler does not fully support available_externally aliases. +; CHECK: declare i32 @bar.memprof.2 +; CHECK: declare i32 @bar.memprof.3 + +; We create actual clone for foo.memprof.1. +; CHECK: define {{.*}} @_Z3foov.memprof.1() +; CHECK: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]] + +; CHECK: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; CHECK: attributes #[[COLD]] = { "memprof"="cold" } + +; CHECK: 4 memprof-context-disambiguation - Number of function clone duplicates detected during ThinLTO backend +; CHECK: 2 memprof-context-disambiguation - Number of function clones created during ThinLTO backend + +attributes #0 = { noinline optnone } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 1234, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 1234, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 1234} + +;--- src.o.thinlto.ll +; ModuleID = 'src.o.thinlto.ll' +source_filename = "src.o.thinlto.bc" + +^0 = module: (path: "src.o", hash: (1720506022, 1575514144, 2506794664, 3599359797, 3160884478)) +^1 = gv: (guid: 6583049656999245004, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), aliasee: ^2))) +;; Summary for _Z3foov, where the allocs part has been manually modified to add +;; two additional clones that are the same as the prior versions: +;; ... allocs: ((versions: (notcold, cold, notcold, cold), ... +^2 = gv: (guid: 9191153033785521275, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (notcold, cold, notcold, cold), memProf: ((type: notcold, stackIds: (1234, 8632435727821051414)), (type: cold, stackIds: (1234, 15025054523792398438)))))))) +^3 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^4)), callsites: ((callee: ^4, clones: (0), stackIds: (8632435727821051414)), (callee: ^4, clones: (1), stackIds: (15025054523792398438)))))) +;; Summary for bar, where the callsites part has been manually modified to add +;; two additional clones that are the same as the prior clones: +;; ... callsites: ((callee: ^1, clones: (0, 1, 0, 1), ... +^4 = gv: (guid: 16434608426314478903, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0, 1, 0, 1), stackIds: (1234)))))) +^6 = flags: 353 +^7 = blockcount: 0 diff --git a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll index a6e254ca4d586..09784f823ec2e 100644 --- a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll +++ b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll @@ -63,14 +63,14 @@ ; CHECK: tail call void @_ZL9internal1v.llvm.3267420853450984672() ; CHECK: tail call void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() ; CHECK-LABEL: declare void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() -;; We should have 2 clones of src2.cc's internal1 function, calling a single -;; clone of external2. +;; We should have one clone of src2.cc's internal1 function, calling a single +;; clone of external2, and a second clone that was detected to be a duplicate +;; of the first that becomes a declaration (since this is available_externally - +;; in the module with the prevailing copy it would be an alias to clone 1). ; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.1() ; CHECK: tail call void @_Z9external2v.memprof.1() ; CHECK: tail call void @_Z9external2v.memprof.1() -; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() -; CHECK: tail call void @_Z9external2v.memprof.1() -; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK: declare void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() ; CHECK-NOT: memprof ;--- src1.ll From 9359770b5e0cc614d2ccacdb5384f95e275ad847 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 3 Oct 2025 15:53:16 -0700 Subject: [PATCH 2/2] Address comments --- .../IPO/MemProfContextDisambiguation.cpp | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 8e3348748cd23..fad307296182b 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -5193,8 +5193,7 @@ bool CallsiteContextGraph::assignFunctions() { // Compute a SHA1 hash of the callsite and alloc version information of clone I // in the summary, to use in detection of duplicate clones. -std::string ComputeHash(StringMap &HashToFunc, FunctionSummary *FS, - unsigned I) { +uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) { SHA1 Hasher; // Update hash with any callsites that call non-default (non-zero) callee // versions. @@ -5207,7 +5206,7 @@ std::string ComputeHash(StringMap &HashToFunc, FunctionSummary *FS, "Callsite summary has fewer entries than other summaries in function"); if (SN.Clones.size() <= I || !SN.Clones[I]) continue; - uint8_t Data[4]; + uint8_t Data[sizeof(SN.Clones[I])]; support::endian::write32le(Data, SN.Clones[I]); Hasher.update(Data); } @@ -5223,7 +5222,7 @@ std::string ComputeHash(StringMap &HashToFunc, FunctionSummary *FS, continue; Hasher.update(ArrayRef(&AN.Versions[I], 1)); } - return toHex(Hasher.result()); + return support::endian::read64le(Hasher.result().data()); } static SmallVector, 4> createFunctionClones( @@ -5274,16 +5273,15 @@ static SmallVector, 4> createFunctionClones( // (except for available_externally which are made declarations as they would // be aliases in the prevailing module, and available_externally aliases are // not well supported right now). - StringMap HashToFunc; + DenseMap HashToFunc; // Save the hash of the original function version. - auto Hash = ComputeHash(HashToFunc, FS, 0); - HashToFunc[Hash] = &F; + HashToFunc[ComputeHash(FS, 0)] = &F; for (unsigned I = 1; I < NumClones; I++) { VMaps.emplace_back(std::make_unique()); std::string Name = getMemProfFuncName(F.getName(), I); - auto Hash = ComputeHash(HashToFunc, FS, I); + auto Hash = ComputeHash(FS, I); // If this clone would duplicate a previously seen clone, don't generate the // duplicate clone body, just make an alias to satisfy any (potentially // cross-module) references. @@ -5540,6 +5538,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // below. auto CalleeOrigName = CalledFunction->getName(); for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. if (J > 0 && VMaps[J - 1]->empty()) continue; // Do nothing if this version calls the original version of its @@ -5718,6 +5718,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Update the allocation types per the summary info. for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and + // was created as an alias or a declaration. if (J > 0 && VMaps[J - 1]->empty()) continue; // Ignore any that didn't get an assigned allocation type. @@ -5943,6 +5945,8 @@ void MemProfContextDisambiguation::performICP( // check. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. if (J > 0 && VMaps[J - 1]->empty()) continue; // Copy 0 is the original function. @@ -5990,6 +5994,8 @@ void MemProfContextDisambiguation::performICP( // TotalCount and the number promoted. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. if (J > 0 && VMaps[J - 1]->empty()) continue; // Copy 0 is the original function.