diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index fa0079bac435c..f54a1933431de 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -10703,12 +10703,15 @@ bool LLParser::parseOptionalCallsites(std::vector &Callsites) { return true; SmallVector StackIdIndices; - do { - uint64_t StackId = 0; - if (parseUInt64(StackId)) - return true; - StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId)); - } while (EatIfPresent(lltok::comma)); + // Synthesized callsite records will not have a stack id list. + if (Lex.getKind() != lltok::rparen) { + do { + uint64_t StackId = 0; + if (parseUInt64(StackId)) + return true; + StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId)); + } while (EatIfPresent(lltok::comma)); + } if (parseToken(lltok::rparen, "expected ')' in stackIds")) return true; diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 1966ce2908371..8e594d155ca95 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -4204,7 +4204,8 @@ static SmallVector, 4> createFunctionClones( // Locate the summary for F. This is complicated by the fact that it might // have been internalized or promoted. static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, - const ModuleSummaryIndex *ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + const Function *CallingFunc = nullptr) { // FIXME: Ideally we would retain the original GUID in some fashion on the // function (e.g. as metadata), but for now do our best to locate the // summary without that information. @@ -4219,20 +4220,48 @@ static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, // Now query with the original name before any promotion was performed. StringRef OrigName = ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName()); + // When this pass is enabled, we always add thinlto_src_file provenance + // metadata to imported function definitions, which allows us to recreate the + // original internal symbol's GUID. + auto SrcFileMD = F.getMetadata("thinlto_src_file"); + // If this is a call to an imported/promoted local for which we didn't import + // the definition, the metadata will not exist on the declaration. However, + // since we are doing this early, before any inlining in the LTO backend, we + // can simply look at the metadata on the calling function which must have + // been from the same module if F was an internal symbol originally. + if (!SrcFileMD && F.isDeclaration()) { + // We would only call this for a declaration for a direct callsite, in which + // case the caller would have provided the calling function pointer. + assert(CallingFunc); + SrcFileMD = CallingFunc->getMetadata("thinlto_src_file"); + // If this is a promoted local (OrigName != F.getName()), since this is a + // declaration, it must be imported from a different module and therefore we + // should always find the metadata on its calling function. Any call to a + // promoted local that came from this module should still be a definition. + assert(SrcFileMD || OrigName == F.getName()); + } + StringRef SrcFile = M.getSourceFileName(); + if (SrcFileMD) + SrcFile = dyn_cast(SrcFileMD->getOperand(0))->getString(); std::string OrigId = GlobalValue::getGlobalIdentifier( - OrigName, GlobalValue::InternalLinkage, M.getSourceFileName()); + OrigName, GlobalValue::InternalLinkage, SrcFile); TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); - if (TheFnVI) - return TheFnVI; - // Could be a promoted local imported from another module. We need to pass - // down more info here to find the original module id. For now, try with - // the OrigName which might have been stored in the OidGuidMap in the - // index. This would not work if there were same-named locals in multiple - // modules, however. - auto OrigGUID = - ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName)); - if (OrigGUID) - TheFnVI = ImportSummary->getValueInfo(OrigGUID); + // Internal func in original module may have gotten a numbered suffix if we + // imported an external function with the same name. This happens + // automatically during IR linking for naming conflicts. It would have to + // still be internal in that case (otherwise it would have been renamed on + // promotion in which case we wouldn't have a naming conflict). + if (!TheFnVI && OrigName == F.getName() && F.hasLocalLinkage() && + F.getName().contains('.')) { + OrigName = F.getName().rsplit('.').first; + OrigId = GlobalValue::getGlobalIdentifier( + OrigName, GlobalValue::InternalLinkage, SrcFile); + TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId)); + } + // The only way we may not have a VI is if this is a declaration created for + // an imported reference. For distributed ThinLTO we may not have a VI for + // such declarations in the distributed summary. + assert(TheFnVI || F.isDeclaration()); return TheFnVI; } @@ -4591,7 +4620,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Locate the synthesized callsite info for the callee VI, if any was // created, and use that for cloning. ValueInfo CalleeVI = - findValueInfoForFunc(*CalledFunction, M, ImportSummary); + findValueInfoForFunc(*CalledFunction, M, ImportSummary, &F); if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); assert(Callsite != MapTailCallCalleeVIToCallsite.end()); diff --git a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll new file mode 100644 index 0000000000000..a6e254ca4d586 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll @@ -0,0 +1,152 @@ +;; Test to make sure that the memprof ThinLTO backend finds the correct summary +;; for an imported promoted local, so that we can perform the correct cloning. +;; In particular, we should be able to use the thinlto_src_file metadata to +;; recreate its original GUID. In particular, this test contains promoted +;; internal functions with the same original name as those that were imported, +;; and we want to ensure we don't use those by mistake. + +;; The original code looks something like: +;; +;; src1.cc: +;; extern void external1(); +;; extern void external2(); +;; static void internal1() { +;; external2(); +;; } +;; static void internal2() { +;; external2(); +;; } +;; int main() { +;; internal1(); +;; internal2(); +;; external1(); +;; return 0; +;; } +;; +;; src2.cc: +;; extern void external2(); +;; static void internal1() { +;; external2(); +;; } +;; static void internal2() { +;; external2(); +;; } +;; void external1() { +;; internal1(); +;; internal2(); +;; } +;; +;; The assembly for src1 shown below was dumped after function importing, with +;; some hand modification to ensure we import the definitions of src2.cc's +;; external1 and internal1 functions, and the declaration only for its +;; internal2 function. I also hand modified it to add !callsite metadata +;; to a few calls, and the distributed ThinLTO summary in src1.o.thinlto.ll to +;; contain callsite metadata records with cloning results. + +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: llvm-as src1.ll -o src1.o +; RUN: llvm-as src1.o.thinlto.ll -o src1.o.thinlto.bc + +; RUN: opt -passes=memprof-context-disambiguation src1.o -S -memprof-import-summary=src1.o.thinlto.bc | FileCheck %s + +;; Per the cloning results in the summary, none of the original functions should +;; call any memprof clones. +; CHECK-NOT: memprof +;; We should have one clone of src1.cc's internal1 that calls a clone of +;; external2. +; CHECK-LABEL: define void @_ZL9internal1v.llvm.5985484347676238233.memprof.1() +; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK-LABEL: declare void @_Z9external2v.memprof.1() +;; We should have one clone of external1 that calls a clone of internal2 from +;; a synthesized callsite record (for a tail call with a missing frame). +; CHECK-LABEL: define available_externally {{.*}} void @_Z9external1v.memprof.1() +; CHECK: tail call void @_ZL9internal1v.llvm.3267420853450984672() +; CHECK: tail call void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() +; CHECK-LABEL: declare void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() +;; We should have 2 clones of src2.cc's internal1 function, calling a single +;; clone of external2. +; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.1() +; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() +; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK-NOT: memprof + +;--- src1.ll +; ModuleID = 'src1.o' +source_filename = "src1.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local noundef i32 @main() { +entry: + tail call void @_ZL9internal1v.llvm.5985484347676238233() + tail call void @_ZL9internal2v.llvm.5985484347676238233() + tail call void @_Z9external1v() + ret i32 0 +} + +define void @_ZL9internal1v.llvm.5985484347676238233() { +entry: + tail call void @_Z9external2v(), !callsite !8 + ret void +} + +define void @_ZL9internal2v.llvm.5985484347676238233() { +entry: + tail call void @_Z9external2v() + ret void +} + +declare void @_Z9external2v() + +define available_externally dso_local void @_Z9external1v() !thinlto_src_module !6 !thinlto_src_file !7 { +entry: + tail call void @_ZL9internal1v.llvm.3267420853450984672() + tail call void @_ZL9internal2v.llvm.3267420853450984672() + ret void +} + +define available_externally void @_ZL9internal1v.llvm.3267420853450984672() !thinlto_src_module !6 !thinlto_src_file !7 { +entry: + ;; This one has more callsite records than the other version of internal1, + ;; which would cause the code to iterate past the end of the callsite + ;; records if we incorrectly got the other internal1's summary. + tail call void @_Z9external2v(), !callsite !9 + tail call void @_Z9external2v(), !callsite !10 + ret void +} + +declare void @_ZL9internal2v.llvm.3267420853450984672() + +!6 = !{!"src2.o"} +!7 = !{!"src2.cc"} +!8 = !{i64 12345} +!9 = !{i64 23456} +!10 = !{i64 34567} + +;--- src1.o.thinlto.ll +; ModuleID = 'src1.o.thinlto.bc' +source_filename = "src1.o.thinlto.bc" + +^0 = module: (path: "src1.o", hash: (1393604173, 1072112025, 2857473630, 2016801496, 3238735916)) +^1 = module: (path: "src2.o", hash: (760755700, 1705397472, 4198605753, 677969311, 2408738824)) +;; src2.o:internal1. It specifies that we should have 3 clones total (including +;; original). +^3 = gv: (guid: 1143217136900127394, summaries: (function: (module: ^1, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1), (callee: ^6, tail: 1)), callsites: ((callee: ^6, clones: (0, 1, 1), stackIds: (23456)), (callee: ^6, clones: (0, 1, 1), stackIds: (34567)))))) +;; src2.o:internal2. It was manually modified to have importType = declaration. +^4 = gv: (guid: 3599593882704738259, summaries: (function: (module: ^1, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: declaration), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1))))) +;; src1.o:internal1. +^5 = gv: (guid: 6084810090198994915, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1)), callsites: ((callee: ^6, clones: (0, 1), stackIds: (12345)))))) +^6 = gv: (guid: 8596367375252297795) +;; src1.o:internal2. +^7 = gv: (guid: 11092151021205906565, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1))))) +;; src2.o:external1. It contains a synthesized callsite record for the tail call +;; to internal2 (the empty stackId list indicates it is synthesized for a +;; discovered missing tail call frame. +^8 = gv: (guid: 12313225385227428720, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, calls: ((callee: ^3, tail: 1), (callee: ^4, tail: 1)), callsites: ((callee: ^4, clones: (0, 1), stackIds: ()))))) +;; src1.o:main. +^9 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^5, tail: 1), (callee: ^7, tail: 1), (callee: ^8, tail: 1))))) +^10 = flags: 97 +^11 = blockcount: 0 diff --git a/llvm/test/ThinLTO/X86/memprof_imported_internal2.ll b/llvm/test/ThinLTO/X86/memprof_imported_internal2.ll new file mode 100644 index 0000000000000..b431747fc7306 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof_imported_internal2.ll @@ -0,0 +1,93 @@ +;; Test to make sure that the memprof ThinLTO backend finds the correct summary +;; when there was a naming conflict between an internal function in the original +;; module and an imported external function with the same name. The IR linking +;; will automatically append a "." followed by a numbered suffix to the existing +;; local name in that case. Note this can happen with C where the mangling would +;; be the same for the internal and external functions of the same name (C++ +;; would have different mangling). + +;; Note we don't need any MemProf related metadata for this to fail to find a +;; ValueInfo and crash if the wrong GUID is computed for the renamed local. + +;; The original code looks something like: +;; +;; src1.c: +;; extern void external1(); +;; extern void external2(); +;; static void foo() { +;; external2(); +;; } +;; int main() { +;; external1(); +;; foo(); +;; return 0; +;; } +;; +;; src2.c: +;; extern void external2(); +;; void foo() { +;; external2(); +;; } +;; void external1() { +;; foo(); +;; } +;; +;; The assembly for src1 shown below was dumped after function importing. + +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: llvm-as src1.ll -o src1.o +; RUN: llvm-as src1.o.thinlto.ll -o src1.o.thinlto.bc + +;; Simply check that we don't crash when trying to find the ValueInfo for each +;; function in the IR. +; RUN: opt -passes=memprof-context-disambiguation src1.o -S -memprof-import-summary=src1.o.thinlto.bc + +;--- src1.ll +; ModuleID = 'src1.o' +source_filename = "src1.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local noundef i32 @main() { +entry: + tail call void @external1() + tail call void @foo.2() + ret i32 0 +} + +define internal void @foo.2() { +entry: + tail call void @external2() + ret void +} + +declare void @external2() + +define available_externally dso_local void @foo() !thinlto_src_module !1 !thinlto_src_file !2 { +entry: + tail call void @external2() + ret void +} + +define available_externally dso_local void @external1() !thinlto_src_module !1 !thinlto_src_file !2 { +entry: + tail call void @foo() + ret void +} + +!1 = !{!"src2.o"} +!2 = !{!"src2.c"} + +;--- src1.o.thinlto.ll +; ModuleID = 'src1.o.thinlto.bc' +source_filename = "src1.o.thinlto.bc" + +^0 = module: (path: "src1.o", hash: (2435941910, 498944982, 2551913764, 2759430100, 3918124321)) +^1 = module: (path: "src2.o", hash: (1826286437, 1557684621, 1220464477, 2734102338, 1025249503)) +^2 = module: (path: "src3.o", hash: (1085916433, 503665945, 2163560042, 340524, 2255774964)) +^3 = gv: (guid: 1456206394295721279, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) ;; src1.c:foo +^4 = gv: (guid: 6699318081062747564, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0)))) ;; src2.c:foo +^5 = gv: (guid: 13087145834073153720, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^4, tail: 1))))) ;; src1.c:external1 +^6 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^5, tail: 1), (callee: ^3, tail: 1))))) ;; src1.c:main +^8 = flags: 97 +^9 = blockcount: 0