diff --git a/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll b/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll new file mode 100644 index 0000000000000..e213fbaf3fa14 --- /dev/null +++ b/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll @@ -0,0 +1,70 @@ +; REQUIRES: x86-registered-target + +;; Test that passing -supports-hot-cold-new to the thin link prevents memprof +;; metadata and attributes from being removed from the distributed ThinLTO +;; backend, and vice versa without passing -supports-hot-cold-new. + +;; First check with -supports-hot-cold-new. +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -save-temps \ +; RUN: -supports-hot-cold-new \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -o %t.out + +;; Ensure that the index file reflects the -supports-hot-cold-new, as that is +;; how the ThinLTO backend behavior is controlled. +; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-ON +;; Flags are printed in decimal, but this corresponds to 0x161, and 0x100 is +;; the value indicating -supports-hot-cold-new was enabled. +; CHECK-INDEX-ON: flags: 353 + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj + +; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK-IR +; CHECK-IR: !memprof {{.*}} !callsite +; CHECK-IR: "memprof"="cold" + +;; Next check without -supports-hot-cold-new, we should not perform +;; context disambiguation, and we should strip memprof metadata and +;; attributes before optimization during the distributed backend. +; RUN: llvm-lto2 run %t.o -save-temps \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -o %t.out + +;; Ensure that the index file reflects not having -supports-hot-cold-new. 
+; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-OFF +;; Flags are printed in decimal, but this corresponds to 0x61, without 0x100 set. +; CHECK-INDEX-OFF: flags: 97 + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj + +; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s \ +; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ +; RUN: --implicit-check-not "memprof"="cold" + +source_filename = "thinlto-distributed-supports-hot-cold-new.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + %call1 = call ptr @_Znam(i64 0) #1 + ret i32 0 +} + +declare ptr @_Znam(i64) + +attributes #0 = { noinline optnone } +attributes #1 = { "memprof"="cold" } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 6fdc378ab5c3b..b411148b7722a 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1305,6 +1305,9 @@ class ModuleSummaryIndex { /// Indicates that summary-based synthetic entry count propagation has run bool HasSyntheticEntryCounts = false; + /// Indicates that we linked with allocator supporting hot/cold new operators. + bool WithSupportsHotColdNew = false; + /// Indicates that distributed backend should skip compilation of the /// module. 
Flag is suppose to be set by distributed ThinLTO indexing /// when it detected that the module is not needed during the final @@ -1513,6 +1516,9 @@ class ModuleSummaryIndex { bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; } void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; } + bool withSupportsHotColdNew() const { return WithSupportsHotColdNew; } + void setWithSupportsHotColdNew() { WithSupportsHotColdNew = true; } + bool skipModuleByDistributedBackend() const { return SkipModuleByDistributedBackend; } diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index dabc367c80c88..b7dfd630ad821 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -95,6 +95,11 @@ setupStatsFile(StringRef StatsFilename); /// ordered indices to elements in the input array. std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R); +/// Updates MemProf attributes (and metadata) based on whether the index +/// has recorded that we are linking with allocation libraries containing +/// the necessary APIs for downstream transformations. +void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index); + class LTO; struct SymbolResolution; class ThinBackendProc; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 64616240d4a3c..a1d7044558921 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8067,7 +8067,7 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream, case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. 
- assert(Flags <= 0xff && "Unexpected bits in flag"); + assert(Flags <= 0x1ff && "Unexpected bits in flag"); return Flags & 0x8; } diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index 99e5044bdb9b1..7cba03e93e97f 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -107,11 +107,13 @@ uint64_t ModuleSummaryIndex::getFlags() const { Flags |= 0x40; if (withWholeProgramVisibility()) Flags |= 0x80; + if (withSupportsHotColdNew()) + Flags |= 0x100; return Flags; } void ModuleSummaryIndex::setFlags(uint64_t Flags) { - assert(Flags <= 0xff && "Unexpected bits in flag"); + assert(Flags <= 0x1ff && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. if (Flags & 0x1) @@ -145,6 +147,10 @@ void ModuleSummaryIndex::setFlags(uint64_t Flags) { // Set on combined index only. if (Flags & 0x80) setWithWholeProgramVisibility(); + // 1 bit: WithSupportsHotColdNew flag. + // Set on combined index only. + if (Flags & 0x100) + setWithSupportsHotColdNew(); } // Collect for the given module the list of function it defines diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index fee09fcd76bdb..de9b8f1da62be 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -76,6 +76,10 @@ cl::opt<bool> EnableLTOInternalization( cl::desc("Enable global value internalization in LTO")); } +/// Indicate we are linking with an allocator that supports hot/cold operator +/// new interfaces. +extern cl::opt<bool> SupportsHotColdNew; + /// Enable MemProf context disambiguation for thin link. 
extern cl::opt EnableMemProfContextDisambiguation; @@ -1079,6 +1083,14 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { return StatsFileOrErr.takeError(); std::unique_ptr StatsFile = std::move(StatsFileOrErr.get()); + // TODO: Ideally this would be controlled automatically by detecting that we + // are linking with an allocator that supports these interfaces, rather than + // an internal option (which would still be needed for tests, however). For + // example, if the library exported a symbol like __malloc_hot_cold the linker + // could recognize that and set a flag in the lto::Config. + if (SupportsHotColdNew) + ThinLTO.CombinedIndex.setWithSupportsHotColdNew(); + Error Result = runRegularLTO(AddStream); if (!Result) Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); @@ -1089,6 +1101,37 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { return Result; } +void lto::updateMemProfAttributes(Module &Mod, + const ModuleSummaryIndex &Index) { + if (Index.withSupportsHotColdNew()) + return; + + // The profile matcher applies hotness attributes directly for allocations, + // and those will cause us to generate calls to the hot/cold interfaces + // unconditionally. If supports-hot-cold-new was not enabled in the LTO + // link then assume we don't want these calls (e.g. not linking with + // the appropriate library, or otherwise trying to disable this behavior). + for (auto &F : Mod) { + for (auto &BB : F) { + for (auto &I : BB) { + auto *CI = dyn_cast(&I); + if (!CI) + continue; + if (CI->hasFnAttr("memprof")) + CI->removeFnAttr("memprof"); + // Strip off all memprof metadata as it is no longer needed. + // Importantly, this avoids the addition of new memprof attributes + // after inlining propagation. + // TODO: If we support additional types of MemProf metadata beyond hot + // and cold, we will need to update the metadata based on the allocator + // APIs supported instead of completely stripping all. 
+ CI->setMetadata(LLVMContext::MD_memprof, nullptr); + CI->setMetadata(LLVMContext::MD_callsite, nullptr); + } + } + } +} + Error LTO::runRegularLTO(AddStreamFn AddStream) { // Setup optimization remarks. auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( @@ -1142,6 +1185,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { } } + updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex); + // If allowed, upgrade public vcall visibility metadata to linkage unit // visibility before whole program devirtualization in the optimizer. updateVCallVisibilityInModule(*RegularLTO.CombinedModule, diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index d574ae5737d55..a18963fcaf85d 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -565,6 +565,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, // the module, if applicable. Mod.setPartialSampleProfileRatio(CombinedIndex); + updateMemProfAttributes(Mod, CombinedIndex); + updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility()); if (Conf.CodeGenOnly) { diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index e65b9dad13805..a3ca910a6b152 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -104,6 +104,12 @@ static cl::opt MemProfImportSummary( cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden); +// Indicate we are linking with an allocator that supports hot/cold operator +// new interfaces. +cl::opt SupportsHotColdNew( + "supports-hot-cold-new", cl::init(false), cl::Hidden, + cl::desc("Linking with hot/cold operator new interfaces")); + /// CRTP base for graphs built from either IR or ThinLTO summary index. 
/// /// The graph represents the call contexts in all memprof metadata on allocation @@ -3190,6 +3196,17 @@ bool MemProfContextDisambiguation::processModule( if (ImportSummary) return applyImport(M); + // TODO: If/when other types of memprof cloning are enabled beyond just for + // hot and cold, we will need to change this to individually control the + // AllocationType passed to addStackNodesForMIB during CCG construction. + // Note that we specifically check this after applying imports above, so that + // the option does not need to be passed to distributed ThinLTO backend + // clang processes, which won't necessarily have visibility into the linker + // dependences. Instead the information is communicated from the LTO link to + // the backends via the combined summary index. + if (!SupportsHotColdNew) + return false; + ModuleCallsiteContextGraph CCG(M, OREGetter); return CCG.process(); } @@ -3241,6 +3258,14 @@ void MemProfContextDisambiguation::run( ModuleSummaryIndex &Index, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing) { + // TODO: If/when other types of memprof cloning are enabled beyond just for + // hot and cold, we will need to change this to individually control the + // AllocationType passed to addStackNodesForMIB during CCG construction. + // The index was set from the option, so these should be in sync. 
+ assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); + if (!SupportsHotColdNew) + return; + IndexCallsiteContextGraph CCG(Index, isPrevailing); CCG.process(); } diff --git a/llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll b/llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll new file mode 100644 index 0000000000000..3ed68e8137a35 --- /dev/null +++ b/llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll @@ -0,0 +1,65 @@ +;; Test that passing -supports-hot-cold-new to the LTO link allows context +;; disambiguation to proceed, and also prevents memprof metadata and attributes +;; from being removed from the LTO backend, and vice versa without passing +;; -supports-hot-cold-new. + +;; Note that this tests regular LTO (with a summary) due to the module flag +;; disabling ThinLTO. + +;; First check with -supports-hot-cold-new. +; RUN: opt -module-summary %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP +; DUMP: Callsite Context Graph: + +; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s --check-prefix=IR +; IR: !memprof {{.*}} !callsite +; IR: "memprof"="cold" + +;; Next check without -supports-hot-cold-new, we should not perform +;; context disambiguation, and we should strip memprof metadata and +;; attributes before optimization. 
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ +; RUN: --implicit-check-not "Callsite Context Graph:" + +; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s \ +; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ +; RUN: --implicit-check-not "memprof"="cold" + +source_filename = "memprof-supports-hot-cold-new.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + %call2 = call ptr @_Znam(i64 0) #1 + ret i32 0 +} + +declare ptr @_Znam(i64) + +attributes #0 = { noinline optnone } +attributes #1 = { "memprof"="cold" } + +!llvm.module.flags = !{!6} + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} + +;; Force regular LTO even though we have a summary. 
+!6 = !{i32 1, !"ThinLTO", i32 0} diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll index ac3119fff2ae5..0d466830ba57d 100644 --- a/llvm/test/ThinLTO/X86/memprof-basic.ll +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -36,6 +36,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -55,6 +56,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll index 55aab19241084..5bc2f3005f461 100644 --- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll @@ -58,6 +58,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -78,6 +79,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll index b77df883d35b7..13d2036822b2e 100644 --- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll @@ -95,6 +95,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_Z1Db,plx \ ; RUN: 
-r=%t.o,_Z1Cb,plx \ diff --git a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll index 25e974e98f45c..9a72ae43b2f1e 100644 --- a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll +++ b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll @@ -51,6 +51,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -65,6 +66,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ diff --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll index 8b9b64f0c1920..76273959f4f4a 100644 --- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll +++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll @@ -56,6 +56,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,sleep, \ ; RUN: -r=%t.o,_Znam, \ @@ -78,6 +79,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ diff --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll index bc537a5f3fbba..feb9c94344223 100644 --- a/llvm/test/ThinLTO/X86/memprof-inlined.ll +++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll @@ -46,6 +46,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -67,6 +68,7 @@ ;; Try again but with 
distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ diff --git a/llvm/test/ThinLTO/X86/memprof-inlined2.ll b/llvm/test/ThinLTO/X86/memprof-inlined2.ll index 02baf9fb4cd8e..9f67849e1aa30 100644 --- a/llvm/test/ThinLTO/X86/memprof-inlined2.ll +++ b/llvm/test/ThinLTO/X86/memprof-inlined2.ll @@ -44,6 +44,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_Z3barv,plx \ ; RUN: -r=%t.o,_Z3bazv,plx \ diff --git a/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll b/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll new file mode 100644 index 0000000000000..9e69377d14443 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll @@ -0,0 +1,57 @@ +;; Test that passing -supports-hot-cold-new to the LTO link allows context +;; disambiguation to proceed, and also prevents memprof metadata and attributes +;; from being removed from the LTO backend, and vice versa without passing +;; -supports-hot-cold-new. + +;; First check with -supports-hot-cold-new. +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP +; DUMP: Callsite Context Graph: + +; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s --check-prefix=IR +; IR: !memprof {{.*}} !callsite +; IR: "memprof"="cold" + +;; Next check without -supports-hot-cold-new, we should not perform +;; context disambiguation, and we should strip memprof metadata and +;; attributes before optimization. 
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ +; RUN: --implicit-check-not "Callsite Context Graph:" + +; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s \ +; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ +; RUN: --implicit-check-not "memprof"="cold" + +source_filename = "memprof-supports-hot-cold-new.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + %call2 = call ptr @_Znam(i64 0) #1 + ret i32 0 +} + +declare ptr @_Znam(i64) + +attributes #0 = { noinline optnone } +attributes #1 = { "memprof"="cold" } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll index bd938754ce9d0..483582c6ced95 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -34,7 +34,7 @@ ;; -stats requires asserts ; REQUIRES: asserts -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ @@ -45,6 +45,15 @@ ;; We should have cloned bar, baz, and foo, for the cold memory allocation. ; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED +;; Check again without -supports-hot-cold-new and ensure that no callsite +;; context graph is built and that there is no cloning. +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s --implicit-check-not="Callsite Context Graph" \ +; RUN: --implicit-check-not="created clone" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll index 1f23ad3c6a51b..53de404c8f286 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll @@ -56,7 +56,7 @@ ;; -stats requires asserts ; REQUIRES: asserts -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll index d1659b524860a..aa51006d263e9 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll @@ -93,7 +93,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll b/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll index b94e9b855b747..ac19eeca3b33f 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll @@ -48,7 +48,7 @@ ;; -stats requires asserts ; REQUIRES: asserts -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \ diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll index f3216aa13d88f..a37aa4ccf1f42 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll @@ -54,7 +54,7 @@ ;; -stats requires asserts ; REQUIRES: 
asserts -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll index f1b74f13fb148..06162430449a1 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll @@ -44,7 +44,7 @@ ;; -stats requires asserts ; REQUIRES: asserts -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll index 5a247f8607996..cc97b5290e25a 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll @@ -42,7 +42,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP