| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,233 @@ | ||
| //===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file declares the ThinLTOCodeGenerator class, similar to the | ||
| // LTOCodeGenerator but for the ThinLTO scheme. It provides an interface for | ||
| // linker plugin. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LTO_THINLTOCODEGENERATOR_H | ||
| #define LLVM_LTO_THINLTOCODEGENERATOR_H | ||
|
|
||
| #include "llvm-c/lto.h" | ||
| #include "llvm/ADT/StringRef.h" | ||
| #include "llvm/ADT/StringSet.h" | ||
| #include "llvm/ADT/Triple.h" | ||
| #include "llvm/Support/CodeGen.h" | ||
| #include "llvm/Support/MemoryBuffer.h" | ||
| #include "llvm/Target/TargetOptions.h" | ||
|
|
||
| #include <string> | ||
|
|
||
| namespace llvm { | ||
| class FunctionInfoIndex; | ||
| class LLVMContext; | ||
| class TargetMachine; | ||
|
|
||
| /// Helper to gather options relevant to the target machine creation | ||
| struct TargetMachineBuilder { | ||
| Triple TheTriple; | ||
| std::string MCpu; | ||
| std::string MAttr; | ||
| TargetOptions Options; | ||
| Reloc::Model RelocModel = Reloc::Default; | ||
| CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; | ||
|
|
||
| std::unique_ptr<TargetMachine> create() const; | ||
| }; | ||
|
|
||
| /// This class define an interface similar to the LTOCodeGenerator, but adapted | ||
| /// for ThinLTO processing. | ||
| /// The ThinLTOCodeGenerator is not intended to be reuse for multiple | ||
| /// compilation: the model is that the client adds modules to the generator and | ||
| /// ask to perform the ThinLTO optimizations / codegen, and finally destroys the | ||
| /// codegenerator. | ||
| class ThinLTOCodeGenerator { | ||
| public: | ||
| /// Add given module to the code generator. | ||
| void addModule(StringRef Identifier, StringRef Data); | ||
|
|
||
| /** | ||
| * Adds to a list of all global symbols that must exist in the final generated | ||
| * code. If a symbol is not listed there, it will be optimized away if it is | ||
| * inlined into every usage. | ||
| */ | ||
| void preserveSymbol(StringRef Name); | ||
|
|
||
| /** | ||
| * Adds to a list of all global symbols that are cross-referenced between | ||
| * ThinLTO files. If the ThinLTO CodeGenerator can ensure that every | ||
| * references from a ThinLTO module to this symbol is optimized away, then | ||
| * the symbol can be discarded. | ||
| */ | ||
| void crossReferenceSymbol(StringRef Name); | ||
|
|
||
| /** | ||
| * Process all the modules that were added to the code generator in parallel. | ||
| * | ||
| * Client can access the resulting object files using getProducedBinaries() | ||
| */ | ||
| void run(); | ||
|
|
||
| /** | ||
| * Return the "in memory" binaries produced by the code generator. | ||
| */ | ||
| std::vector<std::unique_ptr<MemoryBuffer>> &getProducedBinaries() { | ||
| return ProducedBinaries; | ||
| } | ||
|
|
||
| /** | ||
| * \defgroup Options setters | ||
| * @{ | ||
| */ | ||
|
|
||
| /** | ||
| * \defgroup Cache controlling options | ||
| * | ||
| * These entry points control the ThinLTO cache. The cache is intended to | ||
| * support incremental build, and thus needs to be persistent accross build. | ||
| * The client enabled the cache by supplying a path to an existing directory. | ||
| * The code generator will use this to store objects files that may be reused | ||
| * during a subsequent build. | ||
| * To avoid filling the disk space, a few knobs are provided: | ||
| * - The pruning interval limit the frequency at which the garbage collector | ||
| * will try to scan the cache directory to prune it from expired entries. | ||
| * Setting to -1 disable the pruning (default). | ||
| * - The pruning expiration time indicates to the garbage collector how old | ||
| * an entry needs to be to be removed. | ||
| * - Finally, the garbage collector can be instructed to prune the cache till | ||
| * the occupied space goes below a threshold. | ||
| * @{ | ||
| */ | ||
|
|
||
| struct CachingOptions { | ||
| std::string Path; | ||
| int PruningInterval = -1; // seconds, -1 to disable pruning | ||
| unsigned int Expiration; // seconds. | ||
| unsigned MaxPercentageOfAvailableSpace; // percentage. | ||
| }; | ||
|
|
||
| /// Provide a path to a directory where to store the cached files for | ||
| /// incremental build. | ||
| void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); } | ||
|
|
||
| /// Cache policy: interval (seconds) between two prune of the cache. Set to a | ||
| /// negative value (default) to disable pruning. | ||
| void setCachePruningInterval(int Interval) { | ||
| CacheOptions.PruningInterval = Interval; | ||
| } | ||
|
|
||
| /// Cache policy: expiration (in seconds) for an entry. | ||
| void setCacheEntryExpiration(unsigned Expiration) { | ||
| CacheOptions.Expiration = Expiration; | ||
| } | ||
|
|
||
| /** | ||
| * Sets the maximum cache size that can be persistent across build, in terms | ||
| * of percentage of the available space on the the disk. Set to 100 to | ||
| * indicate no limit, 50 to indicate that the cache size will not be left over | ||
| * half the available space. A value over 100 will be reduced to 100. | ||
| * | ||
| * The formula looks like: | ||
| * AvailableSpace = FreeSpace + ExistingCacheSize | ||
| * NewCacheSize = AvailableSpace * P/100 | ||
| * | ||
| */ | ||
| void setMaxCacheSizeRelativeToAvailableSpace(unsigned Percentage) { | ||
| CacheOptions.MaxPercentageOfAvailableSpace = Percentage; | ||
| } | ||
|
|
||
| /**@}*/ | ||
|
|
||
| /// Set the path to a directory where to save temporaries at various stages of | ||
| /// the processing. | ||
| void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); } | ||
|
|
||
| /// CPU to use to initialize the TargetMachine | ||
| void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); } | ||
|
|
||
| /// Subtarget attributes | ||
| void setAttr(std::string MAttr) { TMBuilder.MAttr = std::move(MAttr); } | ||
|
|
||
| /// TargetMachine options | ||
| void setTargetOptions(TargetOptions Options) { | ||
| TMBuilder.Options = std::move(Options); | ||
| } | ||
|
|
||
| /// CodeModel | ||
| void setCodePICModel(Reloc::Model Model) { TMBuilder.RelocModel = Model; } | ||
|
|
||
| /// CodeGen optimization level | ||
| void setCodeGenOptLevel(CodeGenOpt::Level CGOptLevel) { | ||
| TMBuilder.CGOptLevel = CGOptLevel; | ||
| } | ||
|
|
||
| /**@}*/ | ||
|
|
||
| /** | ||
| * \defgroup Set of APIs to run individual stages in isolation. | ||
| * @{ | ||
| */ | ||
|
|
||
| /** | ||
| * Produce the combined function index from all the bitcode files: | ||
| * "thin-link". | ||
| */ | ||
| std::unique_ptr<FunctionInfoIndex> linkCombinedIndex(); | ||
|
|
||
| /** | ||
| * Perform promotion and renaming of exported internal functions. | ||
| */ | ||
| void promote(Module &Module, FunctionInfoIndex &Index); | ||
|
|
||
| /** | ||
| * Perform cross-module importing for the module identified by | ||
| * ModuleIdentifier. | ||
| */ | ||
| void crossModuleImport(Module &Module, FunctionInfoIndex &Index); | ||
|
|
||
| /** | ||
| * Perform post-importing ThinLTO optimizations. | ||
| */ | ||
| void optimize(Module &Module); | ||
|
|
||
| /** | ||
| * Perform ThinLTO CodeGen. | ||
| */ | ||
| std::unique_ptr<MemoryBuffer> codegen(Module &Module); | ||
|
|
||
| /**@}*/ | ||
|
|
||
| private: | ||
| /// Helper factory to build a TargetMachine | ||
| TargetMachineBuilder TMBuilder; | ||
|
|
||
| /// Vector holding the in-memory buffer containing the produced binaries. | ||
| std::vector<std::unique_ptr<MemoryBuffer>> ProducedBinaries; | ||
|
|
||
| /// Vector holding the input buffers containing the bitcode modules to | ||
| /// process. | ||
| std::vector<MemoryBufferRef> Modules; | ||
|
|
||
| /// Set of symbols that need to be preserved outside of the set of bitcode | ||
| /// files. | ||
| StringSet<> PreservedSymbols; | ||
|
|
||
| /// Set of symbols that are cross-referenced between bitcode files. | ||
| StringSet<> CrossReferencedSymbols; | ||
|
|
||
| /// Control the caching behavior. | ||
| CachingOptions CacheOptions; | ||
|
|
||
| /// Path to a directory to save the temporary bitcode files. | ||
| std::string SaveTempsDir; | ||
| }; | ||
| } | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,10 @@ | ||
| add_llvm_library(LLVMLTO | ||
| LTOModule.cpp | ||
| LTOCodeGenerator.cpp | ||
| ThinLTOCodeGenerator.cpp | ||
|
|
||
| ADDITIONAL_HEADER_DIRS | ||
| ${LLVM_MAIN_INCLUDE_DIR}/llvm/LTO | ||
| ) | ||
|
|
||
| add_dependencies(LLVMLTO intrinsics_gen) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,3 +34,4 @@ required_libraries = | |
| Scalar | ||
| Support | ||
| Target | ||
| TransformUtils | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,384 @@ | ||
| //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file implements the Thin Link Time Optimization library. This library is | ||
| // intended to be used by linker to optimize code at link time. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "llvm/LTO/ThinLTOCodeGenerator.h" | ||
|
|
||
| #include "llvm/ADT/StringExtras.h" | ||
| #include "llvm/ADT/Statistic.h" | ||
| #include "llvm/Analysis/TargetLibraryInfo.h" | ||
| #include "llvm/Analysis/TargetTransformInfo.h" | ||
| #include "llvm/Bitcode/ReaderWriter.h" | ||
| #include "llvm/Bitcode/BitcodeWriterPass.h" | ||
| #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" | ||
| #include "llvm/IR/LLVMContext.h" | ||
| #include "llvm/IR/DiagnosticPrinter.h" | ||
| #include "llvm/IR/LegacyPassManager.h" | ||
| #include "llvm/IR/Mangler.h" | ||
| #include "llvm/IRReader/IRReader.h" | ||
| #include "llvm/Linker/Linker.h" | ||
| #include "llvm/MC/SubtargetFeature.h" | ||
| #include "llvm/Object/FunctionIndexObjectFile.h" | ||
| #include "llvm/Support/SourceMgr.h" | ||
| #include "llvm/Support/TargetRegistry.h" | ||
| #include "llvm/Support/ThreadPool.h" | ||
| #include "llvm/Target/TargetMachine.h" | ||
| #include "llvm/Transforms/IPO.h" | ||
| #include "llvm/Transforms/IPO/FunctionImport.h" | ||
| #include "llvm/Transforms/IPO/PassManagerBuilder.h" | ||
| #include "llvm/Transforms/ObjCARC.h" | ||
| #include "llvm/Transforms/Utils/FunctionImportUtils.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| namespace { | ||
|
|
||
| static cl::opt<int> ThreadCount("threads", | ||
| cl::init(std::thread::hardware_concurrency())); | ||
|
|
||
| static void diagnosticHandler(const DiagnosticInfo &DI) { | ||
| DiagnosticPrinterRawOStream DP(errs()); | ||
| DI.print(DP); | ||
| errs() << '\n'; | ||
| } | ||
|
|
||
| // Simple helper to load a module from bitcode | ||
| static std::unique_ptr<Module> | ||
| loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, | ||
| bool Lazy) { | ||
| SMDiagnostic Err; | ||
| ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr); | ||
| if (Lazy) { | ||
| ModuleOrErr = | ||
| getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context, | ||
| /* ShouldLazyLoadMetadata */ Lazy); | ||
| } else { | ||
| ModuleOrErr = parseBitcodeFile(Buffer, Context); | ||
| } | ||
| if (std::error_code EC = ModuleOrErr.getError()) { | ||
| Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error, | ||
| EC.message()); | ||
| Err.print("ThinLTO", errs()); | ||
| report_fatal_error("Can't load module, abort."); | ||
| } | ||
| return std::move(ModuleOrErr.get()); | ||
| } | ||
|
|
||
| // Simple helper to save temporary files for debug. | ||
| static void saveTempBitcode(const Module &TheModule, StringRef TempDir, | ||
| unsigned count, StringRef Suffix) { | ||
| if (TempDir.empty()) | ||
| return; | ||
| // User asked to save temps, let dump the bitcode file after import. | ||
| auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix; | ||
| std::error_code EC; | ||
| raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None); | ||
| if (EC) | ||
| report_fatal_error(Twine("Failed to open ") + SaveTempPath + | ||
| " to save optimized bitcode\n"); | ||
| WriteBitcodeToFile(&TheModule, OS, true, false); | ||
| } | ||
|
|
||
| static StringMap<MemoryBufferRef> | ||
| generateModuleMap(const std::vector<MemoryBufferRef> &Modules) { | ||
| StringMap<MemoryBufferRef> ModuleMap; | ||
| for (auto &ModuleBuffer : Modules) { | ||
| assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == | ||
| ModuleMap.end() && | ||
| "Expect unique Buffer Identifier"); | ||
| ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; | ||
| } | ||
| return ModuleMap; | ||
| } | ||
|
|
||
| /// Provide a "loader" for the FunctionImporter to access function from other | ||
| /// modules. | ||
| class ModuleLoader { | ||
| /// The context that will be used for importing. | ||
| LLVMContext &Context; | ||
|
|
||
| /// Map from Module identifier to MemoryBuffer. Used by clients like the | ||
| /// FunctionImported to request loading a Module. | ||
| StringMap<MemoryBufferRef> &ModuleMap; | ||
|
|
||
| public: | ||
| ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap) | ||
| : Context(Context), ModuleMap(ModuleMap) {} | ||
|
|
||
| /// Load a module on demand. | ||
| std::unique_ptr<Module> operator()(StringRef Identifier) { | ||
| return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true); | ||
| } | ||
| }; | ||
|
|
||
| static void promoteModule(Module &TheModule, const FunctionInfoIndex &Index) { | ||
| if (renameModuleForThinLTO(TheModule, Index)) | ||
| report_fatal_error("renameModuleForThinLTO failed"); | ||
| } | ||
|
|
||
| static void crossImportIntoModule(Module &TheModule, | ||
| const FunctionInfoIndex &Index, | ||
| StringMap<MemoryBufferRef> &ModuleMap) { | ||
| ModuleLoader Loader(TheModule.getContext(), ModuleMap); | ||
| FunctionImporter Importer(Index, Loader); | ||
| Importer.importFunctions(TheModule); | ||
| } | ||
|
|
||
| static void optimizeModule(Module &TheModule, TargetMachine &TM) { | ||
| // Populate the PassManager | ||
| PassManagerBuilder PMB; | ||
| PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); | ||
| PMB.Inliner = createFunctionInliningPass(); | ||
| // FIXME: should get it from the bitcode? | ||
| PMB.OptLevel = 3; | ||
| PMB.LoopVectorize = true; | ||
| PMB.SLPVectorize = true; | ||
| PMB.VerifyInput = true; | ||
| PMB.VerifyOutput = false; | ||
|
|
||
| legacy::PassManager PM; | ||
|
|
||
| // Add the TTI (required to inform the vectorizer about register size for | ||
| // instance) | ||
| PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); | ||
|
|
||
| // Add optimizations | ||
| PMB.populateThinLTOPassManager(PM); | ||
| PM.add(createObjCARCContractPass()); | ||
|
|
||
| PM.run(TheModule); | ||
| } | ||
|
|
||
| std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, | ||
| TargetMachine &TM) { | ||
| SmallVector<char, 128> OutputBuffer; | ||
|
|
||
| // CodeGen | ||
| { | ||
| raw_svector_ostream OS(OutputBuffer); | ||
| legacy::PassManager PM; | ||
| if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile, | ||
| /* DisableVerify */ true)) | ||
| report_fatal_error("Failed to setup codegen"); | ||
|
|
||
| // Run codegen now. resulting binary is in OutputBuffer. | ||
| PM.run(TheModule); | ||
| } | ||
| return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer)); | ||
| } | ||
|
|
||
| static std::unique_ptr<MemoryBuffer> | ||
| ProcessThinLTOModule(Module &TheModule, const FunctionInfoIndex &Index, | ||
| StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM, | ||
| ThinLTOCodeGenerator::CachingOptions CacheOptions, | ||
| StringRef SaveTempsDir, unsigned count) { | ||
|
|
||
| // Save temps: after IPO. | ||
| saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc"); | ||
|
|
||
| // "Benchmark"-like optimization: single-source case | ||
| bool SingleModule = (ModuleMap.size() == 1); | ||
|
|
||
| if (!SingleModule) { | ||
| promoteModule(TheModule, Index); | ||
|
|
||
| // Save temps: after promotion. | ||
| saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc"); | ||
|
|
||
| crossImportIntoModule(TheModule, Index, ModuleMap); | ||
|
|
||
| // Save temps: after cross-module import. | ||
| saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); | ||
| } | ||
|
|
||
| optimizeModule(TheModule, TM); | ||
|
|
||
| saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc"); | ||
|
|
||
| return codegenModule(TheModule, TM); | ||
| } | ||
|
|
||
| // Initialize the TargetMachine builder for a given Triple | ||
| static void initTMBuilder(TargetMachineBuilder &TMBuilder, | ||
| const Triple &TheTriple) { | ||
| // Set a default CPU for Darwin triples (copied from LTOCodeGenerator). | ||
| // FIXME this looks pretty terrible... | ||
| if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) { | ||
| if (TheTriple.getArch() == llvm::Triple::x86_64) | ||
| TMBuilder.MCpu = "core2"; | ||
| else if (TheTriple.getArch() == llvm::Triple::x86) | ||
| TMBuilder.MCpu = "yonah"; | ||
| else if (TheTriple.getArch() == llvm::Triple::aarch64) | ||
| TMBuilder.MCpu = "cyclone"; | ||
| } | ||
| TMBuilder.TheTriple = std::move(TheTriple); | ||
| } | ||
|
|
||
| } // end anonymous namespace | ||
|
|
||
| void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { | ||
| MemoryBufferRef Buffer(Data, Identifier); | ||
| if (Modules.empty()) { | ||
| // First module added, so initialize the triple and some options | ||
| LLVMContext Context; | ||
| Triple TheTriple(getBitcodeTargetTriple(Buffer, Context)); | ||
| initTMBuilder(TMBuilder, Triple(TheTriple)); | ||
| } | ||
| #ifndef NDEBUG | ||
| else { | ||
| LLVMContext Context; | ||
| assert(TMBuilder.TheTriple.str() == | ||
| getBitcodeTargetTriple(Buffer, Context) && | ||
| "ThinLTO modules with different triple not supported"); | ||
| } | ||
| #endif | ||
| Modules.push_back(Buffer); | ||
| } | ||
|
|
||
| void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { | ||
| PreservedSymbols.insert(Name); | ||
| } | ||
|
|
||
| void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { | ||
| CrossReferencedSymbols.insert(Name); | ||
| } | ||
|
|
||
| // TargetMachine factory | ||
| std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { | ||
| std::string ErrMsg; | ||
| const Target *TheTarget = | ||
| TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg); | ||
| if (!TheTarget) { | ||
| report_fatal_error("Can't load target for this Triple: " + ErrMsg); | ||
| } | ||
|
|
||
| // Use MAttr as the default set of features. | ||
| SubtargetFeatures Features(MAttr); | ||
| Features.getDefaultSubtargetFeatures(TheTriple); | ||
| std::string FeatureStr = Features.getString(); | ||
| return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( | ||
| TheTriple.str(), MCpu, FeatureStr, Options, RelocModel, | ||
| CodeModel::Default, CGOptLevel)); | ||
| } | ||
|
|
||
| /** | ||
| * Produce the combined function index from all the bitcode files: | ||
| * "thin-link". | ||
| */ | ||
| std::unique_ptr<FunctionInfoIndex> ThinLTOCodeGenerator::linkCombinedIndex() { | ||
| std::unique_ptr<FunctionInfoIndex> CombinedIndex; | ||
| uint64_t NextModuleId = 0; | ||
| for (auto &ModuleBuffer : Modules) { | ||
| ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr = | ||
| object::FunctionIndexObjectFile::create(ModuleBuffer, diagnosticHandler, | ||
| false); | ||
| if (std::error_code EC = ObjOrErr.getError()) { | ||
| // FIXME diagnose | ||
| errs() << "error: can't create FunctionIndexObjectFile for buffer: " | ||
| << EC.message() << "\n"; | ||
| return nullptr; | ||
| } | ||
| auto Index = (*ObjOrErr)->takeIndex(); | ||
| if (CombinedIndex) { | ||
| CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); | ||
| } else { | ||
| CombinedIndex = std::move(Index); | ||
| } | ||
| } | ||
| return CombinedIndex; | ||
| } | ||
|
|
||
| /** | ||
| * Perform promotion and renaming of exported internal functions. | ||
| */ | ||
| void ThinLTOCodeGenerator::promote(Module &TheModule, | ||
| FunctionInfoIndex &Index) { | ||
| promoteModule(TheModule, Index); | ||
| } | ||
|
|
||
| /** | ||
| * Perform cross-module importing for the module identified by ModuleIdentifier. | ||
| */ | ||
| void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, | ||
| FunctionInfoIndex &Index) { | ||
| auto ModuleMap = generateModuleMap(Modules); | ||
| crossImportIntoModule(TheModule, Index, ModuleMap); | ||
| } | ||
|
|
||
| /** | ||
| * Perform post-importing ThinLTO optimizations. | ||
| */ | ||
| void ThinLTOCodeGenerator::optimize(Module &TheModule) { | ||
| initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); | ||
| optimizeModule(TheModule, *TMBuilder.create()); | ||
| } | ||
|
|
||
| /** | ||
| * Perform ThinLTO CodeGen. | ||
| */ | ||
| std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) { | ||
| initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); | ||
| return codegenModule(TheModule, *TMBuilder.create()); | ||
| } | ||
|
|
||
| // Main entry point for the ThinLTO processing | ||
| void ThinLTOCodeGenerator::run() { | ||
| // Sequential linking phase | ||
| auto Index = linkCombinedIndex(); | ||
|
|
||
| // Save temps: index. | ||
| if (!SaveTempsDir.empty()) { | ||
| auto SaveTempPath = SaveTempsDir + "index.bc"; | ||
| std::error_code EC; | ||
| raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); | ||
| if (EC) | ||
| report_fatal_error(Twine("Failed to open ") + SaveTempPath + | ||
| " to save optimized bitcode\n"); | ||
| WriteFunctionSummaryToFile(*Index, OS); | ||
| } | ||
|
|
||
| // Prepare the resulting object vector | ||
| assert(ProducedBinaries.empty() && "The generator should not be reused"); | ||
| ProducedBinaries.resize(Modules.size()); | ||
|
|
||
| // Prepare the module map. | ||
| auto ModuleMap = generateModuleMap(Modules); | ||
|
|
||
| // Parallel optimizer + codegen | ||
| { | ||
| ThreadPool Pool(ThreadCount); | ||
| int count = 0; | ||
| for (auto &ModuleBuffer : Modules) { | ||
| Pool.async([&](int count) { | ||
| LLVMContext Context; | ||
|
|
||
| // Parse module now | ||
| auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); | ||
|
|
||
| // Save temps: original file. | ||
| if (!SaveTempsDir.empty()) { | ||
| saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); | ||
| } | ||
|
|
||
| ProducedBinaries[count] = ProcessThinLTOModule( | ||
| *TheModule, *Index, ModuleMap, *TMBuilder.create(), CacheOptions, | ||
| SaveTempsDir, count); | ||
| }, count); | ||
| count++; | ||
| } | ||
| } | ||
|
|
||
| // If statistics were requested, print them out now. | ||
| if (llvm::AreStatisticsEnabled()) | ||
| llvm::PrintStatistics(); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64-apple-macosx10.11.0" | ||
|
|
||
|
|
||
| define i32 @main() #0 { | ||
| entry: | ||
| call void (...) @weakalias() | ||
| call void (...) @analias() | ||
| %call = call i32 (...) @referencestatics() | ||
| %call1 = call i32 (...) @referenceglobals() | ||
| %call2 = call i32 (...) @referencecommon() | ||
| call void (...) @setfuncptr() | ||
| call void (...) @callfuncptr() | ||
| call void (...) @callweakfunc() | ||
| ret i32 0 | ||
| } | ||
|
|
||
| declare void @weakalias(...) #1 | ||
|
|
||
| declare void @analias(...) #1 | ||
|
|
||
| declare i32 @referencestatics(...) #1 | ||
|
|
||
| declare i32 @referenceglobals(...) #1 | ||
|
|
||
| declare i32 @referencecommon(...) #1 | ||
|
|
||
| declare void @setfuncptr(...) #1 | ||
|
|
||
| declare void @callfuncptr(...) #1 | ||
|
|
||
| declare void @callweakfunc(...) #1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,139 @@ | ||
| ; Do setup work for all below tests: generate bitcode and combined index | ||
| ; RUN: llvm-as -function-summary %s -o %t.bc | ||
| ; RUN: llvm-as -function-summary %p/Inputs/funcimport.ll -o %t2.bc | ||
| ; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc | ||
|
|
||
| ; Ensure statics are promoted/renamed correctly from this file (all but | ||
| ; constant variable need promotion). | ||
| ; RUN: llvm-lto -thinlto-action=promote %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTSTATIC | ||
| ; EXPORTSTATIC-DAG: @staticvar.llvm.0 = hidden global | ||
| ; EXPORTSTATIC-DAG: @staticconstvar = internal unnamed_addr constant | ||
| ; EXPORTSTATIC-DAG: @P.llvm.0 = hidden global void ()* null | ||
| ; EXPORTSTATIC-DAG: define hidden i32 @staticfunc.llvm.0 | ||
| ; EXPORTSTATIC-DAG: define hidden void @staticfunc2.llvm.0 | ||
|
|
||
| ; Ensure that both weak alias to an imported function and strong alias to a | ||
| ; non-imported function are correctly turned into declarations. | ||
| ; Also ensures that alias to a linkonce function is turned into a declaration | ||
| ; and that the associated linkonce function is not in the output, as it is | ||
| ; lazily linked and never referenced/materialized. | ||
| ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=IMPORTGLOB1 | ||
| ; IMPORTGLOB1-DAG: define available_externally void @globalfunc1 | ||
| ; IMPORTGLOB1-DAG: declare void @weakalias | ||
| ; IMPORTGLOB1-DAG: declare void @analias | ||
| ; IMPORTGLOB1-NOT: @linkoncealias | ||
| ; IMPORTGLOB1-NOT: @linkoncefunc | ||
| ; IMPORTGLOB1-NOT: declare void @globalfunc2 | ||
|
|
||
| ; Verify that the optimizer run | ||
| ; RUN: llvm-lto -thinlto-action=optimize %t2.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=OPTIMIZED | ||
| ; OPTIMIZED: define i32 @main() | ||
|
|
||
| ; Verify that the codegen run | ||
| ; RUN: llvm-lto -thinlto-action=codegen %t2.bc -o - | llvm-nm -o - | FileCheck %s --check-prefix=CODEGEN | ||
| ; CODEGEN: T _main | ||
|
|
||
| ; Verify that all run together | ||
| ; RUN: llvm-lto -thinlto-action=run %t2.bc %t.bc | ||
| ; RUN: llvm-nm -o - < %t.bc.thinlto.o | FileCheck %s --check-prefix=ALL | ||
| ; RUN: llvm-nm -o - < %t2.bc.thinlto.o | FileCheck %s --check-prefix=ALL2 | ||
| ; ALL: T _callfuncptr | ||
| ; ALL2: T _main | ||
|
|
||
| target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64-apple-macosx10.11.0" | ||
|
|
||
| @globalvar_in_section = global i32 1, align 4 | ||
| @globalvar = global i32 1, align 4 | ||
| @staticvar = internal global i32 1, align 4 | ||
| @staticvar2 = internal global i32 1, align 4 | ||
| @staticconstvar = internal unnamed_addr constant [2 x i32] [i32 10, i32 20], align 4 | ||
| @commonvar = common global i32 0, align 4 | ||
| @P = internal global void ()* null, align 8 | ||
|
|
||
| @weakalias = weak alias void (...), bitcast (void ()* @globalfunc1 to void (...)*) | ||
| @analias = alias void (...), bitcast (void ()* @globalfunc2 to void (...)*) | ||
| @linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*) | ||
|
|
||
| define void @globalfunc1() #0 { | ||
| entry: | ||
| ret void | ||
| } | ||
|
|
||
| define void @globalfunc2() #0 { | ||
| entry: | ||
| ret void | ||
| } | ||
|
|
||
| define linkonce_odr void @linkoncefunc() #0 { | ||
| entry: | ||
| ret void | ||
| } | ||
|
|
||
| define i32 @referencestatics(i32 %i) #0 { | ||
| entry: | ||
| %i.addr = alloca i32, align 4 | ||
| store i32 %i, i32* %i.addr, align 4 | ||
| %call = call i32 @staticfunc() | ||
| %0 = load i32, i32* @staticvar, align 4 | ||
| %add = add nsw i32 %call, %0 | ||
| %1 = load i32, i32* %i.addr, align 4 | ||
| %idxprom = sext i32 %1 to i64 | ||
| %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* @staticconstvar, i64 0, i64 %idxprom | ||
| %2 = load i32, i32* %arrayidx, align 4 | ||
| %add1 = add nsw i32 %add, %2 | ||
| ret i32 %add1 | ||
| } | ||
|
|
||
| define i32 @referenceglobals(i32 %i) #0 { | ||
| entry: | ||
| %i.addr = alloca i32, align 4 | ||
| store i32 %i, i32* %i.addr, align 4 | ||
| call void @globalfunc1() | ||
| %0 = load i32, i32* @globalvar, align 4 | ||
| ret i32 %0 | ||
| } | ||
|
|
||
| define i32 @referencecommon(i32 %i) #0 { | ||
| entry: | ||
| %i.addr = alloca i32, align 4 | ||
| store i32 %i, i32* %i.addr, align 4 | ||
| %0 = load i32, i32* @commonvar, align 4 | ||
| ret i32 %0 | ||
| } | ||
|
|
||
| define void @setfuncptr() #0 { | ||
| entry: | ||
| store void ()* @staticfunc2, void ()** @P, align 8 | ||
| ret void | ||
| } | ||
|
|
||
| define void @callfuncptr() #0 { | ||
| entry: | ||
| %0 = load void ()*, void ()** @P, align 8 | ||
| call void %0() | ||
| ret void | ||
| } | ||
|
|
||
| @weakvar = weak global i32 1, align 4 | ||
| define weak void @weakfunc() #0 { | ||
| entry: | ||
| ret void | ||
| } | ||
|
|
||
| define void @callweakfunc() #0 { | ||
| entry: | ||
| call void @weakfunc() | ||
| ret void | ||
| } | ||
|
|
||
| define internal i32 @staticfunc() #0 { | ||
| entry: | ||
| ret i32 1 | ||
| } | ||
|
|
||
| define internal void @staticfunc2() #0 { | ||
| entry: | ||
| %0 = load i32, i32* @staticvar2, align 4 | ||
| ret void | ||
| } |