233 changes: 233 additions & 0 deletions llvm/include/llvm/LTO/ThinLTOCodeGenerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
//===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the ThinLTOCodeGenerator class, which is similar to the
// LTOCodeGenerator but targets the ThinLTO scheme. It provides an interface
// for linker plugins.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LTO_THINLTOCODEGENERATOR_H
#define LLVM_LTO_THINLTOCODEGENERATOR_H

#include "llvm-c/lto.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetOptions.h"

#include <string>

namespace llvm {
class FunctionInfoIndex;
class LLVMContext;
class TargetMachine;

/// Helper to gather options relevant to the target machine creation.
///
/// The fields are plain settings; create() turns the current set of values
/// into a new TargetMachine.
struct TargetMachineBuilder {
// Target triple the machine is created for.
Triple TheTriple;
// CPU name forwarded to the TargetMachine.
std::string MCpu;
// Subtarget attributes string (used as the starting set of features).
std::string MAttr;
// Target options forwarded to the TargetMachine.
TargetOptions Options;
// Relocation model; defaults to Reloc::Default.
Reloc::Model RelocModel = Reloc::Default;
// CodeGen optimization level; defaults to CodeGenOpt::Default.
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;

// Build a fresh TargetMachine from the values currently stored above.
std::unique_ptr<TargetMachine> create() const;
};

/// This class defines an interface similar to the LTOCodeGenerator, but adapted
/// for ThinLTO processing.
/// The ThinLTOCodeGenerator is not intended to be reused for multiple
/// compilations: the model is that the client adds modules to the generator,
/// asks it to perform the ThinLTO optimizations / codegen, and finally destroys
/// the code generator.
class ThinLTOCodeGenerator {
public:
  /// Add given module to the code generator.
  ///
  /// \p Identifier names the module and \p Data is its bitcode content. The
  /// generator records a MemoryBufferRef over \p Data rather than a copy, so
  /// the caller has to keep the underlying storage alive while the generator
  /// is in use.
  void addModule(StringRef Identifier, StringRef Data);

  /**
   * Adds to a list of all global symbols that must exist in the final generated
   * code. If a symbol is not listed there, it will be optimized away if it is
   * inlined into every usage.
   */
  void preserveSymbol(StringRef Name);

  /**
   * Adds to a list of all global symbols that are cross-referenced between
   * ThinLTO files. If the ThinLTO CodeGenerator can ensure that every
   * reference from a ThinLTO module to this symbol is optimized away, then
   * the symbol can be discarded.
   */
  void crossReferenceSymbol(StringRef Name);

  /**
   * Process all the modules that were added to the code generator in parallel.
   *
   * Client can access the resulting object files using getProducedBinaries()
   */
  void run();

  /**
   * Return the "in memory" binaries produced by the code generator.
   */
  std::vector<std::unique_ptr<MemoryBuffer>> &getProducedBinaries() {
    return ProducedBinaries;
  }

  /**
   * \defgroup Options setters
   * @{
   */

  /**
   * \defgroup Cache controlling options
   *
   * These entry points control the ThinLTO cache. The cache is intended to
   * support incremental builds, and thus needs to be persistent across builds.
   * The client enables the cache by supplying a path to an existing directory.
   * The code generator will use this to store object files that may be reused
   * during a subsequent build.
   * To avoid filling the disk space, a few knobs are provided:
   *  - The pruning interval limits the frequency at which the garbage
   *    collector will try to scan the cache directory to prune it from expired
   *    entries. Setting to -1 disables the pruning (default).
   *  - The pruning expiration time indicates to the garbage collector how old
   *    an entry needs to be to be removed.
   *  - Finally, the garbage collector can be instructed to prune the cache till
   *    the occupied space goes below a threshold.
   * @{
   */

  struct CachingOptions {
    std::string Path;
    int PruningInterval = -1; // seconds, -1 to disable pruning
    // Every member gets an initializer: the struct is copied by value, and
    // copying an indeterminate unsigned value is undefined behavior.
    unsigned int Expiration = 7 * 24 * 3600;      // seconds (one week).
    unsigned MaxPercentageOfAvailableSpace = 100; // percentage, 100: no limit.
  };

  /// Provide a path to a directory where to store the cached files for
  /// incremental build.
  void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); }

  /// Cache policy: interval (seconds) between two prunes of the cache. Set to a
  /// negative value (default) to disable pruning.
  void setCachePruningInterval(int Interval) {
    CacheOptions.PruningInterval = Interval;
  }

  /// Cache policy: expiration (in seconds) for an entry.
  void setCacheEntryExpiration(unsigned Expiration) {
    CacheOptions.Expiration = Expiration;
  }

  /**
   * Sets the maximum cache size that can be persistent across builds, in terms
   * of percentage of the available space on the disk. Set to 100 to indicate
   * no limit, 50 to indicate that the cache size will not be left over half
   * the available space. A value over 100 will be reduced to 100.
   *
   * The formula looks like:
   *  AvailableSpace = FreeSpace + ExistingCacheSize
   *  NewCacheSize = AvailableSpace * P/100
   *
   */
  void setMaxCacheSizeRelativeToAvailableSpace(unsigned Percentage) {
    // Enforce the documented contract: anything above 100 is clamped to 100.
    CacheOptions.MaxPercentageOfAvailableSpace =
        Percentage > 100 ? 100 : Percentage;
  }

  /**@}*/

  /// Set the path to a directory where to save temporaries at various stages of
  /// the processing.
  void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); }

  /// CPU to use to initialize the TargetMachine
  void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); }

  /// Subtarget attributes
  void setAttr(std::string MAttr) { TMBuilder.MAttr = std::move(MAttr); }

  /// TargetMachine options
  void setTargetOptions(TargetOptions Options) {
    TMBuilder.Options = std::move(Options);
  }

  /// Relocation (PIC) model used when creating the TargetMachine
  void setCodePICModel(Reloc::Model Model) { TMBuilder.RelocModel = Model; }

  /// CodeGen optimization level
  void setCodeGenOptLevel(CodeGenOpt::Level CGOptLevel) {
    TMBuilder.CGOptLevel = CGOptLevel;
  }

  /**@}*/

  /**
   * \defgroup Set of APIs to run individual stages in isolation.
   * @{
   */

  /**
   * Produce the combined function index from all the bitcode files:
   * "thin-link".
   */
  std::unique_ptr<FunctionInfoIndex> linkCombinedIndex();

  /**
   * Perform promotion and renaming of exported internal functions.
   */
  void promote(Module &Module, FunctionInfoIndex &Index);

  /**
   * Perform cross-module importing into the given module, using the modules
   * previously added to the generator as import sources.
   */
  void crossModuleImport(Module &Module, FunctionInfoIndex &Index);

  /**
   * Perform post-importing ThinLTO optimizations.
   */
  void optimize(Module &Module);

  /**
   * Perform ThinLTO CodeGen.
   */
  std::unique_ptr<MemoryBuffer> codegen(Module &Module);

  /**@}*/

private:
  /// Helper factory to build a TargetMachine
  TargetMachineBuilder TMBuilder;

  /// Vector holding the in-memory buffer containing the produced binaries.
  std::vector<std::unique_ptr<MemoryBuffer>> ProducedBinaries;

  /// Vector holding the input buffers containing the bitcode modules to
  /// process.
  std::vector<MemoryBufferRef> Modules;

  /// Set of symbols that need to be preserved outside of the set of bitcode
  /// files.
  StringSet<> PreservedSymbols;

  /// Set of symbols that are cross-referenced between bitcode files.
  StringSet<> CrossReferencedSymbols;

  /// Control the caching behavior.
  CachingOptions CacheOptions;

  /// Path to a directory to save the temporary bitcode files.
  std::string SaveTempsDir;
};
}
#endif
3 changes: 2 additions & 1 deletion llvm/lib/LTO/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
add_llvm_library(LLVMLTO
LTOModule.cpp
LTOCodeGenerator.cpp
ThinLTOCodeGenerator.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/LTO
)
)

add_dependencies(LLVMLTO intrinsics_gen)
1 change: 1 addition & 0 deletions llvm/lib/LTO/LLVMBuild.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ required_libraries =
Scalar
Support
Target
TransformUtils
12 changes: 12 additions & 0 deletions llvm/lib/LTO/LTOModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ bool LTOModule::isBitcodeFile(const char *Path) {
return bool(BCData);
}

bool LTOModule::isThinLTO() {
// Right now the detection is only based on the summary presence. We may want
// to add a dedicated flag at some point.
return hasFunctionSummary(IRFile->getMemoryBufferRef(),
[](const DiagnosticInfo &DI) {
DiagnosticPrinterRawOStream DP(errs());
DI.print(DP);
errs() << '\n';
return;
});
}

bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
StringRef TriplePrefix) {
ErrorOr<MemoryBufferRef> BCOrErr =
Expand Down
384 changes: 384 additions & 0 deletions llvm/lib/LTO/ThinLTOCodeGenerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,384 @@
//===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Thin Link Time Optimization library. This library is
// intended to be used by linker to optimize code at link time.
//
//===----------------------------------------------------------------------===//

#include "llvm/LTO/ThinLTOCodeGenerator.h"

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"

using namespace llvm;

namespace {

// Number of threads handed to the ThreadPool in ThinLTOCodeGenerator::run().
// Exposed as the "threads" command line option and initialized from
// std::thread::hardware_concurrency().
// NOTE(review): hardware_concurrency() is allowed to return 0 when the value
// cannot be computed -- confirm the pool copes with a zero thread count.
static cl::opt<int> ThreadCount("threads",
cl::init(std::thread::hardware_concurrency()));

static void diagnosticHandler(const DiagnosticInfo &DI) {
DiagnosticPrinterRawOStream DP(errs());
DI.print(DP);
errs() << '\n';
}

// Simple helper to load a module from bitcode
static std::unique_ptr<Module>
loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
bool Lazy) {
SMDiagnostic Err;
ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr);
if (Lazy) {
ModuleOrErr =
getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context,
/* ShouldLazyLoadMetadata */ Lazy);
} else {
ModuleOrErr = parseBitcodeFile(Buffer, Context);
}
if (std::error_code EC = ModuleOrErr.getError()) {
Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
Err.print("ThinLTO", errs());
report_fatal_error("Can't load module, abort.");
}
return std::move(ModuleOrErr.get());
}

// Simple helper to save temporary files for debug.
// Writes TheModule as bitcode to the file named TempDir + count + Suffix.
// Does nothing when TempDir is empty; failing to open the file is fatal.
static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
                            unsigned count, StringRef Suffix) {
  if (TempDir.empty())
    return;
  // User asked to save temps, so dump the bitcode file.
  // The concatenation is materialized into a std::string right away: a Twine
  // only points at its operands, so keeping it in a local (as `auto` would)
  // leaves it referencing the already destroyed utostr() temporary.
  std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
  std::error_code EC;
  raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
  if (EC)
    report_fatal_error(Twine("Failed to open ") + SaveTempPath +
                       " to save optimized bitcode\n");
  WriteBitcodeToFile(&TheModule, OS, true, false);
}

// Build a map from each buffer's identifier to the buffer itself.
// Identifiers are expected to be unique (checked by an assertion).
static StringMap<MemoryBufferRef>
generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
  StringMap<MemoryBufferRef> Result;
  for (const MemoryBufferRef &Buffer : Modules) {
    StringRef Id = Buffer.getBufferIdentifier();
    assert(Result.count(Id) == 0 && "Expect unique Buffer Identifier");
    Result[Id] = Buffer;
  }
  return Result;
}

/// Provide a "loader" for the FunctionImporter to access function from other
/// modules.
class ModuleLoader {
/// The context that will be used for importing.
LLVMContext &Context;

/// Map from Module identifier to MemoryBuffer. Used by clients like the
/// FunctionImported to request loading a Module.
StringMap<MemoryBufferRef> &ModuleMap;

public:
ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
: Context(Context), ModuleMap(ModuleMap) {}

/// Load a module on demand.
std::unique_ptr<Module> operator()(StringRef Identifier) {
return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
}
};

// Run renameModuleForThinLTO on TheModule with the given index; a failure
// reported by the renaming is fatal.
static void promoteModule(Module &TheModule, const FunctionInfoIndex &Index) {
  const bool RenameFailed = renameModuleForThinLTO(TheModule, Index);
  if (RenameFailed)
    report_fatal_error("renameModuleForThinLTO failed");
}

// Import functions into TheModule as directed by Index. Modules requested by
// the importer are loaded on demand from ModuleMap, into TheModule's context.
static void crossImportIntoModule(Module &TheModule,
                                  const FunctionInfoIndex &Index,
                                  StringMap<MemoryBufferRef> &ModuleMap) {
  LLVMContext &Ctx = TheModule.getContext();
  ModuleLoader OnDemandLoader(Ctx, ModuleMap);
  FunctionImporter Importer(Index, OnDemandLoader);
  Importer.importFunctions(TheModule);
}

// Run the ThinLTO optimization pipeline over TheModule, using TM to provide
// the target specific information.
static void optimizeModule(Module &TheModule, TargetMachine &TM) {
  // Configure the pipeline builder.
  PassManagerBuilder Builder;
  Builder.VerifyInput = true;
  Builder.VerifyOutput = false;
  Builder.LoopVectorize = true;
  Builder.SLPVectorize = true;
  // FIXME: should get it from the bitcode?
  Builder.OptLevel = 3;
  Builder.Inliner = createFunctionInliningPass();
  Builder.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());

  legacy::PassManager Passes;

  // The TTI goes first (required to inform the vectorizer about register size
  // for instance).
  Passes.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));

  // Then the optimizations themselves, followed by the ObjCARC contraction.
  Builder.populateThinLTOPassManager(Passes);
  Passes.add(createObjCARCContractPass());

  Passes.run(TheModule);
}

// Emit an object file for TheModule with TM and return it as an in-memory
// buffer. Failing to set up the codegen pipeline is fatal.
std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
                                            TargetMachine &TM) {
  SmallVector<char, 128> ObjectBytes;

  // The stream and the pass manager live in their own scope so that both are
  // gone before the buffer is handed over below.
  {
    raw_svector_ostream ObjectStream(ObjectBytes);
    legacy::PassManager CodeGenPasses;
    const bool SetupFailed = TM.addPassesToEmitFile(
        CodeGenPasses, ObjectStream, TargetMachine::CGFT_ObjectFile,
        /* DisableVerify */ true);
    if (SetupFailed)
      report_fatal_error("Failed to setup codegen");

    // Run codegen now: the resulting binary ends up in ObjectBytes.
    CodeGenPasses.run(TheModule);
  }
  return make_unique<ObjectMemoryBuffer>(std::move(ObjectBytes));
}

// Run the per-module ThinLTO pipeline on TheModule and return the produced
// object: promotion and cross-module importing (skipped when ModuleMap holds a
// single module), then optimization and codegen. When SaveTempsDir is set, the
// bitcode is dumped after each stage, using count in the file name.
static std::unique_ptr<MemoryBuffer>
ProcessThinLTOModule(Module &TheModule, const FunctionInfoIndex &Index,
                     StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
                     ThinLTOCodeGenerator::CachingOptions CacheOptions,
                     StringRef SaveTempsDir, unsigned count) {
  // Dump the current state of the module under the given stage suffix.
  auto DumpStage = [&](StringRef StageSuffix) {
    saveTempBitcode(TheModule, SaveTempsDir, count, StageSuffix);
  };

  // Save temps: after IPO.
  DumpStage(".1.IPO.bc");

  // "Benchmark"-like optimization: with a single source there is nothing to
  // promote or import.
  const bool HasOtherModules = ModuleMap.size() != 1;
  if (HasOtherModules) {
    promoteModule(TheModule, Index);
    // Save temps: after promotion.
    DumpStage(".2.promoted.bc");

    crossImportIntoModule(TheModule, Index, ModuleMap);
    // Save temps: after cross-module import.
    DumpStage(".3.imported.bc");
  }

  optimizeModule(TheModule, TM);
  // Save temps: after optimization.
  DumpStage(".3.opt.bc");

  return codegenModule(TheModule, TM);
}

// Initialize the TargetMachine builder for a given Triple.
// Records TheTriple in TMBuilder and, when no CPU was set and the triple is a
// Darwin one, picks a default CPU for the x86_64, x86 and aarch64
// architectures.
static void initTMBuilder(TargetMachineBuilder &TMBuilder,
                          const Triple &TheTriple) {
  // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
  // FIXME this looks pretty terrible...
  if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
    switch (TheTriple.getArch()) {
    case llvm::Triple::x86_64:
      TMBuilder.MCpu = "core2";
      break;
    case llvm::Triple::x86:
      TMBuilder.MCpu = "yonah";
      break;
    case llvm::Triple::aarch64:
      TMBuilder.MCpu = "cyclone";
      break;
    default:
      // No default CPU for the other architectures.
      break;
    }
  }
  // Plain copy: TheTriple is a const reference, so std::move on it could not
  // move anything and only obscured the fact that a copy is made.
  TMBuilder.TheTriple = TheTriple;
}

} // end anonymous namespace

// Register a module. The first module added initializes the TargetMachine
// builder from its target triple; in asserts builds every later module is
// checked to have the same triple.
void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
  MemoryBufferRef Buffer(Data, Identifier);
  const bool IsFirstModule = Modules.empty();
  if (IsFirstModule) {
    // First module added, so initialize the triple and some options
    LLVMContext Context;
    initTMBuilder(TMBuilder, Triple(getBitcodeTargetTriple(Buffer, Context)));
  }
#ifndef NDEBUG
  if (!IsFirstModule) {
    LLVMContext Context;
    assert(TMBuilder.TheTriple.str() ==
               getBitcodeTargetTriple(Buffer, Context) &&
           "ThinLTO modules with different triple not supported");
  }
#endif
  Modules.push_back(Buffer);
}

// Record Name in the set of symbols that must be preserved.
void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
  // Inserting a name that is already present leaves the set unchanged.
  (void)PreservedSymbols.insert(Name);
}

// Record Name in the set of symbols cross-referenced between bitcode files.
void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
  // Inserting a name that is already present leaves the set unchanged.
  (void)CrossReferencedSymbols.insert(Name);
}

// TargetMachine factory
std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
std::string ErrMsg;
const Target *TheTarget =
TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
if (!TheTarget) {
report_fatal_error("Can't load target for this Triple: " + ErrMsg);
}

// Use MAttr as the default set of features.
SubtargetFeatures Features(MAttr);
Features.getDefaultSubtargetFeatures(TheTriple);
std::string FeatureStr = Features.getString();
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
CodeModel::Default, CGOptLevel));
}

/**
 * Produce the combined function index from all the bitcode files:
 * "thin-link".
 *
 * The index of the first module is used as the starting point and the index of
 * each following module is merged into it. Returns null when no module was
 * added, or when the index of a module cannot be read (an error is then
 * printed on stderr).
 */
std::unique_ptr<FunctionInfoIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
  std::unique_ptr<FunctionInfoIndex> Combined;
  uint64_t LastModuleId = 0;
  for (auto &Buffer : Modules) {
    ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
        object::FunctionIndexObjectFile::create(Buffer, diagnosticHandler,
                                                false);
    if (std::error_code EC = ObjOrErr.getError()) {
      // FIXME diagnose
      errs() << "error: can't create FunctionIndexObjectFile for buffer: "
             << EC.message() << "\n";
      return nullptr;
    }

    auto ModuleIndex = (*ObjOrErr)->takeIndex();
    if (!Combined) {
      // First module: its index seeds the combined one.
      Combined = std::move(ModuleIndex);
      continue;
    }
    Combined->mergeFrom(std::move(ModuleIndex), ++LastModuleId);
  }
  return Combined;
}

/**
 * Perform promotion and renaming of exported internal functions in TheModule,
 * as directed by Index. A failure is fatal.
 */
void ThinLTOCodeGenerator::promote(Module &TheModule,
                                   FunctionInfoIndex &Index) {
  const FunctionInfoIndex &CombinedIndex = Index;
  promoteModule(TheModule, CombinedIndex);
}

/**
 * Perform cross-module importing into TheModule, as directed by Index. The
 * modules previously added to the generator are the import sources.
 */
void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
                                             FunctionInfoIndex &Index) {
  StringMap<MemoryBufferRef> SourceModules = generateModuleMap(Modules);
  crossImportIntoModule(TheModule, Index, SourceModules);
}

/**
 * Perform post-importing ThinLTO optimizations on TheModule. The
 * TargetMachine is created from the module's own target triple.
 */
void ThinLTOCodeGenerator::optimize(Module &TheModule) {
  initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
  std::unique_ptr<TargetMachine> TM = TMBuilder.create();
  optimizeModule(TheModule, *TM);
}

/**
 * Perform ThinLTO CodeGen on TheModule and return the produced object as an
 * in-memory buffer. The TargetMachine is created from the module's own target
 * triple.
 */
std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
  initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
  std::unique_ptr<TargetMachine> TM = TMBuilder.create();
  return codegenModule(TheModule, *TM);
}

// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
// Sequential linking phase
auto Index = linkCombinedIndex();

// Save temps: index.
if (!SaveTempsDir.empty()) {
auto SaveTempPath = SaveTempsDir + "index.bc";
std::error_code EC;
raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
" to save optimized bitcode\n");
WriteFunctionSummaryToFile(*Index, OS);
}

// Prepare the resulting object vector
assert(ProducedBinaries.empty() && "The generator should not be reused");
ProducedBinaries.resize(Modules.size());

// Prepare the module map.
auto ModuleMap = generateModuleMap(Modules);

// Parallel optimizer + codegen
{
ThreadPool Pool(ThreadCount);
int count = 0;
for (auto &ModuleBuffer : Modules) {
Pool.async([&](int count) {
LLVMContext Context;

// Parse module now
auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);

// Save temps: original file.
if (!SaveTempsDir.empty()) {
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
}

ProducedBinaries[count] = ProcessThinLTOModule(
*TheModule, *Index, ModuleMap, *TMBuilder.create(), CacheOptions,
SaveTempsDir, count);
}, count);
count++;
}
}

// If statistics were requested, print them out now.
if (llvm::AreStatisticsEnabled())
llvm::PrintStatistics();
}
32 changes: 32 additions & 0 deletions llvm/test/ThinLTO/Inputs/funcimport.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"


; Entry point of the second module: calls every function that this file only
; declares, so each call is a cross-module reference.
define i32 @main() #0 {
entry:
call void (...) @weakalias()
call void (...) @analias()
%call = call i32 (...) @referencestatics()
%call1 = call i32 (...) @referenceglobals()
%call2 = call i32 (...) @referencecommon()
call void (...) @setfuncptr()
call void (...) @callfuncptr()
call void (...) @callweakfunc()
ret i32 0
}

; External declarations for the symbols called from @main; none of them is
; defined in this file.
declare void @weakalias(...) #1

declare void @analias(...) #1

declare i32 @referencestatics(...) #1

declare i32 @referenceglobals(...) #1

declare i32 @referencecommon(...) #1

declare void @setfuncptr(...) #1

declare void @callfuncptr(...) #1

declare void @callweakfunc(...) #1
139 changes: 139 additions & 0 deletions llvm/test/ThinLTO/funcimport.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
; Do setup work for all below tests: generate bitcode and combined index
; RUN: llvm-as -function-summary %s -o %t.bc
; RUN: llvm-as -function-summary %p/Inputs/funcimport.ll -o %t2.bc
; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc

; Ensure statics are promoted/renamed correctly from this file (all but
; constant variable need promotion).
; RUN: llvm-lto -thinlto-action=promote %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTSTATIC
; EXPORTSTATIC-DAG: @staticvar.llvm.0 = hidden global
; EXPORTSTATIC-DAG: @staticconstvar = internal unnamed_addr constant
; EXPORTSTATIC-DAG: @P.llvm.0 = hidden global void ()* null
; EXPORTSTATIC-DAG: define hidden i32 @staticfunc.llvm.0
; EXPORTSTATIC-DAG: define hidden void @staticfunc2.llvm.0

; Ensure that both weak alias to an imported function and strong alias to a
; non-imported function are correctly turned into declarations.
; Also ensures that alias to a linkonce function is turned into a declaration
; and that the associated linkonce function is not in the output, as it is
; lazily linked and never referenced/materialized.
; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=IMPORTGLOB1
; IMPORTGLOB1-DAG: define available_externally void @globalfunc1
; IMPORTGLOB1-DAG: declare void @weakalias
; IMPORTGLOB1-DAG: declare void @analias
; IMPORTGLOB1-NOT: @linkoncealias
; IMPORTGLOB1-NOT: @linkoncefunc
; IMPORTGLOB1-NOT: declare void @globalfunc2

; Verify that the optimizer runs
; RUN: llvm-lto -thinlto-action=optimize %t2.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=OPTIMIZED
; OPTIMIZED: define i32 @main()

; Verify that the codegen runs
; RUN: llvm-lto -thinlto-action=codegen %t2.bc -o - | llvm-nm -o - | FileCheck %s --check-prefix=CODEGEN
; CODEGEN: T _main

; Verify that all run together
; RUN: llvm-lto -thinlto-action=run %t2.bc %t.bc
; RUN: llvm-nm -o - < %t.bc.thinlto.o | FileCheck %s --check-prefix=ALL
; RUN: llvm-nm -o - < %t2.bc.thinlto.o | FileCheck %s --check-prefix=ALL2
; ALL: T _callfuncptr
; ALL2: T _main

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

; Global variables covering several linkages: external, internal (static),
; internal constant and common.
@globalvar_in_section = global i32 1, align 4
@globalvar = global i32 1, align 4
@staticvar = internal global i32 1, align 4
@staticvar2 = internal global i32 1, align 4
@staticconstvar = internal unnamed_addr constant [2 x i32] [i32 10, i32 20], align 4
@commonvar = common global i32 0, align 4
@P = internal global void ()* null, align 8

; Aliases: a weak alias of @globalfunc1, a strong alias of @globalfunc2 and an
; alias of the linkonce_odr function @linkoncefunc.
@weakalias = weak alias void (...), bitcast (void ()* @globalfunc1 to void (...)*)
@analias = alias void (...), bitcast (void ()* @globalfunc2 to void (...)*)
@linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*)

; Empty external function; aliased by @weakalias and called by
; @referenceglobals.
define void @globalfunc1() #0 {
entry:
ret void
}

; Empty external function; aliased by @analias.
define void @globalfunc2() #0 {
entry:
ret void
}

; Empty linkonce_odr function; only referenced through @linkoncealias.
define linkonce_odr void @linkoncefunc() #0 {
entry:
ret void
}

; References the internal symbols of this module: returns the sum of the result
; of @staticfunc, the value of @staticvar and element %i of @staticconstvar.
define i32 @referencestatics(i32 %i) #0 {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%call = call i32 @staticfunc()
%0 = load i32, i32* @staticvar, align 4
%add = add nsw i32 %call, %0
%1 = load i32, i32* %i.addr, align 4
%idxprom = sext i32 %1 to i64
%arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* @staticconstvar, i64 0, i64 %idxprom
%2 = load i32, i32* %arrayidx, align 4
%add1 = add nsw i32 %add, %2
ret i32 %add1
}

; References external symbols of this module: calls @globalfunc1 and returns
; the value of @globalvar.
define i32 @referenceglobals(i32 %i) #0 {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
call void @globalfunc1()
%0 = load i32, i32* @globalvar, align 4
ret i32 %0
}

; Returns the value of the common variable @commonvar.
define i32 @referencecommon(i32 %i) #0 {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%0 = load i32, i32* @commonvar, align 4
ret i32 %0
}

; Stores the address of the internal function @staticfunc2 into the internal
; function pointer @P.
define void @setfuncptr() #0 {
entry:
store void ()* @staticfunc2, void ()** @P, align 8
ret void
}

; Loads the function pointer stored in @P and calls it.
define void @callfuncptr() #0 {
entry:
%0 = load void ()*, void ()** @P, align 8
call void %0()
ret void
}

; Weak definitions: a weak global variable and an empty weak function (the
; latter is called by @callweakfunc).
@weakvar = weak global i32 1, align 4
define weak void @weakfunc() #0 {
entry:
ret void
}

; Calls the weak function @weakfunc.
define void @callweakfunc() #0 {
entry:
call void @weakfunc()
ret void
}

; Internal function returning 1; called by @referencestatics.
define internal i32 @staticfunc() #0 {
entry:
ret i32 1
}

; Internal function that loads @staticvar2; its address is taken by
; @setfuncptr.
define internal void @staticfunc2() #0 {
entry:
%0 = load i32, i32* @staticvar2, align 4
ret void
}
290 changes: 290 additions & 0 deletions llvm/tools/llvm-lto/llvm-lto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTOCodeGenerator.h"
#include "llvm/LTO/ThinLTOCodeGenerator.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -64,6 +67,36 @@ static cl::opt<bool>
ThinLTO("thinlto", cl::init(false),
cl::desc("Only write combined global index for ThinLTO backends"));

// Stages selectable through the -thinlto-action option.
enum ThinLTOModes {
THINLINK,    // "thinlink": produce the combined index
THINPROMOTE, // "promote": pre-import promotion
THINIMPORT,  // "import": cross-module importing
THINOPT,     // "optimize": ThinLTO optimizations
THINCODEGEN, // "codegen": code generation
THINALL      // "run": the whole ThinLTO pipeline
};

// -thinlto-action=<stage>: selects the ThinLTO stage to perform. The accepted
// values map one to one onto the ThinLTOModes enumerators.
cl::opt<ThinLTOModes> ThinLTOMode(
"thinlto-action", cl::desc("Perform a single ThinLTO stage:"),
cl::values(
clEnumValN(
THINLINK, "thinlink",
"ThinLink: produces the index by linking only the summaries."),
clEnumValN(THINPROMOTE, "promote",
"Perform pre-import promotion (requires -thinlto-index)."),
clEnumValN(THINIMPORT, "import", "Perform both promotion and "
"cross-module importing (requires "
"-thinlto-index)."),
clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."),
clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"),
clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"),
clEnumValEnd));

// -thinlto-index=<file>: path of the combined index to load for the stages
// that need one (read by thinlto::loadCombinedIndex()).
static cl::opt<std::string>
ThinLTOIndex("thinlto-index",
cl::desc("Provide the index produced by a ThinLink, required "
"to perform the promotion and/or importing."));

// -save-merged-module: off by default.
static cl::opt<bool>
SaveModuleFile("save-merged-module", cl::init(false),
cl::desc("Write merged LTO module to file before CodeGen"));
Expand Down Expand Up @@ -241,6 +274,255 @@ static void createCombinedFunctionIndex() {
OS.close();
}

namespace thinlto {

/// Load in memory every file listed as a module path in \p Index.
///
/// Returns the buffers in the order the module paths are iterated. A file
/// that fails to load is reported through error().
std::vector<std::unique_ptr<MemoryBuffer>>
loadAllFilesForIndex(const FunctionInfoIndex &Index) {
  std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;

  for (auto &ModPath : Index.modPathStringEntries()) {
    const auto &Filename = ModPath.first();
    // Materialize the message right away: a Twine stored in a local would
    // keep pointing at the temporaries of this expression after they died.
    std::string CurrentActivity = ("loading file '" + Filename + "'").str();
    auto InputOrErr = MemoryBuffer::getFile(Filename);
    error(InputOrErr, "error " + CurrentActivity);
    InputBuffers.push_back(std::move(*InputOrErr));
  }
  return InputBuffers;
}

std::unique_ptr<FunctionInfoIndex> loadCombinedIndex() {
if (ThinLTOIndex.empty())
report_fatal_error("Missing -thinlto-index for ThinLTO promotion stage");
auto CurrentActivity = "loading file '" + ThinLTOIndex + "'";
ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
llvm::getFunctionIndexForFile(ThinLTOIndex, diagnosticHandler);
error(IndexOrErr, "error " + CurrentActivity);
return std::move(IndexOrErr.get());
}

/// Parse the IR file \p Filename into a Module owned by \p Ctx. A parse
/// failure is printed on stderr and is fatal.
static std::unique_ptr<Module> loadModule(StringRef Filename,
                                          LLVMContext &Ctx) {
  SMDiagnostic ParseDiag;
  std::unique_ptr<Module> Parsed = parseIRFile(Filename, ParseDiag, Ctx);
  if (!Parsed) {
    ParseDiag.print("llvm-lto", errs());
    report_fatal_error("Can't load module for file " + Filename);
  }
  return Parsed;
}

/// Write \p TheModule as bitcode to the file \p Filename. Failing to open the
/// file is reported through error().
static void writeModuleToFile(Module &TheModule, StringRef Filename) {
  std::error_code OpenError;
  raw_fd_ostream Out(Filename, OpenError, sys::fs::OpenFlags::F_None);
  error(OpenError, "error opening the file '" + Filename + "'");
  WriteBitcodeToFile(&TheModule, Out, true, false);
}

class ThinLTOProcessing {
public:
ThinLTOCodeGenerator ThinGenerator;

/// Configure the generator with the target options and the relocation model
/// (RelocModel).
ThinLTOProcessing(const TargetOptions &Options) {
  ThinGenerator.setTargetOptions(Options);
  ThinGenerator.setCodePICModel(RelocModel);
}

/// Dispatch to the stage selected with -thinlto-action.
void run() {
  switch (ThinLTOMode) {
  case THINLINK:
    thinLink();
    break;
  case THINPROMOTE:
    promote();
    break;
  case THINIMPORT:
    import();
    break;
  case THINOPT:
    optimize();
    break;
  case THINCODEGEN:
    codegen();
    break;
  case THINALL:
    runAll();
    break;
  }
}

private:
/// Load the input files, create the combined index, and write it out.
///
/// An output filename is required. A failure to load an input, to build the
/// combined index, or to open the output is fatal.
void thinLink() {
  // Perform "ThinLink": just produce the index
  if (OutputFilename.empty())
    report_fatal_error(
        "OutputFilename is necessary to store the combined index.\n");

  std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;
  for (auto &Filename : InputFilenames) {
    // Own the message: a StringRef would be left pointing at the temporary
    // std::string produced by the concatenation.
    std::string CurrentActivity = "loading file '" + Filename + "'";
    auto InputOrErr = MemoryBuffer::getFile(Filename);
    error(InputOrErr, "error " + CurrentActivity);
    InputBuffers.push_back(std::move(*InputOrErr));
    ThinGenerator.addModule(Filename, InputBuffers.back()->getBuffer());
  }

  auto CombinedIndex = ThinGenerator.linkCombinedIndex();
  // The generator returns null when no index could be built; do not
  // dereference it below.
  if (!CombinedIndex)
    report_fatal_error("Failed to build the combined function index");

  std::error_code EC;
  raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
  error(EC, "error opening the file '" + OutputFilename + "'");
  WriteFunctionSummaryToFile(*CombinedIndex, OS);
}

/// Load the combined index from disk, then perform the promotion on each file
/// mentioned on the command line (these must match the index content). The
/// result goes to the output filename when one is given, otherwise to the
/// input name suffixed with ".thinlto.promoted.bc".
void promote() {
  const bool SingleInput = InputFilenames.size() == 1;
  if (!SingleInput && !OutputFilename.empty())
    report_fatal_error("Can't handle a single output filename and multiple "
                       "input files, do not provide an output filename and "
                       "the output files will be suffixed from the input "
                       "ones.");

  auto Index = loadCombinedIndex();
  for (auto &Filename : InputFilenames) {
    LLVMContext Ctx;
    auto TheModule = loadModule(Filename, Ctx);

    ThinGenerator.promote(*TheModule, *Index);

    std::string OutputName;
    if (OutputFilename.empty())
      OutputName = Filename + ".thinlto.promoted.bc";
    else
      OutputName = OutputFilename;
    writeModuleToFile(*TheModule, OutputName);
  }
}

/// Load the combined index from disk, then load every file referenced by
/// the index and add them to the generator, then perform the cross-module
/// importing on the files mentioned on the command line (these must match
/// the index content). The result goes to the output filename when one is
/// given, otherwise to the input name suffixed with ".thinlto.imported.bc".
void import() {
  const bool SingleInput = InputFilenames.size() == 1;
  if (!SingleInput && !OutputFilename.empty())
    report_fatal_error("Can't handle a single output filename and multiple "
                       "input files, do not provide an output filename and "
                       "the output files will be suffixed from the input "
                       "ones.");

  auto Index = loadCombinedIndex();

  // The buffers have to stay alive for as long as the generator uses them.
  auto InputBuffers = loadAllFilesForIndex(*Index);
  for (auto &MemBuffer : InputBuffers)
    ThinGenerator.addModule(MemBuffer->getBufferIdentifier(),
                            MemBuffer->getBuffer());

  for (auto &Filename : InputFilenames) {
    LLVMContext Ctx;
    auto TheModule = loadModule(Filename, Ctx);

    ThinGenerator.crossModuleImport(*TheModule, *Index);

    std::string OutputName;
    if (OutputFilename.empty())
      OutputName = Filename + ".thinlto.imported.bc";
    else
      OutputName = OutputFilename;
    writeModuleToFile(*TheModule, OutputName);
  }
}

/// Optimization step: for every file named on the command line, load it as a
/// module, run ThinGenerator.optimize() on it and pass the result to
/// writeModuleToFile().
/// The destination is the output filename when one was given (accepted only
/// together with exactly one input file), otherwise a name derived from the
/// input filename. A -thinlto-index value is not used by this step and only
/// triggers a warning.
void optimize() {
  if (InputFilenames.size() != 1 && !OutputFilename.empty())
    report_fatal_error("Can't handle a single output filename and multiple "
                       "input files, do not provide an output filename and "
                       "the output files will be suffixed from the input "
                       "ones.");
  // Terminate the warning with a newline so it does not run into whatever is
  // printed to stderr next.
  if (!ThinLTOIndex.empty())
    errs() << "Warning: -thinlto-index ignored for optimize stage" << '\n';

  for (auto &Filename : InputFilenames) {
    // A fresh context is used for each input file.
    LLVMContext Ctx;
    auto TheModule = loadModule(Filename, Ctx);

    ThinGenerator.optimize(*TheModule);

    std::string OutputName = OutputFilename;
    if (OutputName.empty()) {
      // NOTE(review): this is the same default suffix that import() uses; it
      // may have been meant to be distinct. Left unchanged because the file
      // name is user-visible.
      OutputName = Filename + ".thinlto.imported.bc";
    }
    writeModuleToFile(*TheModule, OutputName);
  }
}

/// Code generation step: for every file named on the command line, load it as
/// a module, run ThinGenerator.codegen() on it and emit the contents of the
/// returned buffer.
/// The destination is the output filename when one was given (accepted only
/// together with exactly one input file), otherwise the input filename
/// suffixed with ".thinlto.o". An output filename of "-" sends the buffer to
/// outs() instead of a file. A -thinlto-index value is not used by this step
/// and only triggers a warning.
void codegen() {
  if (InputFilenames.size() != 1 && !OutputFilename.empty())
    report_fatal_error("Can't handle a single output filename and multiple "
                       "input files, do not provide an output filename and "
                       "the output files will be suffixed from the input "
                       "ones.");
  // Terminate the warning with a newline so it does not run into whatever is
  // printed to stderr next.
  if (!ThinLTOIndex.empty())
    errs() << "Warning: -thinlto-index ignored for codegen stage" << '\n';

  for (auto &Filename : InputFilenames) {
    // A fresh context is used for each input file.
    LLVMContext Ctx;
    auto TheModule = loadModule(Filename, Ctx);

    auto Buffer = ThinGenerator.codegen(*TheModule);
    std::string OutputName = OutputFilename;
    if (OutputName.empty()) {
      OutputName = Filename + ".thinlto.o";
    }
    if (OutputName == "-") {
      // "-" can only come from an explicit output filename, which the check
      // above restricts to a single input, so there is nothing left to do.
      outs() << Buffer->getBuffer();
      return;
    }

    std::error_code EC;
    raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None);
    error(EC, "error opening the file '" + OutputName + "'");
    OS << Buffer->getBuffer();
  }
}

/// Full ThinLTO process: read every file named on the command line into a
/// memory buffer, register each one with the generator, call
/// ThinGenerator.run(), and write each produced binary to the matching input
/// filename suffixed with ".thinlto.o".
/// An output filename is rejected with a fatal error, as is a number of
/// produced binaries that differs from the number of inputs. A -thinlto-index
/// value is not used here and only triggers a warning.
void runAll() {
  if (!OutputFilename.empty())
    report_fatal_error("Do not provide an output filename for ThinLTO "
                       " processing, the output files will be suffixed from "
                       "the input ones.");

  // Terminate the warning with a newline so it does not run into whatever is
  // printed to stderr next.
  if (!ThinLTOIndex.empty())
    errs() << "Warning: -thinlto-index ignored for full ThinLTO process"
           << '\n';

  // The generator receives the buffer contents as a StringRef, so the owning
  // buffers are kept here for the rest of the function (presumably the
  // generator does not copy the data -- confirm against addModule()).
  std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;
  for (auto &Filename : InputFilenames) {
    // Own the message in a std::string: a StringRef would point into the
    // temporary produced by operator+ and dangle before it is used below.
    std::string CurrentActivity = "loading file '" + Filename + "'";
    auto InputOrErr = MemoryBuffer::getFile(Filename);
    error(InputOrErr, "error " + CurrentActivity);
    InputBuffers.push_back(std::move(*InputOrErr));
    ThinGenerator.addModule(Filename, InputBuffers.back()->getBuffer());
  }

  ThinGenerator.run();

  auto &Binaries = ThinGenerator.getProducedBinaries();
  if (Binaries.size() != InputFilenames.size())
    report_fatal_error("Number of output objects does not match the number "
                       "of inputs");

  // Binaries are paired with inputs by position (assumes the generator
  // produces them in the order the modules were added -- TODO confirm).
  for (unsigned BufID = 0; BufID < Binaries.size(); ++BufID) {
    auto OutputName = InputFilenames[BufID] + ".thinlto.o";
    std::error_code EC;
    raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None);
    error(EC, "error opening the file '" + OutputName + "'");
    OS << Binaries[BufID]->getBuffer();
  }
}

/// Load the combined index from disk, then load every file referenced by
};

} // namespace thinlto

int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
Expand All @@ -266,6 +548,14 @@ int main(int argc, char **argv) {
return 0;
}

if (ThinLTOMode.getNumOccurrences()) {
if (ThinLTOMode.getNumOccurrences() > 1)
report_fatal_error("You can't specify more than one -thinlto-action");
thinlto::ThinLTOProcessing ThinLTOProcessor(Options);
ThinLTOProcessor.run();
return 0;
}

if (ThinLTO) {
createCombinedFunctionIndex();
return 0;
Expand Down
105 changes: 105 additions & 0 deletions llvm/tools/lto/lto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOCodeGenerator.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/LTO/ThinLTOCodeGenerator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
Expand Down Expand Up @@ -134,6 +135,7 @@ struct LibLTOCodeGenerator : LTOCodeGenerator {
}

DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LibLTOCodeGenerator, lto_code_gen_t)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThinLTOCodeGenerator, thinlto_code_gen_t)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LTOModule, lto_module_t)

// Convert the subtarget features into a string to pass to LTOCodeGenerator.
Expand Down Expand Up @@ -440,3 +442,106 @@ void lto_codegen_set_should_embed_uselists(lto_code_gen_t cg,
lto_bool_t ShouldEmbedUselists) {
unwrap(cg)->setShouldEmbedUselists(ShouldEmbedUselists);
}

// ThinLTO API below

// Create a ThinLTO code generator for the C API: run lto_initialize(),
// allocate a ThinLTOCodeGenerator, give it the target options built by
// InitTargetOptionsFromCodeGenFlags(), and return it as an opaque handle.
// The object is deleted by thinlto_codegen_dispose().
thinlto_code_gen_t thinlto_create_codegen() {
  lto_initialize();

  auto *Generator = new ThinLTOCodeGenerator();
  Generator->setTargetOptions(InitTargetOptionsFromCodeGenFlags());
  return wrap(Generator);
}

// Delete the ThinLTOCodeGenerator behind the handle \p cg.
void thinlto_codegen_dispose(thinlto_code_gen_t cg) {
  auto *Generator = unwrap(cg);
  delete Generator;
}

// Register a module with the generator behind \p cg. \p Identifier names the
// module and the \p Length bytes starting at \p Data are passed on as its
// contents.
void thinlto_codegen_add_module(thinlto_code_gen_t cg, const char *Identifier,
                                const char *Data, int Length) {
  StringRef Contents(Data, Length);
  auto *Generator = unwrap(cg);
  Generator->addModule(Identifier, Contents);
}

// Call run() on the generator behind \p cg.
void thinlto_codegen_process(thinlto_code_gen_t cg) {
  auto *Generator = unwrap(cg);
  Generator->run();
}

// Return how many binaries the generator behind \p cg currently reports
// through getProducedBinaries().
unsigned int thinlto_module_get_num_objects(thinlto_code_gen_t cg) {
  auto &Objects = unwrap(cg)->getProducedBinaries();
  return Objects.size();
}
// Return the start address and size of the produced binary at position
// \p index for the generator behind \p cg. \p index must be smaller than the
// number of produced binaries; this is only checked by an assertion.
LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg,
                                          unsigned int index) {
  auto &Objects = unwrap(cg)->getProducedBinaries();
  assert(index < Objects.size() && "Index overflow");

  auto &Object = Objects[index];
  // The C structure carries a non-const pointer, so constness is dropped.
  const void *Start = Object->getBufferStart();
  return LTOObjectBuffer{const_cast<void *>(Start), Object->getBufferSize()};
}

// Pass \p number option strings from \p options to
// cl::ParseCommandLineOptions(), preceded by "libLTO" in the argv[0] slot.
// Nothing happens when \p options is null or \p number is not positive.
void thinlto_debug_options(const char *const *options, int number) {
  // A negative count would otherwise be turned into a huge unsigned length
  // and read far past the end of the caller's array.
  if (!options || number <= 0)
    return;

  std::vector<const char *> CodegenArgv(1, "libLTO");
  CodegenArgv.insert(CodegenArgv.end(), options, options + number);
  cl::ParseCommandLineOptions(CodegenArgv.size(), CodegenArgv.data());
}

// Return the result of isThinLTO() for the module behind \p mod.
bool lto_module_is_thinlto(lto_module_t mod) {
  auto *Module = unwrap(mod);
  return Module->isThinLTO();
}

// Pass the \p Length characters starting at \p Name to preserveSymbol() on
// the generator behind \p cg.
void thinlto_codegen_add_must_preserve_symbol(thinlto_code_gen_t cg,
                                              const char *Name, int Length) {
  StringRef Symbol(Name, Length);
  auto *Generator = unwrap(cg);
  Generator->preserveSymbol(Symbol);
}

// Pass the \p Length characters starting at \p Name to crossReferenceSymbol()
// on the generator behind \p cg.
void thinlto_codegen_add_cross_referenced_symbol(thinlto_code_gen_t cg,
                                                 const char *Name, int Length) {
  StringRef Symbol(Name, Length);
  auto *Generator = unwrap(cg);
  Generator->crossReferenceSymbol(Symbol);
}

// Forward \p cpu to setCpu() on the generator behind \p cg.
void thinlto_codegen_set_cpu(thinlto_code_gen_t cg, const char *cpu) {
  auto *Generator = unwrap(cg);
  Generator->setCpu(cpu);
}

// Forward \p cache_dir to setCacheDir() on the generator behind \p cg.
void thinlto_codegen_set_cache_dir(thinlto_code_gen_t cg,
                                   const char *cache_dir) {
  auto *Generator = unwrap(cg);
  Generator->setCacheDir(cache_dir);
}

// Forward \p interval to setCachePruningInterval() on the generator behind
// \p cg.
void thinlto_codegen_set_cache_pruning_interval(thinlto_code_gen_t cg,
                                                int interval) {
  auto *Generator = unwrap(cg);
  Generator->setCachePruningInterval(interval);
}

// Forward \p expiration to setCacheEntryExpiration() on the generator behind
// \p cg.
void thinlto_codegen_set_cache_entry_expiration(thinlto_code_gen_t cg,
                                                unsigned expiration) {
  auto *Generator = unwrap(cg);
  Generator->setCacheEntryExpiration(expiration);
}

// Forward \p Percentage to setMaxCacheSizeRelativeToAvailableSpace() on the
// generator behind \p cg.
void thinlto_codegen_set_final_cache_size_relative_to_available_space(
    thinlto_code_gen_t cg, unsigned Percentage) {
  auto *Generator = unwrap(cg);
  Generator->setMaxCacheSizeRelativeToAvailableSpace(Percentage);
}

// Forward \p save_temps_dir to setSaveTempsDir() on the generator behind
// \p cg.
void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg,
                                       const char *save_temps_dir) {
  auto *Generator = unwrap(cg);
  Generator->setSaveTempsDir(save_temps_dir);
}

// Translate the C API PIC model \p model into the matching Reloc model and
// set it on the generator behind \p cg.
// Returns false when the model was recognized and applied. For any other
// value it records "Unknown PIC model" in sLastErrorString and returns true.
lto_bool_t thinlto_codegen_set_pic_model(thinlto_code_gen_t cg,
                                         lto_codegen_model model) {
  // Apply a relocation model and produce the "no error" result.
  auto Apply = [cg](Reloc::Model Chosen) -> lto_bool_t {
    unwrap(cg)->setCodePICModel(Chosen);
    return false;
  };

  // No default label: every enumerator is listed explicitly, and values
  // outside the enumeration fall through to the error path below.
  switch (model) {
  case LTO_CODEGEN_PIC_MODEL_STATIC:
    return Apply(Reloc::Static);
  case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
    return Apply(Reloc::PIC_);
  case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
    return Apply(Reloc::DynamicNoPIC);
  case LTO_CODEGEN_PIC_MODEL_DEFAULT:
    return Apply(Reloc::Default);
  }

  sLastErrorString = "Unknown PIC model";
  return true;
}
17 changes: 17 additions & 0 deletions llvm/tools/lto/lto.exports
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,20 @@ LLVMCreateDisasmCPU
LLVMDisasmDispose
LLVMDisasmInstruction
LLVMSetDisasmOptions
thinlto_create_codegen
thinlto_codegen_dispose
thinlto_codegen_add_module
thinlto_codegen_process
thinlto_module_get_num_objects
thinlto_module_get_object
thinlto_codegen_set_pic_model
thinlto_codegen_set_cache_dir
thinlto_codegen_set_cache_pruning_interval
thinlto_codegen_set_cache_entry_expiration
thinlto_codegen_set_savetemps_dir
thinlto_codegen_set_cpu
thinlto_debug_options
lto_module_is_thinlto
thinlto_codegen_add_must_preserve_symbol
thinlto_codegen_add_cross_referenced_symbol
thinlto_codegen_set_final_cache_size_relative_to_available_space