Skip to content

Commit

Permalink
LTO: Simplify caching interface.
Browse files Browse the repository at this point in the history
The NativeObjectOutput class has a design problem: it mixes up the caching
policy with the interface for output streams, which makes the client-side
code hard to follow and would for example make it harder to replace the
cache implementation in an arbitrary client.

This change separates the two aspects by moving the caching policy
to a separate field in Config, replacing NativeObjectOutput with a
NativeObjectStream class which only deals with streams and does not need to
be overridden by most clients and introducing an AddFile callback for adding
files (e.g. from the cache) to the link.

Differential Revision: https://reviews.llvm.org/D24622

llvm-svn: 282299
  • Loading branch information
pcc committed Sep 23, 2016
1 parent 6951707 commit 80186a5
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 301 deletions.
85 changes: 11 additions & 74 deletions llvm/include/llvm/LTO/Caching.h
Expand Up @@ -7,92 +7,29 @@
//
//===----------------------------------------------------------------------===//
//
// This file defines the lto::CacheObjectOutput data structure, which allows
// clients to add a filesystem cache to ThinLTO
// This file defines the localCache function, which allows clients to add a
// filesystem cache to ThinLTO.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LTO_CACHING_H
#define LLVM_LTO_CACHING_H

#include "llvm/ADT/SmallString.h"
#include "llvm/LTO/Config.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/LTO/LTO.h"
#include <string>

namespace llvm {
namespace lto {
/// Callback type supplied by the client; it is invoked with a path (as a
/// std::string) once a buffer has been loaded from the cache.
using AddBufferFn = std::function<void(std::string)>;

/// Manage caching on the filesystem.
/// This type defines the callback to add a pre-existing native object file
/// (e.g. in a cache).
///
/// The general scheme is the following:
///
/// void do_stuff(AddBufferFn CallBack) {
/// /* ... */
/// {
/// /* Create the CacheObjectOutput pointing to a cache directory */
/// auto Output = CacheObjectOutput("/tmp/cache", CallBack);
///
/// /* Call some processing function */
/// process(Output);
///
/// } /* Callback is only called now, on destruction of the Output object */
/// /* ... */
/// }
///
///
/// void process(NativeObjectOutput &Output) {
/// /* check if caching is supported */
/// if (Output.isCachingEnabled()) {
/// auto Key = ComputeKeyForEntry(...); // "expensive" call
/// if (Output.tryLoadFromCache(Key))
/// return; // Cache hit
/// }
///
/// auto OS = Output.getStream();
///
/// *OS << ...;
/// /* Note that the callback is not called here, but only when the caller
/// destroys Output */
/// }
///
class CacheObjectOutput : public NativeObjectOutput {
/// Path to the on-disk cache directory.
/// NOTE(review): held as a StringRef, so presumably the caller's string must
/// outlive this object -- confirm against callers.
StringRef CacheDirectoryPath;
/// Path to this entry in the cache, initialized by tryLoadFromCache().
SmallString<128> EntryPath;
/// Path to temporary file used to buffer output that will be committed to the
/// cache entry when this object is destroyed.
SmallString<128> TempFilename;
/// User-supplied callback, used to provide path to cache entry
/// (potentially after creating it).
AddBufferFn AddBuffer;

public:
/// The destructor pulls the entry from the cache and calls the AddBuffer
/// callback, after committing the entry into the cache on miss.
~CacheObjectOutput();

/// Create a CacheObjectOutput: the client is supposed to create it in the
/// callback supplied to LTO::run. The \p CacheDirectoryPath points to the
/// directory on disk where to store the cache, and \p AddBuffer will be
/// called when the buffer is ready to be pulled out of the cache
/// (potentially after creating it).
CacheObjectOutput(StringRef CacheDirectoryPath, AddBufferFn AddBuffer)
: CacheDirectoryPath(CacheDirectoryPath), AddBuffer(AddBuffer) {}

/// Return an allocated stream for the output, or null in case of failure.
/// NOTE(review): the out-of-line definition shown for this method reports a
/// fatal error when the temporary file cannot be created -- confirm whether
/// null is ever actually returned.
std::unique_ptr<raw_pwrite_stream> getStream() override;

/// Set EntryPath, try loading from a possible cache first, return true on
/// cache hit.
bool tryLoadFromCache(StringRef Key) override;
/// File callbacks must be thread safe.
typedef std::function<void(unsigned Task, StringRef Path)> AddFileFn;

/// Returns true to signal that this implementation of NativeObjectOutput
/// supports caching.
bool isCachingEnabled() const override { return true; }
};
/// Create a local file system cache which uses the given cache directory and
/// file callback.
NativeObjectCache localCache(std::string CacheDirectoryPath, AddFileFn AddFile);

} // namespace lto
} // namespace llvm
Expand Down
39 changes: 0 additions & 39 deletions llvm/include/llvm/LTO/Config.h
Expand Up @@ -30,38 +30,6 @@ class raw_pwrite_stream;

namespace lto {

/// Abstract class representing a single Task output to be implemented by the
/// client of the LTO API.
///
/// The general scheme by which the API is called is the following:
///
/// void process(NativeObjectOutput &Output) {
/// /* check if caching is supported */
/// if (Output.isCachingEnabled()) {
/// auto Key = ComputeKeyForEntry(...); // "expensive" call
/// if (Output.tryLoadFromCache(Key))
/// return; // Cache hit
/// }
///
/// auto OS = Output.getStream();
///
/// *OS << ....;
/// }
///
class NativeObjectOutput {
public:
/// Return an allocated stream for the output, or null in case of failure.
/// This is the only member that implementations are required to provide.
virtual std::unique_ptr<raw_pwrite_stream> getStream() = 0;

/// Try loading from a possible cache first, return true on cache hit.
/// The default implementation ignores \p Key and always reports a miss.
virtual bool tryLoadFromCache(StringRef Key) { return false; }

/// Returns true if a cache is available. The default implementation returns
/// false.
virtual bool isCachingEnabled() const { return false; }

/// Virtual so that implementations can be destroyed through a pointer to
/// this base class.
virtual ~NativeObjectOutput() = default;
};

/// LTO configuration. A linker can configure LTO by setting fields in this data
/// structure and passing it to the lto::LTO constructor.
struct Config {
Expand Down Expand Up @@ -235,13 +203,6 @@ struct Config {
bool UseInputModulePath = false);
};

/// This type defines the callback to add a native object that is generated on
/// the fly.
///
/// Output callbacks must be thread safe.
typedef std::function<std::unique_ptr<NativeObjectOutput>(unsigned Task)>
AddOutputFn;

/// A derived class of LLVMContext that initializes itself according to a given
/// Config object. The purpose of this class is to tie ownership of the
/// diagnostic handler to the context, as opposed to the Config object (which
Expand Down
58 changes: 50 additions & 8 deletions llvm/include/llvm/LTO/LTO.h
Expand Up @@ -247,13 +247,47 @@ class InputFile {
}
};

/// Owns the output stream that a native object is written to. In most cases a
/// client can simply return an instance of this base class from its stream
/// callback. A client that has to perform some action once the stream has been
/// written to can derive from this class and do that work in an overriding
/// destructor.
class NativeObjectStream {
public:
  /// The stream that receives the native object.
  std::unique_ptr<raw_pwrite_stream> OS;

  /// Take ownership of \p Stream.
  NativeObjectStream(std::unique_ptr<raw_pwrite_stream> Stream)
      : OS(std::move(Stream)) {}

  virtual ~NativeObjectStream() = default;
};

/// Callback type used to add a native object that is generated on the fly. It
/// takes the task identifier and returns the native object stream to write to.
///
/// Stream callbacks must be thread safe.
using AddStreamFn =
    std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>;

/// The type of a native object cache. An item is requested by calling the
/// cache with a task identifier and a unique string Key. On a hit, the cached
/// file is added to the link and the call returns AddStreamFn(). On a miss,
/// the call returns a stream callback, which must be called at most once to
/// produce content for the stream; the native object stream produced by that
/// callback adds the file to the link after the stream is written to.
///
/// Clients generally look like this:
///
///   if (AddStreamFn AddStream = Cache(Task, Key))
///     ProduceContent(AddStream);
using NativeObjectCache =
    std::function<AddStreamFn(unsigned Task, StringRef Key)>;

/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
/// The details of this type definition aren't important; clients can only
/// create a ThinBackend using one of the create*ThinBackend() functions below.
typedef std::function<std::unique_ptr<ThinBackendProc>(
Config &C, ModuleSummaryIndex &CombinedIndex,
StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddOutputFn AddOutput)>
AddStreamFn AddStream, NativeObjectCache Cache)>
ThinBackend;

/// This ThinBackend runs the individual backend jobs in-process.
Expand Down Expand Up @@ -286,8 +320,9 @@ ThinBackend createWriteIndexesThinBackend(std::string OldPrefix,
/// and pass it and an array of symbol resolutions to the add() function.
/// - Call the getMaxTasks() function to get an upper bound on the number of
/// native object files that LTO may add to the link.
/// - Call the run() function. This function will use the supplied AddOutput
/// function to add up to getMaxTasks() native object files to the link.
/// - Call the run() function. This function will use the supplied AddStream
/// and Cache functions to add up to getMaxTasks() native object files to
/// the link.
class LTO {
friend InputFile;

Expand All @@ -310,9 +345,15 @@ class LTO {
/// full description of tasks see LTOBackend.h.
unsigned getMaxTasks() const;

/// Runs the LTO pipeline. This function calls the supplied AddOutput function
/// to add native object files to the link.
Error run(AddOutputFn AddOutput);
/// Runs the LTO pipeline. This function calls the supplied AddStream
/// function to add native object files to the link.
///
/// The Cache parameter is optional. If supplied, it will be used to cache
/// native object files and add them to the link.
///
/// The client will receive at most one callback (via either AddStream or
/// Cache) for each task identifier.
Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);

private:
Config Conf;
Expand Down Expand Up @@ -393,8 +434,9 @@ class LTO {
Error addThinLTO(std::unique_ptr<InputFile> Input,
ArrayRef<SymbolResolution> Res);

Error runRegularLTO(AddOutputFn AddOutput);
Error runThinLTO(AddOutputFn AddOutput, bool HasRegularLTO);
Error runRegularLTO(AddStreamFn AddStream);
Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
bool HasRegularLTO);

mutable bool CalledGetMaxTasks = false;
};
Expand Down
6 changes: 3 additions & 3 deletions llvm/include/llvm/LTO/LTOBackend.h
Expand Up @@ -20,7 +20,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
Expand All @@ -34,12 +34,12 @@ class Target;
namespace lto {

/// Runs a regular LTO backend.
Error backend(Config &C, AddOutputFn AddStream,
Error backend(Config &C, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
std::unique_ptr<Module> M);

/// Runs a ThinLTO backend.
Error thinBackend(Config &C, unsigned Task, AddOutputFn AddStream, Module &M,
Error thinBackend(Config &C, unsigned Task, AddStreamFn AddStream, Module &M,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
Expand Down
101 changes: 51 additions & 50 deletions llvm/lib/LTO/Caching.cpp
Expand Up @@ -12,13 +12,9 @@
//===----------------------------------------------------------------------===//

#include "llvm/LTO/Caching.h"

#ifdef HAVE_LLVM_REVISION
#include "LLVMLTORevision.h"
#endif

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

Expand All @@ -30,6 +26,8 @@ static void commitEntry(StringRef TempFilename, StringRef EntryPath) {
auto EC = sys::fs::rename(TempFilename, EntryPath);
if (EC) {
// Renaming failed, probably not the same filesystem, copy and delete.
// FIXME: Avoid needing to do this by creating the temporary file in the
// cache directory.
{
auto ReloadedBufferOrErr = MemoryBuffer::getFile(TempFilename);
if (auto EC = ReloadedBufferOrErr.getError())
Expand All @@ -48,51 +46,54 @@ static void commitEntry(StringRef TempFilename, StringRef EntryPath) {
}
}

CacheObjectOutput::~CacheObjectOutput() {
  // An empty EntryPath means tryLoadFromCache() was never called: the client
  // never used this entry, so there is nothing to report.
  if (EntryPath.empty())
    return;

  // TempFilename is only filled in by getStream(), i.e. after a cache miss
  // where tryLoadFromCache() returned false.
  const bool WroteTemporary = !TempFilename.empty();
  if (WroteTemporary && EntryPath == CacheDirectoryPath) {
    // tryLoadFromCache() sets EntryPath to CacheDirectoryPath when it is given
    // an empty Key. In that case the temporary is not committed; its own path
    // is reported instead.
    EntryPath = TempFilename;
  } else if (WroteTemporary) {
    // A Key was supplied: move the temporary into the cache at EntryPath.
    commitEntry(TempFilename, EntryPath);
  }

  // Give the resulting path to the user.
  AddBuffer(EntryPath.str());
}
NativeObjectCache lto::localCache(std::string CacheDirectoryPath,
AddFileFn AddFile) {
return [=](unsigned Task, StringRef Key) -> AddStreamFn {
// First, see if we have a cache hit.
SmallString<64> EntryPath;
sys::path::append(EntryPath, CacheDirectoryPath, Key);
if (sys::fs::exists(EntryPath)) {
AddFile(Task, EntryPath);
return AddStreamFn();
}

// Return an allocated stream for the output, or null in case of failure.
std::unique_ptr<raw_pwrite_stream> CacheObjectOutput::getStream() {
assert(!EntryPath.empty() && "API Violation: client didn't call "
"tryLoadFromCache() before getStream()");
// Write to a temporary to avoid race condition
int TempFD;
std::error_code EC =
sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
report_fatal_error("ThinLTO: Can't get a temporary file");
}
return llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true);
}
// This native object stream is responsible for committing the resulting
// file to the cache and calling AddFile to add it to the link.
struct CacheStream : NativeObjectStream {
AddFileFn AddFile;
std::string TempFilename;
std::string EntryPath;
unsigned Task;

// Try loading from a possible cache first, return true on cache hit.
bool CacheObjectOutput::tryLoadFromCache(StringRef Key) {
assert(!CacheDirectoryPath.empty() &&
"CacheObjectOutput was initialized without a cache path");
if (Key.empty()) {
// Client didn't compute a valid key. EntryPath has been set to
// CacheDirectoryPath.
EntryPath = CacheDirectoryPath;
return false;
}
sys::path::append(EntryPath, CacheDirectoryPath, Key);
return sys::fs::exists(EntryPath);
CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddFileFn AddFile,
std::string TempFilename, std::string EntryPath,
unsigned Task)
: NativeObjectStream(std::move(OS)), AddFile(AddFile),
TempFilename(TempFilename), EntryPath(EntryPath), Task(Task) {}

~CacheStream() {
// Make sure the file is closed before committing it.
OS.reset();
commitEntry(TempFilename, EntryPath);
AddFile(Task, EntryPath);
}
};

return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> {
// Write to a temporary to avoid race condition
int TempFD;
SmallString<64> TempFilename;
std::error_code EC =
sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
report_fatal_error("ThinLTO: Can't get a temporary file");
}

// This CacheStream will move the temporary file into the cache when done.
return make_unique<CacheStream>(
llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true),
AddFile, TempFilename.str(), EntryPath.str(), Task);
};
};
}

0 comments on commit 80186a5

Please sign in to comment.