Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions cross-project-tests/dtlto/dtlto-cache.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
REQUIRES: x86-registered-target, ld.lld

# Show that the ThinLTO cache works with DTLTO.

RUN: rm -rf %t && split-file %s %t && cd %t

# Compile source files into bitcode files.
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c

# Execute the linker and check that the cache is populated.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.o foo.o -o populate1.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

# Check that there are two backend compilation jobs occurred.
RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
RUN: ls cache.dir/llvmcache.timestamp
RUN: ls cache.dir | count 3

# Execute the linker again and check that a fully populated cache is used correctly,
# i.e., no additional cache entries are created for cache hits.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.o foo.o -o populate2.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

# Check that there are no backend compilation jobs occurred.
RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 1
RUN: ls cache.dir | count 3

RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c main.c -o main.O0.o

# Execute the linker again and check that the cache is populated correctly when there
# are no cache hits but there are existing cache entries.
# As a side effect, this also verifies that the optimization level is considered when
# evaluating the cache entry key.

RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.O0.o foo.O0.o -o populate3.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

# Check that there are two new backend compilation jobs occurred.
RUN: grep -wo args populate3.*.dist-file.json | wc -l | grep -qx 3
RUN: ls cache.dir | count 5

RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c main-partial.c

# Execute the linker and check that everything works correctly with the partially populated cache;
# One more cache entry should be generated after this run.

RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main-partial.o foo.o -o main-partial.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

# Check that there is one new backend compilation jobs occurred.
RUN: grep -wo args main-partial.*.dist-file.json | wc -l | grep -qx 2
RUN: ls cache.dir | count 6

#--- foo.c
volatile int foo_int;
__attribute__((retain)) int foo(int x) { return x + foo_int; }

#--- main.c
extern int foo(int x);
__attribute__((retain)) int main(int argc, char** argv) {
return foo(argc);
}

#--- main-partial.c
extern int foo(int x);
__attribute__((retain)) int main(int argc, char** argv) {
return foo(argc+1);
}
66 changes: 66 additions & 0 deletions cross-project-tests/dtlto/dtlto-thinlto-cache.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
REQUIRES: x86-registered-target, ld.lld

# This test verifies that a cache populated by a ThinLTO link is not reused by a DTLTO link and vice versa.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably make this "by an in-process ThinLTO link", since DTLTO is also ThinLTO.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto other places below


RUN: rm -rf %t && split-file %s %t && cd %t

# Compile source files into bitcode files.
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c

# Execute the linker and check that ThinLTO cache is populated.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.o foo.o -o main.elf \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

RUN: ls cache.dir/llvmcache.timestamp
RUN: ls cache.dir | count 3

# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, implying they do not share entries.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.o foo.o -o populate1.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

# Check that there are two backend compilation jobs occurred.
RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
RUN: ls cache.dir | count 5

# Clean up cache directory.
RUN: rm -rf cache.dir

# Execute the linker and check that DTLTO cache is populated.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.o foo.o -o populate2.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

# Check that there are two backend compilation jobs occurred.
RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3
RUN: ls cache.dir/llvmcache.timestamp
RUN: ls cache.dir | count 3

# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache,
# implying they do not share entries.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
RUN: main.o foo.o -o main.elf \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps

RUN: ls cache.dir | count 5

#--- foo.c
volatile int foo_int;
__attribute__((retain)) int foo(int x) { return x + foo_int; }

#--- main.c
extern int foo(int x);
__attribute__((retain)) int main(int argc, char** argv) {
return foo(argc);
}
1 change: 1 addition & 0 deletions llvm/include/llvm/LTO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ struct Config {
LLVM_ABI Error addSaveTemps(std::string OutputFileName,
bool UseInputModulePath = false,
const DenseSet<StringRef> &SaveTempsArgs = {});
mutable uint8_t Dtlto = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bool? Also, add a comment about the need for this variable.

};

struct LTOLLVMDiagnosticHandler : public DiagnosticHandler {
Expand Down
131 changes: 98 additions & 33 deletions llvm/lib/LTO/LTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ std::string llvm::computeLTOCacheKey(
AddString(Conf.OverrideTriple);
AddString(Conf.DefaultTriple);
AddString(Conf.DwoDir);
AddUint8(Conf.Dtlto);

// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Expand Down Expand Up @@ -2244,14 +2245,17 @@ class OutOfProcessThinBackend : public CGThinBackend {

SmallVector<StringRef, 0> CodegenOptions;
DenseSet<StringRef> CommonInputs;

std::atomic<uint64_t> CachedJobs{0};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size_t should be enough

Copy link
Contributor Author

@romanova-ekaterina romanova-ekaterina Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you suggesting to change it to
std::atomic<size_t> CachedJobs{0};
or to
size_t CachedJobs{0};

This variable is changed in the thread pool. We cannot safely get rid of atomic because of a data race.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vitaly, I was wondering if there are some other comments related to this PR?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comment description

// Information specific to individual backend compilation job.
struct Job {
unsigned Task;
StringRef ModuleID;
StringRef NativeObjectPath;
StringRef SummaryIndexPath;
ImportsFilesContainer ImportsFiles;
std::string CacheKey;
AddStreamFn CacheAddStream;
bool Cached = false;
};
// The set of backend compilations jobs.
SmallVector<Job> Jobs;
Expand All @@ -2265,12 +2269,15 @@ class OutOfProcessThinBackend : public CGThinBackend {
// The target triple to supply for backend compilations.
llvm::Triple Triple;

// Cache
FileCache Cache;

public:
OutOfProcessThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
StringRef LinkerOutputFile, StringRef Distributor,
ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
Expand All @@ -2280,14 +2287,16 @@ class OutOfProcessThinBackend : public CGThinBackend {
ShouldEmitImportsFiles, ThinLTOParallelism),
LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
Cache(std::move(CacheFn)) {}

virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
llvm::Triple Triple) override {
UID = itostr(sys::Process::getProcessId());
Jobs.resize((size_t)ThinLTONumTasks);
this->ThinLTOTaskOffset = ThinLTOTaskOffset;
this->Triple = Triple;
this->Conf.Dtlto = 1;
}

Error start(
Expand All @@ -2304,13 +2313,14 @@ class OutOfProcessThinBackend : public CGThinBackend {
itostr(Task) + "." + UID + ".native.o");

Job &J = Jobs[Task - ThinLTOTaskOffset];
J = {
Task,
ModulePath,
Saver.save(ObjFilePath.str()),
Saver.save(ObjFilePath.str() + ".thinlto.bc"),
{} // Filled in by emitFiles below.
};
J = {Task,
ModulePath,
Saver.save(ObjFilePath.str()),
Saver.save(ObjFilePath.str() + ".thinlto.bc"),
{}, // Filled in by emitFiles below.
"",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document constant values with comment e.g. /*CacheKey=*/

nullptr,
false};

assert(ModuleToDefinedGVSummaries.count(ModulePath));

Expand All @@ -2326,6 +2336,35 @@ class OutOfProcessThinBackend : public CGThinBackend {
else
Err = std::move(E);
}

if (Cache.isValid() &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This lambda is getting pretty large. Can you refactor it into a runThinLTOBackendThread helper, and as much as possible structure it the same way as InProcessThinBackend::runThinLTOBackendThread ? Eventually it may make sense to move some of this into the base CGThinBackend, but at the very least, making the flow similar will help with comparing the functionality and code maintenance.

CombinedIndex.modulePaths().count(J.ModuleID) &&
all_of(CombinedIndex.getModuleHash(J.ModuleID),
[](uint32_t V) { return V != 0; })) {

const GVSummaryMapTy &DefinedGlobals =
ModuleToDefinedGVSummaries.find(ModulePath)->second;

// Compute and store a bitcode module cache key.
J.CacheKey = computeLTOCacheKey(
Conf, CombinedIndex, ModulePath, ImportList, ExportList,
ResolvedODR, DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);

// Check if we have something in the cache.
auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
if (Error E = CacheAddStreamExp.takeError()) {
Err = joinErrors(std::move(*Err), std::move(E));
} else {
AddStreamFn &CacheAddStream = *CacheAddStreamExp;
if (!CacheAddStream) {
J.Cached = true; // Cache hit, mark the job as cached.
CachedJobs.fetch_add(1);
} else {
// Cache miss, save cache 'add stream' function for a later use.
J.CacheAddStream = std::move(CacheAddStream);
}
}
}
},
std::ref(J), std::ref(ImportList));

Expand Down Expand Up @@ -2417,6 +2456,9 @@ class OutOfProcessThinBackend : public CGThinBackend {
for (const auto &J : Jobs) {
assert(J.Task != 0);

if (!Cache.getCacheDirectoryPath().empty() && J.Cached)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When would Cache.getCacheDirectoryPath().empty() be true at the same time as J.Cached?

continue;

SmallVector<StringRef, 2> Inputs;
SmallVector<StringRef, 1> Outputs;

Expand Down Expand Up @@ -2488,20 +2530,26 @@ class OutOfProcessThinBackend : public CGThinBackend {
removeFile(JsonFile);
});

SmallVector<StringRef, 3> Args = {DistributorPath};
llvm::append_range(Args, DistributorArgs);
Args.push_back(JsonFile);
std::string ErrMsg;
if (sys::ExecuteAndWait(Args[0], Args,
/*Env=*/std::nullopt, /*Redirects=*/{},
/*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
return make_error<StringError>(
BCError + "distributor execution failed" +
(!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
inconvertibleErrorCode());
if (CachedJobs.load() < Jobs.size()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment. I don't have a good understanding of what the CachedJobs count is for and how that works.

SmallVector<StringRef, 3> Args = {DistributorPath};
llvm::append_range(Args, DistributorArgs);
Args.push_back(JsonFile);
std::string ErrMsg;
if (sys::ExecuteAndWait(Args[0], Args,
/*Env=*/std::nullopt, /*Redirects=*/{},
/*SecondsToWait=*/0, /*MemoryLimit=*/0,
&ErrMsg)) {
return make_error<StringError>(
BCError + "distributor execution failed" +
(!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
inconvertibleErrorCode());
}
}

for (auto &Job : Jobs) {
if (Cache.isValid() && !Job.CacheKey.empty())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can Cache.isValid be false if CacheKey is not empty?

if (Job.Cached)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Combine with enclosing if check on prior line?

continue;
// Load the native object from a file into a memory buffer
// and store its contents in the output buffer.
auto ObjFileMbOrErr =
Expand All @@ -2512,15 +2560,32 @@ class OutOfProcessThinBackend : public CGThinBackend {
BCError + "cannot open native object file: " +
Job.NativeObjectPath + ": " + EC.message(),
inconvertibleErrorCode());
auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
if (Error Err = StreamOrErr.takeError())
report_fatal_error(std::move(Err));
auto &Stream = *StreamOrErr->get();
*Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer();
if (Error Err = Stream.commit())
report_fatal_error(std::move(Err));
}

MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
if (Cache.isValid() && Job.CacheAddStream) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can Cache.isValid be false if CacheAddStream is non-null?

// Obtain a file stream for a storing a cache entry.
auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID);
if (!CachedFileStreamOrErr)
return joinErrors(
CachedFileStreamOrErr.takeError(),
createStringError(inconvertibleErrorCode(),
"Cannot get a cache file stream: %s",
Job.NativeObjectPath.data()));
// Store a file buffer into the cache stream.
auto &CacheStream = *(CachedFileStreamOrErr->get());
*(CacheStream.OS) << ObjFileMbRef.getBuffer();
if (Error Err = CacheStream.commit())
return Err;
} else {
auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
if (Error Err = StreamOrErr.takeError())
report_fatal_error(std::move(Err));
auto &Stream = *StreamOrErr->get();
*Stream.OS << ObjFileMbRef.getBuffer();
if (Error Err = Stream.commit())
report_fatal_error(std::move(Err));
}
}
return Error::success();
}
};
Expand All @@ -2535,12 +2600,12 @@ ThinBackend lto::createOutOfProcessThinBackend(
auto Func =
[=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache /*Cache*/) {
AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<OutOfProcessThinBackend>(
Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler,
RemoteCompilerArgs, SaveTemps);
AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
ShouldEmitImportsFiles, LinkerOutputFile, Distributor,
DistributorArgs, RemoteCompiler, RemoteCompilerArgs, SaveTemps);
};
return ThinBackend(Func, Parallelism);
}