Skip to content

Commit 1ecbb72

Browse files
[DTLTO] [LLVM] Initial DTLTO cache implementation
This patch implements the DTLTO cache. The DTLTO cache is implemented the same way as the ThinLTO cache; in fact, the same Cache class is used for both of them. Because the codegen parameters differ between DTLTO and ThinLTO (DTLTO codegen is done by invoking clang, and its codegen parameters are not fully synchronized with the codegen parameters used by the LTO backend), the object files generated by DTLTO and ThinLTO might be different and shouldn't be mixed. If ThinLTO and DTLTO share the same cache directory, their cache files won't interfere with each other. I added a couple of test files in the cross-project-test/dtlto directory; if more tests are required for the initial implementation, I can add them.
1 parent 8f59a94 commit 1ecbb72

File tree

4 files changed

+254
-33
lines changed

4 files changed

+254
-33
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
REQUIRES: x86-registered-target, ld.lld
2+
3+
# Show that the ThinLTO cache works with DTLTO.
4+
5+
RUN: rm -rf %t && split-file %s %t && cd %t
6+
7+
# Compile source files into bitcode files.
8+
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
9+
10+
# Execute the linker and check that the cache is populated.
11+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
12+
RUN: main.o foo.o -o populate1.elf \
13+
RUN: -Wl,--thinlto-distributor=%python \
14+
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
15+
RUN: -Wl,--thinlto-remote-compiler=%clang \
16+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
17+
RUN: -Wl,--save-temps
18+
19+
# Check that two backend compilation jobs occurred.
20+
RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
21+
RUN: ls cache.dir/llvmcache.timestamp
22+
RUN: ls cache.dir | count 3
23+
24+
# Execute the linker again and check that a fully populated cache is used correctly,
25+
# i.e., no additional cache entries are created for cache hits.
26+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
27+
RUN: main.o foo.o -o populate2.elf \
28+
RUN: -Wl,--thinlto-distributor=%python \
29+
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
30+
RUN: -Wl,--thinlto-remote-compiler=%clang \
31+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
32+
RUN: -Wl,--save-temps
33+
34+
# Check that no backend compilation jobs occurred.
35+
RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 1
36+
RUN: ls cache.dir | count 3
37+
38+
RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
39+
RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c main.c -o main.O0.o
40+
41+
# Execute the linker again and check that the cache is populated correctly when there
42+
# are no cache hits but there are existing cache entries.
43+
# As a side effect, this also verifies that the optimization level is considered when
44+
# evaluating the cache entry key.
45+
46+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
47+
RUN: main.O0.o foo.O0.o -o populate3.elf \
48+
RUN: -Wl,--thinlto-distributor=%python \
49+
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
50+
RUN: -Wl,--thinlto-remote-compiler=%clang \
51+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
52+
RUN: -Wl,--save-temps
53+
54+
# Check that two new backend compilation jobs occurred.
55+
RUN: grep -wo args populate3.*.dist-file.json | wc -l | grep -qx 3
56+
RUN: ls cache.dir | count 5
57+
58+
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c main-partial.c
59+
60+
# Execute the linker and check that everything works correctly with the partially populated cache.
61+
# One more cache entry should be generated after this run.
62+
63+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
64+
RUN: main-partial.o foo.o -o main-partial.elf \
65+
RUN: -Wl,--thinlto-distributor=%python \
66+
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
67+
RUN: -Wl,--thinlto-remote-compiler=%clang \
68+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
69+
RUN: -Wl,--save-temps
70+
71+
# Check that one new backend compilation job occurred.
72+
RUN: grep -wo args main-partial.*.dist-file.json | wc -l | grep -qx 2
73+
RUN: ls cache.dir | count 6
74+
75+
#--- foo.c
76+
volatile int foo_int;
77+
__attribute__((retain)) int foo(int x) { return x + foo_int; }
78+
79+
#--- main.c
80+
extern int foo(int x);
81+
__attribute__((retain)) int main(int argc, char** argv) {
82+
return foo(argc);
83+
}
84+
85+
#--- main-partial.c
86+
extern int foo(int x);
87+
__attribute__((retain)) int main(int argc, char** argv) {
88+
return foo(argc+1);
89+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
REQUIRES: x86-registered-target, ld.lld
2+
3+
# This test verifies that a cache populated by a ThinLTO link is not reused by a DTLTO link and vice versa.
4+
5+
RUN: rm -rf %t && split-file %s %t && cd %t
6+
7+
# Compile source files into bitcode files.
8+
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
9+
10+
# Execute the linker and check that ThinLTO cache is populated.
11+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
12+
RUN: main.o foo.o -o main.elf \
13+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
14+
RUN: -Wl,--save-temps
15+
16+
RUN: ls cache.dir/llvmcache.timestamp
17+
RUN: ls cache.dir | count 3
18+
19+
# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, implying they do not share entries.
20+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
21+
RUN: main.o foo.o -o populate1.elf \
22+
RUN: -Wl,--thinlto-distributor=%python \
23+
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
24+
RUN: -Wl,--thinlto-remote-compiler=%clang \
25+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
26+
RUN: -Wl,--save-temps
27+
28+
# Check that two backend compilation jobs occurred.
29+
RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
30+
RUN: ls cache.dir | count 5
31+
32+
# Clean up cache directory.
33+
RUN: rm -rf cache.dir
34+
35+
# Execute the linker and check that DTLTO cache is populated.
36+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
37+
RUN: main.o foo.o -o populate2.elf \
38+
RUN: -Wl,--thinlto-distributor=%python \
39+
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
40+
RUN: -Wl,--thinlto-remote-compiler=%clang \
41+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
42+
RUN: -Wl,--save-temps
43+
44+
# Check that two backend compilation jobs occurred.
45+
RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3
46+
RUN: ls cache.dir/llvmcache.timestamp
47+
RUN: ls cache.dir | count 3
48+
49+
# Execute the linker and check that ThinLTO adds additional entries to the DTLTO cache,
50+
# implying they do not share entries.
51+
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
52+
RUN: main.o foo.o -o main.elf \
53+
RUN: -Wl,--thinlto-cache-dir=cache.dir \
54+
RUN: -Wl,--save-temps
55+
56+
RUN: ls cache.dir | count 5
57+
58+
#--- foo.c
59+
volatile int foo_int;
60+
__attribute__((retain)) int foo(int x) { return x + foo_int; }
61+
62+
#--- main.c
63+
extern int foo(int x);
64+
__attribute__((retain)) int main(int argc, char** argv) {
65+
return foo(argc);
66+
}

llvm/include/llvm/LTO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ struct Config {
281281
LLVM_ABI Error addSaveTemps(std::string OutputFileName,
282282
bool UseInputModulePath = false,
283283
const DenseSet<StringRef> &SaveTempsArgs = {});
284+
mutable uint8_t Dtlto = 0;
284285
};
285286

286287
struct LTOLLVMDiagnosticHandler : public DiagnosticHandler {

llvm/lib/LTO/LTO.cpp

Lines changed: 98 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ std::string llvm::computeLTOCacheKey(
168168
AddString(Conf.OverrideTriple);
169169
AddString(Conf.DefaultTriple);
170170
AddString(Conf.DwoDir);
171+
AddUint8(Conf.Dtlto);
171172

172173
// Include the hash for the current module
173174
auto ModHash = Index.getModuleHash(ModuleID);
@@ -2244,14 +2245,17 @@ class OutOfProcessThinBackend : public CGThinBackend {
22442245

22452246
SmallVector<StringRef, 0> CodegenOptions;
22462247
DenseSet<StringRef> CommonInputs;
2247-
2248+
std::atomic<uint64_t> CachedJobs{0};
22482249
// Information specific to individual backend compilation job.
22492250
struct Job {
22502251
unsigned Task;
22512252
StringRef ModuleID;
22522253
StringRef NativeObjectPath;
22532254
StringRef SummaryIndexPath;
22542255
ImportsFilesContainer ImportsFiles;
2256+
std::string CacheKey;
2257+
AddStreamFn CacheAddStream;
2258+
bool Cached = false;
22552259
};
22562260
// The set of backend compilations jobs.
22572261
SmallVector<Job> Jobs;
@@ -2265,12 +2269,15 @@ class OutOfProcessThinBackend : public CGThinBackend {
22652269
// The target triple to supply for backend compilations.
22662270
llvm::Triple Triple;
22672271

2272+
// File cache used to look up and store native objects produced by backend compilation jobs.
2273+
FileCache Cache;
2274+
22682275
public:
22692276
OutOfProcessThinBackend(
22702277
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
22712278
ThreadPoolStrategy ThinLTOParallelism,
22722279
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2273-
AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
2280+
AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
22742281
bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
22752282
StringRef LinkerOutputFile, StringRef Distributor,
22762283
ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
@@ -2280,14 +2287,16 @@ class OutOfProcessThinBackend : public CGThinBackend {
22802287
ShouldEmitImportsFiles, ThinLTOParallelism),
22812288
LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
22822289
DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
2283-
RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
2290+
RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
2291+
Cache(std::move(CacheFn)) {}
22842292

22852293
virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
22862294
llvm::Triple Triple) override {
22872295
UID = itostr(sys::Process::getProcessId());
22882296
Jobs.resize((size_t)ThinLTONumTasks);
22892297
this->ThinLTOTaskOffset = ThinLTOTaskOffset;
22902298
this->Triple = Triple;
2299+
this->Conf.Dtlto = 1;
22912300
}
22922301

22932302
Error start(
@@ -2304,13 +2313,14 @@ class OutOfProcessThinBackend : public CGThinBackend {
23042313
itostr(Task) + "." + UID + ".native.o");
23052314

23062315
Job &J = Jobs[Task - ThinLTOTaskOffset];
2307-
J = {
2308-
Task,
2309-
ModulePath,
2310-
Saver.save(ObjFilePath.str()),
2311-
Saver.save(ObjFilePath.str() + ".thinlto.bc"),
2312-
{} // Filled in by emitFiles below.
2313-
};
2316+
J = {Task,
2317+
ModulePath,
2318+
Saver.save(ObjFilePath.str()),
2319+
Saver.save(ObjFilePath.str() + ".thinlto.bc"),
2320+
{}, // Filled in by emitFiles below.
2321+
"",
2322+
nullptr,
2323+
false};
23142324

23152325
assert(ModuleToDefinedGVSummaries.count(ModulePath));
23162326

@@ -2326,6 +2336,35 @@ class OutOfProcessThinBackend : public CGThinBackend {
23262336
else
23272337
Err = std::move(E);
23282338
}
2339+
2340+
if (Cache.isValid() &&
2341+
CombinedIndex.modulePaths().count(J.ModuleID) &&
2342+
all_of(CombinedIndex.getModuleHash(J.ModuleID),
2343+
[](uint32_t V) { return V != 0; })) {
2344+
2345+
const GVSummaryMapTy &DefinedGlobals =
2346+
ModuleToDefinedGVSummaries.find(ModulePath)->second;
2347+
2348+
// Compute and store a bitcode module cache key.
2349+
J.CacheKey = computeLTOCacheKey(
2350+
Conf, CombinedIndex, ModulePath, ImportList, ExportList,
2351+
ResolvedODR, DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
2352+
2353+
// Check if we have something in the cache.
2354+
auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
2355+
if (Error E = CacheAddStreamExp.takeError()) {
2356+
Err = joinErrors(std::move(*Err), std::move(E));
2357+
} else {
2358+
AddStreamFn &CacheAddStream = *CacheAddStreamExp;
2359+
if (!CacheAddStream) {
2360+
J.Cached = true; // Cache hit, mark the job as cached.
2361+
CachedJobs.fetch_add(1);
2362+
} else {
2363+
// Cache miss, save the cache 'add stream' function for later use.
2364+
J.CacheAddStream = std::move(CacheAddStream);
2365+
}
2366+
}
2367+
}
23292368
},
23302369
std::ref(J), std::ref(ImportList));
23312370

@@ -2417,6 +2456,9 @@ class OutOfProcessThinBackend : public CGThinBackend {
24172456
for (const auto &J : Jobs) {
24182457
assert(J.Task != 0);
24192458

2459+
if (!Cache.getCacheDirectoryPath().empty() && J.Cached)
2460+
continue;
2461+
24202462
SmallVector<StringRef, 2> Inputs;
24212463
SmallVector<StringRef, 1> Outputs;
24222464

@@ -2488,20 +2530,26 @@ class OutOfProcessThinBackend : public CGThinBackend {
24882530
removeFile(JsonFile);
24892531
});
24902532

2491-
SmallVector<StringRef, 3> Args = {DistributorPath};
2492-
llvm::append_range(Args, DistributorArgs);
2493-
Args.push_back(JsonFile);
2494-
std::string ErrMsg;
2495-
if (sys::ExecuteAndWait(Args[0], Args,
2496-
/*Env=*/std::nullopt, /*Redirects=*/{},
2497-
/*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
2498-
return make_error<StringError>(
2499-
BCError + "distributor execution failed" +
2500-
(!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2501-
inconvertibleErrorCode());
2533+
if (CachedJobs.load() < Jobs.size()) {
2534+
SmallVector<StringRef, 3> Args = {DistributorPath};
2535+
llvm::append_range(Args, DistributorArgs);
2536+
Args.push_back(JsonFile);
2537+
std::string ErrMsg;
2538+
if (sys::ExecuteAndWait(Args[0], Args,
2539+
/*Env=*/std::nullopt, /*Redirects=*/{},
2540+
/*SecondsToWait=*/0, /*MemoryLimit=*/0,
2541+
&ErrMsg)) {
2542+
return make_error<StringError>(
2543+
BCError + "distributor execution failed" +
2544+
(!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2545+
inconvertibleErrorCode());
2546+
}
25022547
}
25032548

25042549
for (auto &Job : Jobs) {
2550+
if (Cache.isValid() && !Job.CacheKey.empty())
2551+
if (Job.Cached)
2552+
continue;
25052553
// Load the native object from a file into a memory buffer
25062554
// and store its contents in the output buffer.
25072555
auto ObjFileMbOrErr =
@@ -2512,15 +2560,32 @@ class OutOfProcessThinBackend : public CGThinBackend {
25122560
BCError + "cannot open native object file: " +
25132561
Job.NativeObjectPath + ": " + EC.message(),
25142562
inconvertibleErrorCode());
2515-
auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
2516-
if (Error Err = StreamOrErr.takeError())
2517-
report_fatal_error(std::move(Err));
2518-
auto &Stream = *StreamOrErr->get();
2519-
*Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer();
2520-
if (Error Err = Stream.commit())
2521-
report_fatal_error(std::move(Err));
2522-
}
25232563

2564+
MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
2565+
if (Cache.isValid() && Job.CacheAddStream) {
2566+
// Obtain a file stream for storing a cache entry.
2567+
auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID);
2568+
if (!CachedFileStreamOrErr)
2569+
return joinErrors(
2570+
CachedFileStreamOrErr.takeError(),
2571+
createStringError(inconvertibleErrorCode(),
2572+
"Cannot get a cache file stream: %s",
2573+
Job.NativeObjectPath.data()));
2574+
// Store a file buffer into the cache stream.
2575+
auto &CacheStream = *(CachedFileStreamOrErr->get());
2576+
*(CacheStream.OS) << ObjFileMbRef.getBuffer();
2577+
if (Error Err = CacheStream.commit())
2578+
return Err;
2579+
} else {
2580+
auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
2581+
if (Error Err = StreamOrErr.takeError())
2582+
report_fatal_error(std::move(Err));
2583+
auto &Stream = *StreamOrErr->get();
2584+
*Stream.OS << ObjFileMbRef.getBuffer();
2585+
if (Error Err = Stream.commit())
2586+
report_fatal_error(std::move(Err));
2587+
}
2588+
}
25242589
return Error::success();
25252590
}
25262591
};
@@ -2535,12 +2600,12 @@ ThinBackend lto::createOutOfProcessThinBackend(
25352600
auto Func =
25362601
[=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
25372602
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2538-
AddStreamFn AddStream, FileCache /*Cache*/) {
2603+
AddStreamFn AddStream, FileCache Cache) {
25392604
return std::make_unique<OutOfProcessThinBackend>(
25402605
Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
2541-
AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
2542-
LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler,
2543-
RemoteCompilerArgs, SaveTemps);
2606+
AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
2607+
ShouldEmitImportsFiles, LinkerOutputFile, Distributor,
2608+
DistributorArgs, RemoteCompiler, RemoteCompilerArgs, SaveTemps);
25442609
};
25452610
return ThinBackend(Func, Parallelism);
25462611
}

0 commit comments

Comments
 (0)