Skip to content

Commit 2edd897

Browse files
NuriAmari and Nuri Amari authored
Make WriteIndexesThinBackend multi threaded (#109847)
We've noticed that for large builds, executing the thin-link can take on the order of tens of minutes. We are only using a single thread to write the sharded indices and import files for each input bitcode file. While we need to ensure the index file produced lists modules in a deterministic order, that doesn't prevent us from executing the rest of the work in parallel.

In this change we use a thread pool to execute as much of the backend's work as possible in parallel. In local testing on a machine with 80 cores, this change makes a thin-link for ~100,000 input files run in ~2 minutes. Without this change it takes upwards of 10 minutes.

---------

Co-authored-by: Nuri Amari <nuriamari@fb.com>
1 parent 2fe1f84 commit 2edd897

File tree

15 files changed

+94
-61
lines changed

15 files changed

+94
-61
lines changed

lld/COFF/LTO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
118118
if (ctx.config.thinLTOIndexOnly) {
119119
auto OnIndexWrite = [&](StringRef S) { thinIndices.erase(S); };
120120
backend = lto::createWriteIndexesThinBackend(
121+
llvm::hardware_concurrency(ctx.config.thinLTOJobs),
121122
std::string(ctx.config.thinLTOPrefixReplaceOld),
122123
std::string(ctx.config.thinLTOPrefixReplaceNew),
123124
std::string(ctx.config.thinLTOPrefixReplaceNativeObject),

lld/ELF/LTO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
179179
auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); };
180180
if (ctx.arg.thinLTOIndexOnly) {
181181
backend = lto::createWriteIndexesThinBackend(
182+
llvm::hardware_concurrency(ctx.arg.thinLTOJobs),
182183
std::string(ctx.arg.thinLTOPrefixReplaceOld),
183184
std::string(ctx.arg.thinLTOPrefixReplaceNew),
184185
std::string(ctx.arg.thinLTOPrefixReplaceNativeObject),

lld/MachO/LTO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ BitcodeCompiler::BitcodeCompiler() {
8787
auto onIndexWrite = [&](StringRef S) { thinIndices.erase(S); };
8888
if (config->thinLTOIndexOnly) {
8989
backend = lto::createWriteIndexesThinBackend(
90+
llvm::hardware_concurrency(config->thinLTOJobs),
9091
std::string(config->thinLTOPrefixReplaceOld),
9192
std::string(config->thinLTOPrefixReplaceNew),
9293
std::string(config->thinLTOPrefixReplaceNativeObject),

lld/test/COFF/thinlto-emit-imports.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
; RUN: not lld-link -entry:main -thinlto-index-only \
3636
; RUN: -thinlto-emit-imports-files %t1.obj %t2.obj %t3.obj \
3737
; RUN: -out:%t4.exe 2>&1 | FileCheck -DMSG=%errc_EACCES %s --check-prefix=ERR
38-
; ERR: cannot open {{.*}}3.obj.imports: [[MSG]]
38+
; ERR: 'cannot open {{.*}}3.obj.imports': [[MSG]]
3939

4040
; Ensure lld doesn't generate import files when thinlto-index-only is not enabled
4141
; RUN: rm -f %t1.obj.imports

lld/test/ELF/lto/thinlto-cant-write-index.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; RUN: chmod u-w %t2.o.thinlto.bc
1111
; RUN: not ld.lld --plugin-opt=thinlto-index-only -shared %t1.o %t2.o -o /dev/null 2>&1 | FileCheck -DMSG=%errc_EACCES %s
1212
; RUN: chmod u+w %t2.o.thinlto.bc
13-
; CHECK: cannot open {{.*}}2.o.thinlto.bc: [[MSG]]
13+
; CHECK: 'cannot open {{.*}}2.o.thinlto.bc': [[MSG]]
1414

1515
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
1616
target triple = "x86_64-unknown-linux-gnu"

lld/test/ELF/lto/thinlto-emit-imports.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; RUN: touch %t3.o.imports
1111
; RUN: chmod 400 %t3.o.imports
1212
; RUN: not ld.lld --plugin-opt=thinlto-index-only --plugin-opt=thinlto-emit-imports-files -shared %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck -DMSG=%errc_EACCES %s --check-prefix=ERR
13-
; ERR: cannot open {{.*}}3.o.imports: [[MSG]]
13+
; ERR: 'cannot open {{.*}}3.o.imports': [[MSG]]
1414

1515
; RUN: rm -f %t1.o.imports %t2.o.imports rm -f %t3.o.imports
1616
; RUN: ld.lld --plugin-opt=thinlto-emit-imports-files -shared %t1.o %t2.o %t3.o -o %t4

lld/test/MachO/thinlto-emit-imports.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
; RUN: chmod 400 %t3.o.imports
3434
; RUN: not %lld --thinlto-index-only --thinlto-emit-imports-files -dylib %t1.o %t2.o %t3.o -o /dev/null 2>&1 \
3535
; RUN: | FileCheck -DMSG=%errc_EACCES %s --check-prefix=ERR
36-
; ERR: cannot open {{.*}}3.o.imports: [[MSG]]
36+
; ERR: 'cannot open {{.*}}3.o.imports': [[MSG]]
3737

3838
; Ensure lld doesn't generate import files when thinlto-index-only is not enabled
3939
; RUN: rm -f %t1.o.imports

llvm/include/llvm/LTO/LTO.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
231231
/// the objects with NativeObjectPrefix instead of NewPrefix. OnWrite is
232232
/// callback which receives module identifier and notifies LTO user that index
233233
/// file for the module (and optionally imports file) was created.
234-
ThinBackend createWriteIndexesThinBackend(std::string OldPrefix,
234+
ThinBackend createWriteIndexesThinBackend(ThreadPoolStrategy Parallelism,
235+
std::string OldPrefix,
235236
std::string NewPrefix,
236237
std::string NativeObjectPrefix,
237238
bool ShouldEmitImportsFiles,

llvm/include/llvm/Support/Threading.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,18 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
188188
return S;
189189
}
190190

191+
/// Like hardware_concurrency() above, but builds a strategy
192+
/// based on the rules described for get_threadpool_strategy().
193+
/// If \p Num is invalid, returns a default strategy where one thread per
194+
/// hardware core is used.
195+
inline ThreadPoolStrategy hardware_concurrency(StringRef Num) {
196+
std::optional<ThreadPoolStrategy> S =
197+
get_threadpool_strategy(Num, hardware_concurrency());
198+
if (S)
199+
return *S;
200+
return hardware_concurrency();
201+
}
202+
191203
/// Returns an optimal thread strategy to execute specified amount of tasks.
192204
/// This strategy should prevent us from creating too many threads if we
193205
/// occasionally have an unexpectedly small amount of tasks.

llvm/include/llvm/Transforms/IPO/FunctionImport.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -417,9 +417,9 @@ void gatherImportedSummariesForModule(
417417
GVSummaryPtrSet &DecSummaries);
418418

419419
/// Emit into \p OutputFilename the files module \p ModulePath will import from.
420-
std::error_code
421-
EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename,
422-
const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex);
420+
Error EmitImportsFiles(
421+
StringRef ModulePath, StringRef OutputFilename,
422+
const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex);
423423

424424
/// Based on the information recorded in the summaries during global
425425
/// summary-based analysis:

0 commit comments

Comments
 (0)