Skip to content

Commit 2b24287

Browse files
johnno1962drodriguezellishg
authored
[lld][MachO] Multi-threaded preload of input files into memory (#147134)
This PR adds a new option to lld, `--read-workers=20`, that defers all disk I/O and then performs it on multiple threads, so the process is never stalled waiting for the I/O caused by paging in mapped input files. This results in a saving of elapsed time. For a large link (iterating on Chromium) these are the baseline link times when saving a single file and rebuilding (seconds inside Xcode): 26.01, 25.84, 26.15, 26.03, 27.10, 25.90, 25.86, 25.81, 25.80, 25.87. With the proposed code change, and using the `--read-workers=20` option, the link times reduce to the following: 21.13, 20.35, 20.01, 20.01, 20.30, 20.39, 19.97, 20.23, 20.17, 20.23. The secret sauce is in the new function `multiThreadedPageIn()` in Driver.cpp. Without the option lld behaves as before. Edit: with subsequent commits I've taken this novel I/O approach to its full potential. The latest link times are now: 13.2, 11.9, 12.12, 12.01, 11.99, 13.11, 11.93, 11.95, 12.18, 11.97. Chrome is still linking and running, so it doesn't look like anything is broken. Despite being multi-threaded, all memory access is read-only and the original code paths are not changed. All that is happening is that the system is being asked to proactively page in files rather than waiting for processing to page fault, which would otherwise stall the process. --------- Co-authored-by: Daniel Rodríguez Troitiño <drodrigueztroitino@gmail.com> Co-authored-by: Ellis Hoag <ellis.sparky.hoag@gmail.com>
1 parent ca03045 commit 2b24287

File tree

3 files changed

+172
-10
lines changed

3 files changed

+172
-10
lines changed

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ struct Configuration {
186186
bool interposable = false;
187187
bool errorForArchMismatch = false;
188188
bool ignoreAutoLink = false;
189+
int readWorkers = 0;
189190
// ld64 allows invalid auto link options as long as the link succeeds. LLD
190191
// does not, but there are cases in the wild where the invalid linker options
191192
// exist. This allows users to ignore the specific invalid options in the case

lld/MachO/Driver.cpp

Lines changed: 168 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,10 @@
4444
#include "llvm/Support/FileSystem.h"
4545
#include "llvm/Support/Parallel.h"
4646
#include "llvm/Support/Path.h"
47+
#include "llvm/Support/Process.h"
4748
#include "llvm/Support/TarWriter.h"
4849
#include "llvm/Support/TargetSelect.h"
50+
#include "llvm/Support/Threading.h"
4951
#include "llvm/Support/TimeProfiler.h"
5052
#include "llvm/TargetParser/Host.h"
5153
#include "llvm/TextAPI/Architecture.h"
@@ -282,11 +284,117 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
282284
": Archive::children failed: " + toString(std::move(e)));
283285
}
284286

285-
static InputFile *addFile(StringRef path, LoadType loadType,
286-
bool isLazy = false, bool isExplicit = true,
287-
bool isBundleLoader = false,
288-
bool isForceHidden = false) {
289-
std::optional<MemoryBufferRef> buffer = readFile(path);
287+
// An input whose processing has been postponed. Instances are built
// positionally as {path, isLazy, buffer}, so the field order matters.
struct DeferredFile {
288+
// Path the entry was recorded for (for archive members this is the path
// passed to processFile for the enclosing archive).
StringRef path;
289+
// Lazy flag captured when the entry was recorded; forwarded to processFile.
bool isLazy;
290+
// Non-owning reference to the contents that get paged in and processed.
MemoryBufferRef buffer;
291+
};
292+
using DeferredFiles = std::vector<DeferredFile>;
293+
294+
// Executes queued closures one at a time, in FIFO order, on a background
// thread that is started on demand and exits once the queue drains.
//
// queueWork() joins a previous worker outside the lock, so it is meant to be
// called from a single thread. Passing an empty std::function queues nothing;
// it only reaps a worker whose queue is already empty.
class SerialBackgroundQueue {
  // Pending jobs. The job currently executing stays at the front until it
  // has finished, so a non-empty queue means "worker still busy".
  std::deque<std::function<void()>> queue;
  // Worker thread, or nullptr when none exists. Kept as a raw pointer that
  // is deleted only after join(): destroying a joinable std::thread would
  // call std::terminate.
  std::thread *running = nullptr;
  // Guards `queue` and `running`.
  std::mutex mutex;

public:
  void queueWork(std::function<void()> work) {
    std::unique_lock<std::mutex> lock(mutex);

    // A worker exists but has nothing left to do: it is exiting (or has
    // exited). Reap it so a fresh one can be started below. The lock is
    // dropped while joining because the worker takes it on its way out.
    if (running && queue.empty()) {
      lock.unlock();
      running->join();
      lock.lock();
      delete running;
      running = nullptr;
    }

    if (!work)
      return;

    queue.emplace_back(std::move(work));
    if (running)
      return; // The live worker will pick the new job up.

    // Capture `this` explicitly: the thread outlives this call, so nothing
    // from the enclosing stack frame may be referenced.
    running = new std::thread([this]() {
      while (true) {
        std::function<void()> job;
        {
          std::lock_guard<std::mutex> guard(mutex);
          if (queue.empty())
            return;
          job = std::move(queue.front());
        }
        // Run without holding the lock so queueWork() is never blocked by
        // a long-running job.
        job();
        std::lock_guard<std::mutex> guard(mutex);
        queue.pop_front();
      }
    });
  }
};
332+
333+
// Most input files have been mapped but not yet paged in.
334+
// This code forces the page-ins on multiple threads so
335+
// the process is not stalled waiting on disk buffer i/o.
336+
void multiThreadedPageInBackground(DeferredFiles &deferred) {
337+
static const size_t pageSize = Process::getPageSizeEstimate();
338+
static const size_t largeArchive = 10 * 1024 * 1024;
339+
#ifndef NDEBUG
340+
using namespace std::chrono;
341+
std::atomic_int numDeferedFilesTouched = 0;
342+
static std::atomic_uint64_t totalBytes = 0;
343+
auto t0 = high_resolution_clock::now();
344+
#endif
345+
346+
auto preloadDeferredFile = [&](const DeferredFile &deferredFile) {
347+
const StringRef &buff = deferredFile.buffer.getBuffer();
348+
if (buff.size() > largeArchive)
349+
return;
350+
#ifndef NDEBUG
351+
totalBytes += buff.size();
352+
numDeferedFilesTouched += 1;
353+
#endif
354+
355+
// Reference all file's mmap'd pages to load them into memory.
356+
for (const char *page = buff.data(), *end = page + buff.size(); page < end;
357+
page += pageSize)
358+
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
359+
};
360+
#if LLVM_ENABLE_THREADS
361+
{ // Create scope for waiting for the taskGroup
362+
std::atomic_size_t index = 0;
363+
llvm::parallel::TaskGroup taskGroup;
364+
for (int w = 0; w < config->readWorkers; w++)
365+
taskGroup.spawn([&index, &preloadDeferredFile, &deferred]() {
366+
while (true) {
367+
size_t localIndex = index.fetch_add(1);
368+
if (localIndex >= deferred.size())
369+
break;
370+
preloadDeferredFile(deferred[localIndex]);
371+
}
372+
});
373+
}
374+
#endif
375+
#ifndef NDEBUG
376+
auto dt = high_resolution_clock::now() - t0;
377+
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
378+
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
379+
<< numDeferedFilesTouched << "/" << deferred.size() << "/"
380+
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
381+
#endif
382+
}
383+
384+
static void multiThreadedPageIn(const DeferredFiles &deferred) {
385+
static SerialBackgroundQueue pageInQueue;
386+
pageInQueue.queueWork([=]() {
387+
DeferredFiles files = deferred;
388+
multiThreadedPageInBackground(files);
389+
});
390+
}
391+
392+
static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
393+
DeferredFiles *archiveContents, StringRef path,
394+
LoadType loadType, bool isLazy = false,
395+
bool isExplicit = true,
396+
bool isBundleLoader = false,
397+
bool isForceHidden = false) {
290398
if (!buffer)
291399
return nullptr;
292400
MemoryBufferRef mbref = *buffer;
@@ -379,6 +487,8 @@ static InputFile *addFile(StringRef path, LoadType loadType,
379487
continue;
380488
}
381489

490+
if (archiveContents)
491+
archiveContents->push_back({path, isLazy, *mb});
382492
if (!hasObjCSection(*mb))
383493
continue;
384494
if (Error e = file->fetch(c, "-ObjC"))
@@ -390,7 +500,8 @@ static InputFile *addFile(StringRef path, LoadType loadType,
390500
": Archive::children failed: " + toString(std::move(e)));
391501
}
392502
}
393-
file->addLazySymbols();
503+
if (!archiveContents || archiveContents->empty())
504+
file->addLazySymbols();
394505
loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
395506
newFile = file;
396507
break;
@@ -441,6 +552,24 @@ static InputFile *addFile(StringRef path, LoadType loadType,
441552
return newFile;
442553
}
443554

555+
// Reads `path` and processes it immediately. No archive-contents list is
// supplied (nullptr is passed to processFile).
static InputFile *addFile(StringRef path, LoadType loadType,
                          bool isLazy = false, bool isExplicit = true,
                          bool isBundleLoader = false,
                          bool isForceHidden = false) {
  std::optional<MemoryBufferRef> contents = readFile(path);
  return processFile(contents, /*archiveContents=*/nullptr, path, loadType,
                     isLazy, isExplicit, isBundleLoader, isForceHidden);
}
562+
563+
// Reads `path` and either records it in `deferred` for later processing (when
// config->readWorkers is non-zero) or processes it right away as a
// command-line input. Does nothing if the file cannot be read.
static void deferFile(StringRef path, bool isLazy, DeferredFiles &deferred) {
  std::optional<MemoryBufferRef> buffer = readFile(path);
  if (!buffer)
    return;

  // Preloading disabled: process the file immediately.
  if (!config->readWorkers) {
    processFile(buffer, nullptr, path, LoadType::CommandLine, isLazy);
    return;
  }

  deferred.push_back({path, isLazy, *buffer});
}
572+
444573
static std::vector<StringRef> missingAutolinkWarnings;
445574
static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
446575
bool isReexport, bool isHidden, bool isExplicit,
@@ -564,13 +693,14 @@ void macho::resolveLCLinkerOptions() {
564693
}
565694
}
566695

567-
static void addFileList(StringRef path, bool isLazy) {
696+
static void addFileList(StringRef path, bool isLazy,
697+
DeferredFiles &deferredFiles) {
568698
std::optional<MemoryBufferRef> buffer = readFile(path);
569699
if (!buffer)
570700
return;
571701
MemoryBufferRef mbref = *buffer;
572702
for (StringRef path : args::getLines(mbref))
573-
addFile(rerootPath(path), LoadType::CommandLine, isLazy);
703+
deferFile(rerootPath(path), isLazy, deferredFiles);
574704
}
575705

576706
// We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1222,14 +1352,16 @@ static void createFiles(const InputArgList &args) {
12221352
bool isLazy = false;
12231353
// If we've processed an opening --start-lib, without a matching --end-lib
12241354
bool inLib = false;
1355+
DeferredFiles deferredFiles;
1356+
12251357
for (const Arg *arg : args) {
12261358
const Option &opt = arg->getOption();
12271359
warnIfDeprecatedOption(opt);
12281360
warnIfUnimplementedOption(opt);
12291361

12301362
switch (opt.getID()) {
12311363
case OPT_INPUT:
1232-
addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
1364+
deferFile(rerootPath(arg->getValue()), isLazy, deferredFiles);
12331365
break;
12341366
case OPT_needed_library:
12351367
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
@@ -1249,7 +1381,7 @@ static void createFiles(const InputArgList &args) {
12491381
dylibFile->forceWeakImport = true;
12501382
break;
12511383
case OPT_filelist:
1252-
addFileList(arg->getValue(), isLazy);
1384+
addFileList(arg->getValue(), isLazy, deferredFiles);
12531385
break;
12541386
case OPT_force_load:
12551387
addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce);
@@ -1295,6 +1427,24 @@ static void createFiles(const InputArgList &args) {
12951427
break;
12961428
}
12971429
}
1430+
1431+
if (config->readWorkers) {
1432+
multiThreadedPageIn(deferredFiles);
1433+
1434+
DeferredFiles archiveContents;
1435+
std::vector<ArchiveFile *> archives;
1436+
for (auto &file : deferredFiles) {
1437+
auto inputFile = processFile(file.buffer, &archiveContents, file.path,
1438+
LoadType::CommandLine, file.isLazy);
1439+
if (ArchiveFile *archive = dyn_cast<ArchiveFile>(inputFile))
1440+
archives.push_back(archive);
1441+
}
1442+
1443+
if (!archiveContents.empty())
1444+
multiThreadedPageIn(archiveContents);
1445+
for (auto *archive : archives)
1446+
archive->addLazySymbols();
1447+
}
12981448
}
12991449

13001450
static void gatherInputSections() {
@@ -1681,6 +1831,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
16811831
}
16821832
}
16831833

1834+
if (auto *arg = args.getLastArg(OPT_read_workers)) {
1835+
StringRef v(arg->getValue());
1836+
unsigned threads = 0;
1837+
if (!llvm::to_integer(v, threads, 0) || threads < 0)
1838+
error(arg->getSpelling() + ": expected a positive integer, but got '" +
1839+
arg->getValue() + "'");
1840+
config->readWorkers = threads;
1841+
}
16841842
if (auto *arg = args.getLastArg(OPT_threads_eq)) {
16851843
StringRef v(arg->getValue());
16861844
unsigned threads = 0;

lld/MachO/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,9 @@ def dead_strip : Flag<["-"], "dead_strip">,
396396
def interposable : Flag<["-"], "interposable">,
397397
HelpText<"Indirects access to all exported symbols in an image">,
398398
Group<grp_opts>;
399+
def read_workers : Joined<["--"], "read-workers=">,
400+
HelpText<"Approximate number of workers to use to eagerly preload input files content into memory. Use 0 to disable this feature. Default is disabled.">,
401+
Group<grp_lld>;
399402
def order_file : Separate<["-"], "order_file">,
400403
MetaVarName<"<file>">,
401404
HelpText<"Layout functions and data according to specification in <file>">,

0 commit comments

Comments
 (0)