diff --git a/cross-project-tests/dtlto/archive.test b/cross-project-tests/dtlto/archive.test new file mode 100644 index 0000000000000..1c01ae7a5691a --- /dev/null +++ b/cross-project-tests/dtlto/archive.test @@ -0,0 +1,80 @@ +REQUIRES: x86-registered-target,ld.lld,llvm-ar + +# Test that a DTLTO link succeeds and outputs the expected set of files +# correctly when archives are present. + +RUN: rm -rf %t && split-file %s %t && cd %t +# Compile sources into bitcode. -O2 is required for cross-module importing. +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c boo.c moo.c loo.c voo.c main.c + +RUN: llvm-ar rcs archive.a foo.o boo.o moo.o +RUN: llvm-ar rcsT archive.thin.a loo.o voo.o + +# Build with DTLTO. +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin \ +RUN: -fuse-ld=lld -nostdlib -e main \ +RUN: main.o archive.a archive.thin.a -o main.elf \ +RUN: -Wl,--thinlto-distributor=%python \ +RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--thinlto-remote-compiler=%clang \ +RUN: -Wl,--save-temps + +# Check that the required output files have been created. +RUN: ls | FileCheck %s --check-prefix=OUTPUTS + +# JSON jobs description. +OUTPUTS-DAG: {{^}}main.[[PID:[0-9]+]].dist-file.json + +# Main source. +OUTPUTS-DAG: {{^}}main.{{[0-9]+}}.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}main.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}} + +# Regular archive members. +# Filename composition: <archive>(<member> at <offset>).<N>.<HEXPID>.<N>.<PID>.native.o[.thinlto.bc]. 
+OUTPUTS-DAG: {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}} + +OUTPUTS-DAG: {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}} + +OUTPUTS-DAG: {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o.thinlto.bc{{$}} + +# Thin archive members. +OUTPUTS-DAG: {{^}}voo.{{[0-9]+}}.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}voo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}} + +OUTPUTS-DAG: {{^}}loo.{{[0-9]+}}.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}loo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}} + +# Executable file. +OUTPUTS-DAG: {{^}}main.elf{{$}} + +#--- foo.c +volatile int foo_int; +__attribute__((retain)) int foo(int x) { return x + foo_int; } + +#--- boo.c +extern int foo(int x); +__attribute__((retain)) int boo(int x) { return foo(x); } + +#--- moo.c +__attribute__((retain)) int moo() { return 3; } + +#--- loo.c +extern int moo(int x); +__attribute__((retain)) int loo(int x) { return moo(x); } + +#--- voo.c +extern int foo(int x); +extern int loo(int x); +__attribute__((retain)) int voo(int x) { return foo(x) + loo(x + 1) + 7; } + +#--- main.c +extern int boo(int x); +extern int moo(); +extern int voo(int x); +__attribute__((retain)) int main(int argc, char** argv) { + return boo(argc) + moo() + voo(argc + 3); +} + diff --git a/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test new file mode 100644 index 0000000000000..74f146028b4b6 --- /dev/null +++ b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test @@ -0,0 +1,35 @@ +REQUIRES: x86-registered-target,ld.lld,llvm-ar + +# Test that DTLTO works with a mixture of FullLTO and ThinLTO bitcode 
archive members +# where there is more than one LTO partition. + +RUN: rm -rf %t && split-file %s %t && cd %t + +RUN: %clang --target=x86_64-linux-gnu -flto -c one.c two.c +RUN: %clang --target=x86_64-linux-gnu -flto=thin -c three.c + +RUN: llvm-ar rc archive.a one.o two.o three.o + +# Build with DTLTO. +RUN: %clang --target=x86_64-linux-gnu -Werror -flto -fuse-ld=lld -nostdlib \ +RUN: -Wl,--whole-archive archive.a \ +RUN: -Wl,--thinlto-distributor=%python \ +RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--thinlto-remote-compiler=%clang \ +RUN: -Wl,--save-temps,--lto-partitions=2 + +# Show that the FullLTO modules have been prepared for distribution, this is +# not optimal but has no functional impact. +RUN: FileCheck %s --input-file=a.out.resolution.txt +CHECK: archive.a(one.o at {{.*}}).1.[[PID:[a-zA-Z0-9_]+]].o +CHECK: archive.a(two.o at {{.*}}).2.[[PID]].o +CHECK: archive.a(three.o at {{.*}}).3.[[PID]].o + +#--- one.c +__attribute__((retain)) void one() {} + +#--- two.c +__attribute__((retain)) void two() {} + +#--- three.c +__attribute__((retain)) void three() {} diff --git a/cross-project-tests/dtlto/archives-same-module-id.test b/cross-project-tests/dtlto/archives-same-module-id.test new file mode 100644 index 0000000000000..09d5f7492bfa5 --- /dev/null +++ b/cross-project-tests/dtlto/archives-same-module-id.test @@ -0,0 +1,55 @@ +REQUIRES: x86-registered-target,ld.lld,llvm-ar + +# Test that a DTLTO link succeeds when there are two archive member files with +# the same filename path component. + +# Split this file into several sources. +RUN: rm -rf %t && split-file %s %t && cd %t + +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c start.c + +# Create first archive. +RUN: mkdir archive1 && cd archive1 +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c +RUN: llvm-ar rc archive.a t3.o t1.o +RUN: cd .. + +# Create second archive. 
+RUN: mkdir archive2 && cd archive2 +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c +RUN: llvm-ar rc archive.a t3.o t1.o +RUN: cd .. + +RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld \ +RUN: -nostdlib -Wl,--undefined=t1,--undefined=t3 \ +RUN: start.o archive1/archive.a archive2/archive.a -o main.elf \ +RUN: -Wl,--save-temps \ +RUN: -Wl,--thinlto-distributor=%python \ +RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--thinlto-remote-compiler=%clang + +# Check that the required output files have been created. +RUN: ls | FileCheck %s --check-prefix=OUTPUTS + +# JSON jobs description. +OUTPUTS-DAG: {{^}}main.[[PID:[0-9]+]].dist-file.json + +# Sources. +OUTPUTS-DAG: {{^}}start.{{[0-9]+}}.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}start.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}} + +# Archive members. +# Filename composition: <archive>(<member> at <offset>).<N>.<HEXPID>.<N>.<PID>.native.o[.thinlto.bc]. +OUTPUTS-DAG: {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}} +OUTPUTS-DAG: {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}} +OUTPUTS-DAG: {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}} + +#--- t1.c +__attribute__((retain)) void t1() { } + +#--- start.c +__attribute__((retain)) void _start() { } + +#--- t3.c +__attribute__((retain)) void t3() { } diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index c08099b8810bb..d415955b6093b 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -1380,6 +1380,7 @@ BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb, utostr(offsetInArchive))); std::unique_ptr obj = check(lto::InputFile::create(mbref)); + obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier()); return make(ctx.getSymtab(getMachineType(obj.get())), mb, obj, lazy); } diff --git 
a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index a988be610864a..1107fa36f5e11 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -135,6 +135,9 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) { ltoObj = std::make_unique(createConfig(), backend, ctx.config.ltoPartitions); + + if (!ctx.config.dtltoDistributor.empty()) + ltoObj->Dtlto = true; } BitcodeCompiler::~BitcodeCompiler() = default; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index a5921feb18299..a34d007faa3df 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/LTO/LTO.h" -#include "llvm/Object/Archive.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Support/AArch64AttributeParser.h" #include "llvm/Support/ARMAttributeParser.h" @@ -1812,39 +1811,6 @@ static uint8_t getOsAbi(const Triple &t) { } } -// For DTLTO, bitcode member names must be valid paths to files on disk. -// For thin archives, resolve `memberPath` relative to the archive's location. -// Returns true if adjusted; false otherwise. Non-thin archives are unsupported. -static bool dtltoAdjustMemberPathIfThinArchive(Ctx &ctx, StringRef archivePath, - std::string &memberPath) { - assert(!archivePath.empty()); - - if (ctx.arg.dtltoDistributor.empty()) - return false; - - // Read the archive header to determine if it's a thin archive. 
- auto bufferOrErr = - MemoryBuffer::getFileSlice(archivePath, sizeof(ThinArchiveMagic) - 1, 0); - if (std::error_code ec = bufferOrErr.getError()) { - ErrAlways(ctx) << "cannot open " << archivePath << ": " << ec.message(); - return false; - } - - if (!bufferOrErr->get()->getBuffer().starts_with(ThinArchiveMagic)) - return false; - - SmallString<128> resolvedPath; - if (path::is_relative(memberPath)) { - resolvedPath = path::parent_path(archivePath); - path::append(resolvedPath, memberPath); - } else - resolvedPath = memberPath; - - path::remove_dots(resolvedPath, /*remove_dot_dot=*/true); - memberPath = resolvedPath.str(); - return true; -} - BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive, bool lazy) : InputFile(ctx, BitcodeKind, mb) { @@ -1855,25 +1821,22 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName, if (ctx.arg.thinLTOIndexOnly) path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier()); + // ThinLTO assumes that all MemoryBufferRefs given to it have a unique + // name. If two archives define two members with the same name, this + // causes a collision which result in only one of the objects being taken + // into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). So we append file offset to make + // filename unique. StringSaver &ss = ctx.saver; - StringRef name; - if (archiveName.empty() || - dtltoAdjustMemberPathIfThinArchive(ctx, archiveName, path)) { - name = ss.save(path); - } else { - // ThinLTO assumes that all MemoryBufferRefs given to it have a unique - // name. If two archives define two members with the same name, this - // causes a collision which result in only one of the objects being taken - // into consideration at LTO time (which very likely causes undefined - // symbols later in the link stage). So we append file offset to make - // filename unique. 
- name = ss.save(archiveName + "(" + path::filename(path) + " at " + - utostr(offsetInArchive) + ")"); - } + StringRef name = archiveName.empty() + ? ss.save(path) + : ss.save(archiveName + "(" + path::filename(path) + + " at " + utostr(offsetInArchive) + ")"); MemoryBufferRef mbref(mb.getBuffer(), name); obj = CHECK2(lto::InputFile::create(mbref), this); + obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier()); Triple t(obj->getTargetTriple()); ekind = getBitcodeELFKind(t); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 8d4a6c9e3a81e..5a8f9395b7174 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -202,6 +202,9 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) { ctx.arg.ltoPartitions, ltoModes[ctx.arg.ltoKind]); + if (!ctx.arg.dtltoDistributor.empty()) + ltoObj->Dtlto = true; + // Initialize usedStartStop. if (ctx.bitcodeFiles.empty()) return; diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h index 4f839d4cd1575..772ca82019278 100644 --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -137,6 +137,11 @@ struct ParserCallbacks { StringRef getModuleIdentifier() const { return ModuleIdentifier; } + // Assign a new module identifier to this bitcode module. + void setModuleIdentifier(llvm::StringRef ModuleId) { + ModuleIdentifier = ModuleId; + } + /// Read the bitcode module and prepare for lazy deserialization of function /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. /// If IsImporting is true, this module is being parsed for ThinLTO diff --git a/llvm/include/llvm/DTLTO/DTLTO.h b/llvm/include/llvm/DTLTO/DTLTO.h new file mode 100644 index 0000000000000..dfbfcf79d7435 --- /dev/null +++ b/llvm/include/llvm/DTLTO/DTLTO.h @@ -0,0 +1,23 @@ +//===- DTLTO.h - Distributed ThinLTO functions and classes ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_DTLTO_H +#define LLVM_DTLTO_H + +#include "llvm/LTO/LTO.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace dtlto { + +llvm::Expected +addInput(llvm::lto::LTO *LtoObj, std::unique_ptr Input); + +llvm::Error process(llvm::lto::LTO &LtoObj); +} // namespace dtlto + +#endif // LLVM_DTLTO_H diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 3a9a7f7c25859..4e1e7231b0ee7 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -33,6 +33,23 @@ #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" +namespace llvm { +namespace lto { +class LTO; +} +} // namespace llvm + +namespace dtlto { +class TempFilesRemover { + llvm::lto::LTO *Lto = nullptr; + +public: + TempFilesRemover(llvm::lto::LTO *LtoObj) : Lto{LtoObj} {} + ~TempFilesRemover(); +}; + +} // namespace dtlto + namespace llvm { class Error; @@ -136,6 +153,12 @@ class InputFile { std::vector DependentLibraries; std::vector> ComdatTable; + MemoryBufferRef MbRef; + bool IsMemberOfArchive = false; + bool IsThinLTO = false; + StringRef ArchivePath; + StringRef MemberName; + public: LLVM_ABI ~InputFile(); @@ -194,6 +217,23 @@ class InputFile { // Returns the only BitcodeModule from InputFile. LLVM_ABI BitcodeModule &getSingleBitcodeModule(); + // Returns the memory buffer reference for this input file. + MemoryBufferRef getFileBuffer() const { return MbRef; } + // Returns true if this input file is a member of an archive. + bool isMemberOfArchive() const { return IsMemberOfArchive; } + // Mark this input file as a member of archive. + void memberOfArchive(bool MA) { IsMemberOfArchive = MA; } + + // Returns true if bitcode is ThinLTO. 
+ bool isThinLTO() const { return IsThinLTO; } + + // Store an archive path and a member name. + void setArchivePathAndName(StringRef Path, StringRef Name) { + ArchivePath = Path; + MemberName = Name; + } + StringRef getArchivePath() const { return ArchivePath; } + StringRef getMemberName() const { return MemberName; } private: ArrayRef module_symbols(unsigned I) const { @@ -581,6 +621,17 @@ class LTO { // Diagnostic optimization remarks file LLVMRemarkFileHandle DiagnosticOutputFile; + +public: + /// DTLTO mode. + bool Dtlto = false; + + BumpPtrAllocator PtrAlloc; + StringSaver Saver{PtrAlloc}; + + // Array of input bitcode files for LTO. + std::vector> InputFiles; + std::unique_ptr TempsRemover; }; /// The resolution for a symbol. The linker must provide a SymbolResolution for diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt index a9432977718c6..0856af9058fef 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory(Frontend) add_subdirectory(Transforms) add_subdirectory(Linker) add_subdirectory(Analysis) +add_subdirectory(DTLTO) add_subdirectory(LTO) add_subdirectory(MC) add_subdirectory(MCA) diff --git a/llvm/lib/DTLTO/CMakeLists.txt b/llvm/lib/DTLTO/CMakeLists.txt new file mode 100644 index 0000000000000..4a35de24c86db --- /dev/null +++ b/llvm/lib/DTLTO/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_component_library(LLVMDTLTO + DTLTO.cpp + + LINK_COMPONENTS + Core + Support + ) diff --git a/llvm/lib/DTLTO/DTLTO.cpp b/llvm/lib/DTLTO/DTLTO.cpp new file mode 100644 index 0000000000000..19ae6385f75cd --- /dev/null +++ b/llvm/lib/DTLTO/DTLTO.cpp @@ -0,0 +1,226 @@ +//===- DTLTO.cpp - Distributed ThinLTO implementation --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements support functions for Distributed ThinLTO, focusing on +// archive file handling. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DTLTO/DTLTO.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/LTO/LTO.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace llvm; + +namespace dtlto { + +// Removes any temporary regular archive member files that were created during +// processing. +TempFilesRemover::~TempFilesRemover() { + if (!Lto) + return; + for (auto &Input : Lto->InputFiles) { + if (Input->isMemberOfArchive()) + sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true); + } +} + +// Writes the content of a memory buffer into a file. +static llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) { + std::error_code EC; + raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None); + if (EC) { + return createStringError(inconvertibleErrorCode(), + "Failed to create file %s: %s", FilePath.data(), + EC.message().c_str()); + } + OS.write(FileBuffer.data(), FileBuffer.size()); + if (OS.has_error()) { + return createStringError(inconvertibleErrorCode(), + "Failed writing to file %s", FilePath.data()); + } + return Error::success(); +} + +// Compute the file path for a thin archive member. +// +// For thin archives, an archive member name is typically a file path relative +// to the archive file's directory. This function resolves that path. 
+SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath, + const StringRef MemberName) { + assert(!ArchivePath.empty() && "An archive file path must be non empty."); + SmallString<64> MemberPath; + if (sys::path::is_relative(MemberName)) { + MemberPath = sys::path::parent_path(ArchivePath); + sys::path::append(MemberPath, MemberName); + } else + MemberPath = MemberName; + sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true); + return MemberPath; +} + +// Magic string identifying thin archive files. +static constexpr StringLiteral THIN_ARCHIVE_MAGIC = "!\n"; + +// Determines if a file at the given path is a thin archive file. +// +// This function uses a cache to avoid repeatedly reading the same file. +// It reads only the header portion (magic bytes) of the file to identify +// the archive type. +Expected isThinArchive(const StringRef ArchivePath) { + static StringMap ArchiveFiles; + + // Return cached result if available. + auto Cached = ArchiveFiles.find(ArchivePath); + if (Cached != ArchiveFiles.end()) + return Cached->second; + + uint64_t FileSize = -1; + bool IsThin = false; + std::error_code EC = sys::fs::file_size(ArchivePath, FileSize); + if (EC) + return createStringError(inconvertibleErrorCode(), + "Failed to get file size from archive %s: %s", + ArchivePath.data(), EC.message().c_str()); + if (FileSize < THIN_ARCHIVE_MAGIC.size()) + return createStringError(inconvertibleErrorCode(), + "Archive file size is too small %s", + ArchivePath.data()); + + // Read only the first few bytes containing the magic signature. 
+ ErrorOr> MemBufferOrError = + MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0); + + if ((EC = MemBufferOrError.getError())) + return createStringError(inconvertibleErrorCode(), + "Failed to read from archive %s: %s", + ArchivePath.data(), EC.message().c_str()); + + StringRef MemBuf = (*MemBufferOrError.get()).getBuffer(); + if (file_magic::archive != identify_magic(MemBuf)) + return createStringError(inconvertibleErrorCode(), + "Unknown format for archive %s", + ArchivePath.data()); + + IsThin = MemBuf.starts_with(THIN_ARCHIVE_MAGIC); + + // Cache the result + ArchiveFiles[ArchivePath] = IsThin; + return IsThin; +} + +// This function performs the following tasks: +// 1. Adds the input file to the LTO object's list of input files. +// 2. For thin archive members, generates a new module ID which is a path to a +// thin archive member file. +// 3. For regular archive members, generates a new unique module ID. +// 4. Updates the bitcode module's identifier. +Expected addInput(lto::LTO *LtoObj, + std::unique_ptr InputPtr) { + + // Add the input file to the LTO object. + LtoObj->InputFiles.push_back(std::move(InputPtr)); + lto::InputFile *Input = LtoObj->InputFiles.back().get(); + + // Skip processing if not in DTLTO mode. + if (!LtoObj->Dtlto) + return Input; + + StringRef ModuleId = Input->getName(); + StringRef ArchivePath = Input->getArchivePath(); + + // Only process archive members. + if (ArchivePath.empty()) + return Input; + + SmallString<64> NewModuleId; + BitcodeModule &BM = Input->getSingleBitcodeModule(); + + // Check if the archive is a thin archive. + Expected IsThin = isThinArchive(ArchivePath); + if (!IsThin) + return IsThin.takeError(); + + if (*IsThin) { + // For thin archives, use the path to the actual file. + NewModuleId = + computeThinArchiveMemberPath(ArchivePath, Input->getMemberName()); + } else { + // For regular archives, generate a unique name. 
+ Input->memberOfArchive(true); + + // Create unique identifier using process ID and sequence number. + std::string PID = utohexstr(sys::Process::getProcessId()); + std::string Seq = std::to_string(LtoObj->InputFiles.size()); + + NewModuleId = {sys::path::filename(ModuleId), ".", Seq, ".", PID, ".o"}; + } + + // Update the module identifier and save it. + BM.setModuleIdentifier(LtoObj->Saver.save(NewModuleId.str())); + + return Input; +} + +// Write the archive member content to a file named after the module ID. +// If a file with that name already exists, it's likely a leftover from a +// previously terminated linker process and can be safely overwritten. +Error saveInputArchiveMember(lto::LTO &LtoObj, lto::InputFile *Input) { + StringRef ModuleId = Input->getName(); + if (Input->isMemberOfArchive()) { + MemoryBufferRef MemoryBufferRef = Input->getFileBuffer(); + if (Error EC = saveBuffer(MemoryBufferRef.getBuffer(), ModuleId)) + return EC; + } + return Error::success(); +} + +// Iterates through all ThinLTO-enabled input files and saves their content +// to separate files if they are regular archive members. +Error saveInputArchiveMembers(lto::LTO &LtoObj) { + for (auto &Input : LtoObj.InputFiles) { + if (!Input->isThinLTO()) + continue; + if (Error EC = saveInputArchiveMember(LtoObj, Input.get())) + return EC; + } + return Error::success(); +} + +// Entry point for DTLTO archives support. +// +// Sets up the temporary file remover and processes archive members. +// Must be called after all inputs are added but before optimization begins. +llvm::Error process(llvm::lto::LTO &LtoObj) { + if (!LtoObj.Dtlto) + return Error::success(); + + // Set up cleanup handler for temporary files + LtoObj.TempsRemover = std::make_unique(&LtoObj); + + // Process and save archive members to separate files if needed. 
+ if (Error EC = saveInputArchiveMembers(LtoObj)) + return EC; + return Error::success(); +} + +} // namespace dtlto diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt index 057d73b6349cf..499623eacf97c 100644 --- a/llvm/lib/LTO/CMakeLists.txt +++ b/llvm/lib/LTO/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_component_library(LLVMLTO CodeGen CodeGenTypes Core + DTLTO Extensions IPO InstCombine diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index e6544f3bafff4..b1f0105ccaafa 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -26,6 +26,7 @@ #include "llvm/CGData/CodeGenData.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/Config/llvm-config.h" +#include "llvm/DTLTO/DTLTO.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Intrinsics.h" @@ -571,6 +572,8 @@ Expected> InputFile::create(MemoryBufferRef Object) { File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts(); File->DependentLibraries = FOrErr->TheReader.getDependentLibraries(); File->ComdatTable = FOrErr->TheReader.getComdatTable(); + File->MbRef = + Object; // Save a memory buffer reference to an input file object. 
for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) { size_t Begin = File->Symbols.size(); @@ -731,13 +734,18 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, assert(ResI == Res.end()); } -Error LTO::add(std::unique_ptr Input, +Error LTO::add(std::unique_ptr InputPtr, ArrayRef Res) { - llvm::TimeTraceScope timeScope("LTO add input", Input->getName()); + llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName()); assert(!CalledGetMaxTasks); + Expected InputOrErr = dtlto::addInput(this, std::move(InputPtr)); + if (!InputOrErr) + return InputOrErr.takeError(); + InputFile *Input = *InputOrErr; + if (Conf.ResolutionFile) - writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); + writeToResolutionFile(*Conf.ResolutionFile, Input, Res); if (RegularLTO.CombinedModule->getTargetTriple().empty()) { Triple InputTriple(Input->getTargetTriple()); @@ -786,6 +794,10 @@ LTO::addModule(InputFile &Input, ArrayRef InputRes, LTOMode = LTOK_UnifiedThin; bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular); + // If any of the modules inside of a input bitcode file was compiled with + // ThinLTO, we assume that the whole input file also was compiled with + // ThinLTO. + Input.IsThinLTO = IsThinLTO; auto ModSyms = Input.module_symbols(ModI); addModuleToGlobalRes(ModSyms, Res, @@ -1200,6 +1212,10 @@ Error LTO::checkPartiallySplit() { } Error LTO::run(AddStreamFn AddStream, FileCache Cache) { + if (Dtlto) { + if (Error EC = dtlto::process(*this)) + return EC; + } // Compute "dead" symbols, we don't want to import/export these! DenseSet GUIDPreservedSymbols; DenseMap GUIDPrevailingResolutions;