From 4ff113dd3c1c812efa6eb4d8de6813bd1634a24e Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Thu, 20 Mar 2025 14:01:22 -0400 Subject: [PATCH 1/5] [lld-macho] Support archives with no index --- lld/MachO/Driver.cpp | 11 ++-- lld/MachO/InputFiles.cpp | 68 +++++++++++++++++------ lld/MachO/InputFiles.h | 6 +- lld/test/MachO/archive-no-index.s | 39 +++++++++++++ lld/test/MachO/invalid/archive-no-index.s | 32 ----------- 5 files changed, 99 insertions(+), 57 deletions(-) create mode 100644 lld/test/MachO/archive-no-index.s delete mode 100644 lld/test/MachO/invalid/archive-no-index.s diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index bec94f830e976..a407ea018ef63 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -314,8 +314,6 @@ static InputFile *addFile(StringRef path, LoadType loadType, std::unique_ptr archive = CHECK( object::Archive::create(mbref), path + ": failed to parse archive"); - if (!archive->isEmpty() && !archive->hasSymbolTable()) - error(path + ": archive has no index; run ranlib to add one"); file = make(std::move(archive), isForceHidden); if (tar && file->getArchive().isThin()) @@ -362,9 +360,11 @@ static InputFile *addFile(StringRef path, LoadType loadType, ": Archive::children failed: " + toString(std::move(e))); } } else if (isCommandLineLoad && config->forceLoadObjC) { - for (const object::Archive::Symbol &sym : file->getArchive().symbols()) - if (sym.getName().starts_with(objc::symbol_names::klass)) - file->fetch(sym); + if (file->getArchive().getNumberOfSymbols() > 0) { + for (const object::Archive::Symbol &sym : file->getArchive().symbols()) + if (sym.getName().starts_with(objc::symbol_names::klass)) + file->fetch(sym); + } // TODO: no need to look for ObjC sections for a given archive member if // we already found that it contains an ObjC symbol. @@ -394,7 +394,6 @@ static InputFile *addFile(StringRef path, LoadType loadType, ": Archive::children failed: " + toString(std::move(e))); } } - file->addLazySymbols(); loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad}; newFile = file; diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 60667e3b415d7..0bf9783f1ac21 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -2159,9 +2159,34 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&f, bool forceHidden) void ArchiveFile::addLazySymbols() { // Avoid calling getMemoryBufferRef() on zero-symbol archive // since that crashes. - if (file->isEmpty() || file->getNumberOfSymbols() == 0) + if (file->isEmpty()) return; + if (file->getNumberOfSymbols() == 0) { + // No index, treat each child as a lazy object file. + Error e = Error::success(); + for (const object::Archive::Child &c : file->children(e)) { + // Check `seen` but don't insert so a future eager load can still happen. + if (seen.contains(c.getChildOffset())) + continue; + if (!seenLazy.insert(c.getChildOffset()).second) { + continue; + } + // First check seen. + // Then, write to and check seenLazy + // Then, get the file, check for error, and add it to inputs. + auto file = childToObjectFile(c, /*lazy=*/true); + if (!file) + error(toString(this) + + ": couldn't process child: " + toString(file.takeError())); + inputFiles.insert(*file); + } + if (e) + error(toString(this) + + ": Archive::children failed: " + toString(std::move(e))); + return; + } + Error err = Error::success(); auto child = file->child_begin(err); // Ignore the I/O error here - will be reported later. @@ -2191,16 +2216,17 @@ void ArchiveFile::addLazySymbols() { static Expected loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, - uint64_t offsetInArchive, bool forceHidden, bool compatArch) { + uint64_t offsetInArchive, bool forceHidden, bool compatArch, + bool lazy) { if (config->zeroModTime) modTime = 0; switch (identify_magic(mb.getBuffer())) { case file_magic::macho_object: - return make(mb, modTime, archiveName, /*lazy=*/false, forceHidden, + return make(mb, modTime, archiveName, lazy, forceHidden, compatArch); case file_magic::bitcode: - return make(mb, archiveName, offsetInArchive, /*lazy=*/false, + return make(mb, archiveName, offsetInArchive, lazy, forceHidden, compatArch); default: return createStringError(inconvertibleErrorCode(), @@ -2209,22 +2235,11 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, } } -Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) { +Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason, + bool lazy) { if (!seen.insert(c.getChildOffset()).second) return Error::success(); - - Expected mb = c.getMemoryBufferRef(); - if (!mb) - return mb.takeError(); - - Expected> modTime = c.getLastModified(); - if (!modTime) - return modTime.takeError(); - - Expected file = - loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(), - forceHidden, compatArch); - + auto file = childToObjectFile(c, /*lazy=*/false); if (!file) return file.takeError(); @@ -2251,6 +2266,23 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) { toMachOString(symCopy) + ": " + toString(std::move(e))); } +Expected +ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c, + bool lazy) { + Expected mb = c.getMemoryBufferRef(); + if (!mb) + return mb.takeError(); + + Expected> modTime = c.getLastModified(); + if (!modTime) + return modTime.takeError(); + + Expected file = + loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(), + forceHidden, compatArch, lazy); + return file; +} + static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym, BitcodeFile &file) { StringRef name = saver().save(objSym.getName()); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index bc8c8038a39d1..7b2b4769e0b2d 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -292,15 +292,19 @@ class ArchiveFile final : public InputFile { void fetch(const llvm::object::Archive::Symbol &); // LLD normally doesn't use Error for error-handling, but the underlying // Archive library does, so this is the cleanest way to wrap it. - Error fetch(const llvm::object::Archive::Child &, StringRef reason); + Error fetch(const llvm::object::Archive::Child &, StringRef reason, + bool lazy = false); const llvm::object::Archive &getArchive() const { return *file; }; static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } private: + Expected childToObjectFile(const llvm::object::Archive::Child &c, + bool lazy); std::unique_ptr file; // Keep track of children fetched from the archive by tracking // which address offsets have been fetched already. llvm::DenseSet seen; + llvm::DenseSet seenLazy; // Load all symbols with hidden visibility (-load_hidden). bool forceHidden; }; diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s new file mode 100644 index 0000000000000..4135df0c69f4a --- /dev/null +++ b/lld/test/MachO/archive-no-index.s @@ -0,0 +1,39 @@ +# REQUIRES: x86 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib.o %t/lib.s +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s +# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o +# RUN: %lld %t/main.o %t/lib.a -o %t/out + +## Test that every kind of eager load mechanism still works. +# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load +# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD +# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load +# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD +# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc +# RUN: llvm-nm %t/objc | FileCheck %s --check-prefix FORCED-LOAD + +# FORCED-LOAD: T _bar + +#--- lib.s +.global _foo +_foo: + ret + +#--- lib2.s +.section __DATA,__objc_catlist +.quad 0x1234 + +.section __TEXT,__text +.global _bar +_bar: + ret + +#--- main.s +.global _main +_main: + call _foo + mov $0, %rax + ret \ No newline at end of file diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s deleted file mode 100644 index 9cda945652500..0000000000000 --- a/lld/test/MachO/invalid/archive-no-index.s +++ /dev/null @@ -1,32 +0,0 @@ -# REQUIRES: x86 -# RUN: rm -rf %t; split-file %s %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o - -# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o - -# RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s -# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one - -#--- 2.s -.globl _boo -_boo: - ret - -#--- 3.s -.globl _bar -_bar: - ret - -#--- 4.s -.globl _undefined, _unused -_unused: - ret - -#--- main.s -.global _main -_main: - mov $0, %rax - ret From ee75beadca00751c592c4358091f484ba3176482 Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Tue, 25 Mar 2025 10:33:12 -0400 Subject: [PATCH 2/5] Cleanup --- lld/MachO/InputFiles.cpp | 15 ++++----------- lld/MachO/InputFiles.h | 3 +-- lld/test/MachO/archive-no-index.s | 6 +++++- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 0bf9783f1ac21..f2b8d3ac87b73 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -2169,12 +2169,8 @@ void ArchiveFile::addLazySymbols() { // Check `seen` but don't insert so a future eager load can still happen. if (seen.contains(c.getChildOffset())) continue; - if (!seenLazy.insert(c.getChildOffset()).second) { + if (!seenLazy.insert(c.getChildOffset()).second) continue; - } - // First check seen. - // Then, write to and check seenLazy - // Then, get the file, check for error, and add it to inputs. auto file = childToObjectFile(c, /*lazy=*/true); if (!file) error(toString(this) + @@ -2235,8 +2231,7 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, } } -Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason, - bool lazy) { +Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) { if (!seen.insert(c.getChildOffset()).second) return Error::success(); auto file = childToObjectFile(c, /*lazy=*/false); @@ -2277,10 +2272,8 @@ ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c, if (!modTime) return modTime.takeError(); - Expected file = - loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(), - forceHidden, compatArch, lazy); - return file; + return loadArchiveMember(*mb, toTimeT(*modTime), getName(), + c.getChildOffset(), forceHidden, compatArch, lazy); } static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym, diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 7b2b4769e0b2d..2d5bceb160445 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -292,8 +292,7 @@ class ArchiveFile final : public InputFile { void fetch(const llvm::object::Archive::Symbol &); // LLD normally doesn't use Error for error-handling, but the underlying // Archive library does, so this is the cleanest way to wrap it. - Error fetch(const llvm::object::Archive::Child &, StringRef reason, - bool lazy = false); + Error fetch(const llvm::object::Archive::Child &, StringRef reason); const llvm::object::Archive &getArchive() const { return *file; }; static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s index 4135df0c69f4a..37d28fda5c920 100644 --- a/lld/test/MachO/archive-no-index.s +++ b/lld/test/MachO/archive-no-index.s @@ -6,6 +6,10 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s # RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o # RUN: %lld %t/main.o %t/lib.a -o %t/out +# RUN: llvm-nm %t/out | FileCheck %s + +# CHECK-NOT: T _bar +# CHECK: T _foo ## Test that every kind of eager load mechanism still works. # RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load @@ -36,4 +40,4 @@ _bar: _main: call _foo mov $0, %rax - ret \ No newline at end of file + ret From 711402ed8cf530150456ff34b51b97467aabe44a Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Tue, 8 Apr 2025 17:57:51 -0400 Subject: [PATCH 3/5] Comments --- lld/MachO/Driver.cpp | 2 +- lld/MachO/InputFiles.cpp | 2 +- lld/test/MachO/archive-no-index.ll | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 lld/test/MachO/archive-no-index.ll diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index a407ea018ef63..5c32055166da6 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -360,7 +360,7 @@ static InputFile *addFile(StringRef path, LoadType loadType, ": Archive::children failed: " + toString(std::move(e))); } } else if (isCommandLineLoad && config->forceLoadObjC) { - if (file->getArchive().getNumberOfSymbols() > 0) { + if (file->getArchive().hasSymbolTable()) { for (const object::Archive::Symbol &sym : file->getArchive().symbols()) if (sym.getName().starts_with(objc::symbol_names::klass)) file->fetch(sym); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index f2b8d3ac87b73..a405772982f4f 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -2162,7 +2162,7 @@ void ArchiveFile::addLazySymbols() { if (file->isEmpty()) return; - if (file->getNumberOfSymbols() == 0) { + if (!file->hasSymbolTable()) { // No index, treat each child as a lazy object file. Error e = Error::success(); for (const object::Archive::Child &c : file->children(e)) { diff --git a/lld/test/MachO/archive-no-index.ll b/lld/test/MachO/archive-no-index.ll new file mode 100644 index 0000000000000..8ac42b03d949d --- /dev/null +++ b/lld/test/MachO/archive-no-index.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86 +; RUN: rm -rf %t; split-file %s %t + +; RUN: llvm-as %t/lib.ll -o %t/lib.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s +; RUN: llvm-ar rcST %t/lib.a %t/lib.o +# RUN: %lld %t/main.o %t/lib.a -o %t/out + +;--- main.s +.global _main +_main: + call _foo + mov $0, %rax + ret + +;--- lib.ll +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo() { +entry: + ret void +} From 43361637ffbb9e0cb55980c85a4447ac856d0a2f Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Thu, 10 Apr 2025 12:20:25 -0400 Subject: [PATCH 4/5] Fix comments --- lld/test/MachO/archive-no-index.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/test/MachO/archive-no-index.ll b/lld/test/MachO/archive-no-index.ll index 8ac42b03d949d..85bcf2a23cf86 100644 --- a/lld/test/MachO/archive-no-index.ll +++ b/lld/test/MachO/archive-no-index.ll @@ -2,9 +2,9 @@ ; RUN: rm -rf %t; split-file %s %t ; RUN: llvm-as %t/lib.ll -o %t/lib.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s +; RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s ; RUN: llvm-ar rcST %t/lib.a %t/lib.o -# RUN: %lld %t/main.o %t/lib.a -o %t/out +; RUN: %lld %t/main.o %t/lib.a -o %t/out ;--- main.s .global _main From 192c54e0ccef66ceab2cd522a740b1fe89b3b86c Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Thu, 10 Apr 2025 13:20:40 -0400 Subject: [PATCH 5/5] Empty symbol tables exist --- lld/MachO/InputFiles.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index a405772982f4f..d461647cec51f 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -2159,7 +2159,8 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&f, bool forceHidden) void ArchiveFile::addLazySymbols() { // Avoid calling getMemoryBufferRef() on zero-symbol archive // since that crashes. - if (file->isEmpty()) + if (file->isEmpty() || + (file->hasSymbolTable() && file->getNumberOfSymbols() == 0)) return; if (!file->hasSymbolTable()) {