Skip to content

Commit

Permalink
[lld-macho] Fixed crashes when linking with incompatible-arch archives/
Browse files Browse the repository at this point in the history
Two changes:
 - Avoid crashing in predicate functions.
   Querying the property of the Symbols via these is*() functions shouldn't crash the program - the answer should just be "false".
   Currently, having them throw UNREACHABLE already (incorrectly) crash certain code paths involving macho::validateSymbolRelocation() .

 - Simply ignore input archives with incompatible arch (changes from PRESIDENT810@)

Differential Revision: https://reviews.llvm.org/D156468
  • Loading branch information
oontvoo committed Aug 4, 2023
1 parent 16a0fc2 commit 5ba9063
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 27 deletions.
90 changes: 69 additions & 21 deletions lld/MachO/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,27 @@ static bool checkCompatibility(const InputFile *input) {
return true;
}

template <class Header>
static bool compatWithTargetArch(const InputFile *file, const Header *hdr) {
uint32_t cpuType;
std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch());

if (hdr->cputype != cpuType) {
Architecture arch =
getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
auto msg = config->errorForArchMismatch
? static_cast<void (*)(const Twine &)>(error)
: warn;

msg(toString(file) + " has architecture " + getArchitectureName(arch) +
" which is incompatible with target architecture " +
getArchitectureName(config->arch()));
return false;
}

return checkCompatibility(file);
}

// This cache mostly exists to store system libraries (and .tbds) as they're
// loaded, rather than the input archives, which are already cached at a higher
// level, and other files like the filelist that are only read once.
Expand Down Expand Up @@ -930,9 +951,10 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
}

ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy, bool forceHidden)
bool lazy, bool forceHidden, bool compatArch)
: InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) {
this->archiveName = std::string(archiveName);
this->compatArch = compatArch;
if (lazy) {
if (target->wordSize == 8)
parseLazy<LP64>();
Expand All @@ -955,21 +977,10 @@ template <class LP> void ObjFile::parse() {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());

uint32_t cpuType;
std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch());
if (hdr->cputype != cpuType) {
Architecture arch =
getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
auto msg = config->errorForArchMismatch
? static_cast<void (*)(const Twine &)>(error)
: warn;
msg(toString(this) + " has architecture " + getArchitectureName(arch) +
" which is incompatible with target architecture " +
getArchitectureName(config->arch()));
// If we've already checked the arch, then don't need to check again.
if (!compatArch)
return;
}

if (!checkCompatibility(this))
if (!(compatArch = compatWithTargetArch(this, hdr)))
return;

for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
Expand Down Expand Up @@ -1026,6 +1037,12 @@ template <class LP> void ObjFile::parseLazy() {

auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());

if (!compatArch)
return;
if (!(compatArch = compatWithTargetArch(this, hdr)))
return;

const load_command *cmd = findCommand(hdr, LC_SYMTAB);
if (!cmd)
return;
Expand Down Expand Up @@ -2089,22 +2106,45 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
forceHidden(forceHidden) {}

void ArchiveFile::addLazySymbols() {
Error err = Error::success();
Expected<MemoryBufferRef> mbOrErr =
this->file->child_begin(err)->getMemoryBufferRef();

// Ignore the I/O error here - will be reported later.
if (!mbOrErr) {
llvm::consumeError(mbOrErr.takeError());
} else if (!err) {
if (identify_magic(mbOrErr->getBuffer()) == file_magic::macho_object) {
if (target->wordSize == 8)
compatArch = compatWithTargetArch(
this, reinterpret_cast<const LP64::mach_header *>(
mbOrErr->getBufferStart()));
else
compatArch = compatWithTargetArch(
this, reinterpret_cast<const ILP32::mach_header *>(
mbOrErr->getBufferStart()));

if (!compatArch)
return;
}
}
for (const object::Archive::Symbol &sym : file->symbols())
symtab->addLazyArchive(sym.getName(), this, sym);
}

static Expected<InputFile *>
loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
uint64_t offsetInArchive, bool forceHidden) {
uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
if (config->zeroModTime)
modTime = 0;

switch (identify_magic(mb.getBuffer())) {
case file_magic::macho_object:
return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden);
return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
compatArch);
case file_magic::bitcode:
return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
forceHidden);
forceHidden, compatArch);
default:
return createStringError(inconvertibleErrorCode(),
mb.getBufferIdentifier() +
Expand All @@ -2128,8 +2168,9 @@ Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
if (!modTime)
return modTime.takeError();

Expected<InputFile *> file = loadArchiveMember(
*mb, toTimeT(*modTime), getName(), c.getChildOffset(), forceHidden);
Expected<InputFile *> file =
loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
forceHidden, compatArch);

if (!file)
return file.takeError();
Expand Down Expand Up @@ -2192,13 +2233,20 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
}

BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy, bool forceHidden)
uint64_t offsetInArchive, bool lazy, bool forceHidden,
bool compatArch)
: InputFile(BitcodeKind, mb, lazy), forceHidden(forceHidden) {
this->archiveName = std::string(archiveName);
this->compatArch = compatArch;
std::string path = mb.getBufferIdentifier().str();
if (config->thinLTOIndexOnly)
path = replaceThinLTOSuffix(mb.getBufferIdentifier());

// If the parent archive already determines that the arch is not compat with
// target, then just return.
if (!compatArch)
return;

// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two members with the same name are provided, this causes a
// collision and ThinLTO can't proceed.
Expand Down
7 changes: 5 additions & 2 deletions lld/MachO/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ class InputFile {

InputFile(Kind, const llvm::MachO::InterfaceFile &);

// If true, this input's arch is compatiable with target.
bool compatArch = true;

private:
const Kind fileKind;
const StringRef name;
Expand All @@ -157,7 +160,7 @@ struct FDE {
class ObjFile final : public InputFile {
public:
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy = false, bool forceHidden = false);
bool lazy = false, bool forceHidden = false, bool compatArch = true);
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
ArrayRef<uint8_t> getOptimizationHints() const;
template <class LP> void parse();
Expand Down Expand Up @@ -301,7 +304,7 @@ class BitcodeFile final : public InputFile {
public:
explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy = false,
bool forceHidden = false);
bool forceHidden = false, bool compatArch = true);
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
void parse();

Expand Down
4 changes: 2 additions & 2 deletions lld/MachO/Symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ class Symbol {

virtual uint64_t getVA() const { return 0; }

virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
virtual bool isWeakDef() const { return false; }

// Only undefined or dylib symbols can be weak references. A weak reference
// need not be satisfied at runtime, e.g. due to the symbol not being
// available on a given target platform.
virtual bool isWeakRef() const { return false; }

virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
virtual bool isTlv() const { return false; }

// Whether this symbol is in the GOT or TLVPointer sections.
bool isInGot() const { return gotIndex != UINT32_MAX; }
Expand Down
71 changes: 71 additions & 0 deletions lld/test/MachO/ignore-incompat-arch.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
## Test that LLD correctly ignored archives with incompatible architecture without crashing.

# RUN: rm -rf %t; split-file %s %t

# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %t/callee.s -o %t/callee_arm64.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/callee.s -o %t/callee_x86_64.o

# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %t/caller.s -o %t/caller_arm64.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/caller.s -o %t/caller_x86_64.o

# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %t/main.s -o %t/main_arm64.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/main.s -o %t/main_x86_64.o

# RUN: llvm-ar rc %t/libcallee_arm64.a %t/callee_arm64.o
# RUN: llvm-ar r %t/libcallee_x86.a %t/callee_x86_64.o

# RUN: llvm-ar r %t/libcaller_arm64.a %t/caller_arm64.o
# RUN: llvm-ar r %t/libcaller_x86.a %t/caller_x86_64.o

## Symbol from the arm64 archive should be ignored even tho it appears before the x86 archive.
# RUN: %no-fatal-warnings-lld -map %t/x86_a.map -arch x86_64 %t/main_x86_64.o %t/libcallee_arm64.a %t/libcallee_x86.a %t/libcaller_x86.a -o %t/x86_a.out 2>&1 \
# RUN: | FileCheck -check-prefix=X86-WARNING %s

# RUN: %no-fatal-warnings-lld -map %t/x86_b.map -arch x86_64 %t/main_x86_64.o %t/libcallee_x86.a %t/libcallee_arm64.a %t/libcaller_x86.a -o %t/x86_b.out 2>&1 \
# RUN: | FileCheck -check-prefix=X86-WARNING %s

# RUN: %no-fatal-warnings-lld -map %t/arm64_a.map -arch arm64 %t/main_arm64.o %t/libcallee_x86.a %t/libcallee_arm64.a %t/libcaller_arm64.a -o %t/arm64_a.out 2>&1 \
# RUN: | FileCheck -check-prefix=ARM64-WARNING %s

# RUN: %no-fatal-warnings-lld -map %t/arm64_b.map -arch arm64 %t/main_arm64.o %t/libcallee_arm64.a %t/libcallee_x86.a %t/libcaller_arm64.a -o %t/arm64_b.out 2>&1 \
# RUN: | FileCheck -check-prefix=ARM64-WARNING %s

## Verify that the output doesn't take any symbol from the in-compat archive
# RUN: FileCheck --check-prefix=SYM-X86 %s --input-file=%t/x86_a.map
# RUN: FileCheck --check-prefix=SYM-X86 %s --input-file=%t/x86_b.map

# RUN: FileCheck --check-prefix=SYM-ARM64 %s --input-file=%t/arm64_a.map
# RUN: FileCheck --check-prefix=SYM-ARM64 %s --input-file=%t/arm64_b.map


# X86-WARNING: libcallee_arm64.a has architecture arm64 which is incompatible with target architecture x86_64

# ARM64-WARNING: libcallee_x86.a has architecture x86_64 which is incompatible with target architecture arm64

# SYM-X86-NOT: libcallee_arm64.a
# SYM-X86: {{.+}}main_x86_64.o
# SYM-X86: {{.+}}libcallee_x86.a(callee_x86_64.o)
# SYM-X86: {{.+}}libcaller_x86.a(caller_x86_64.o)

# SYM-ARM64-NOT: libcallee_x86.a
# SYM-ARM64: {{.+}}main_arm64.o
# SYM-ARM64: {{.+}}libcallee_arm64.a(callee_arm64.o)
# SYM-ARM64: {{.+}}libcaller_arm64.a(caller_arm64.o)


#--- callee.s
.globl _callee
_callee:
ret

#--- caller.s
.globl _caller
_caller:
.quad _callee
ret

#--- main.s
.globl _main
_main:
.quad _caller
ret
8 changes: 6 additions & 2 deletions lld/test/MachO/objc.s
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,19 @@

# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o

# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC -ObjC
# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC -ObjC
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC

# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC2 -ObjC
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC

# RUN: %lld -lSystem %t/test.o -o %t/test --start-lib %t/no-objc.o %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/has-swift-proto.o %t/wrong-arch.o --end-lib -ObjC
# RUN: %no-fatal-warnings-lld -lSystem %t/test.o -o %t/test --start-lib %t/no-objc.o %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/has-swift-proto.o %t/wrong-arch.o --end-lib -ObjC 2>&1 \
# RUN: | FileCheck -check-prefix=WARNING %s
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC

# WARNING: {{.+}}wrong-arch.o has architecture armv7 which is incompatible with target architecture x86_64
# WARNING-NOT: {{.}}

# OBJC: Sections:
# OBJC-NEXT: Idx Name Size VMA Type
# OBJC-NEXT: 0 __text {{.*}} TEXT
Expand Down

0 comments on commit 5ba9063

Please sign in to comment.