Skip to content

Commit

Permalink
Add support for chained fixup load commands to MachOObjectFile
Browse files Browse the repository at this point in the history
This is part of a series of patches to upstream support for Mach-O chained fixups.

This patch adds support for parsing the chained fixup load command and
parsing the chained fixups header. It also puts into place the
abstract interface that will be used to iterate over the fixups.

Differential Revision: https://reviews.llvm.org/D113630
  • Loading branch information
adrian-prantl committed Feb 22, 2022
1 parent 621e2de commit a3bfb01
Show file tree
Hide file tree
Showing 7 changed files with 507 additions and 8 deletions.
26 changes: 25 additions & 1 deletion llvm/include/llvm/BinaryFormat/MachO.h
Expand Up @@ -255,7 +255,8 @@ enum BindType {
/// Reserved library-ordinal values (zero and negative) that a bind's library
/// ordinal can be compared against.
enum BindSpecialDylib {
BIND_SPECIAL_DYLIB_SELF = 0,
BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2
BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2,
// Ordinal that identifies a "weak bind": ChainedFixupTarget::weakBind()
// reports true when its library ordinal equals this value.
BIND_SPECIAL_DYLIB_WEAK_LOOKUP = -3
};

enum {
Expand Down Expand Up @@ -1001,6 +1002,19 @@ struct nlist_64 {
uint64_t n_value;
};

/// Structs for dyld chained fixups.
/// dyld_chained_fixups_header is the data pointed to by LC_DYLD_CHAINED_FIXUPS
/// load command.
///
/// Every member is a 32-bit unsigned value; swapStruct() byte-swaps each one
/// individually when the file's endianness differs from the host's.
struct dyld_chained_fixups_header {
uint32_t fixups_version; ///< Format version; 0 is the only value accepted
                         ///< by MachOObjectFile::getDyldChainedFixupTargets().
uint32_t starts_offset; ///< Offset of dyld_chained_starts_in_image.
uint32_t imports_offset; ///< Offset of imports table in chain_data.
uint32_t symbols_offset; ///< Offset of symbol strings in chain_data.
uint32_t imports_count; ///< Number of imported symbol names.
uint32_t imports_format; ///< DYLD_CHAINED_IMPORT*; values outside 1..3 are
                         ///< rejected as malformed by the object-file parser.
uint32_t symbols_format; ///< 0 => uncompressed, 1 => zlib compressed
};

// Byte order swapping functions for MachO structs

inline void swapStruct(fat_header &mh) {
Expand Down Expand Up @@ -2008,6 +2022,16 @@ union alignas(4) macho_load_command {
};
LLVM_PACKED_END

/// Reverses the byte order of every field of a dyld_chained_fixups_header in
/// place.
inline void swapStruct(dyld_chained_fixups_header &C) {
  // Every member of the header is a uint32_t, so the fields can be visited
  // uniformly through a small pointer table (kept in declaration order).
  uint32_t *const Fields[] = {
      &C.fixups_version, &C.starts_offset, &C.imports_offset,
      &C.symbols_offset, &C.imports_count, &C.imports_format,
      &C.symbols_format};
  for (uint32_t *Field : Fields)
    sys::swapByteOrder(*Field);
}

/* code signing attributes of a process */

enum CodeSignAttrs {
Expand Down
126 changes: 126 additions & 0 deletions llvm/include/llvm/Object/MachO.h
Expand Up @@ -260,6 +260,126 @@ class MachOBindEntry {
};
using bind_iterator = content_iterator<MachOBindEntry>;

/// ChainedFixupTarget holds all the information about an external symbol
/// necessary to bind this binary to that symbol. These values are referenced
/// indirectly by chained fixup binds. This structure captures values from all
/// import and symbol formats.
///
/// Be aware there are two notions of weak here:
///   WeakImport == true
///     The associated bind may be set to 0 if this symbol is missing from its
///     parent library. This is called a "weak import."
///   LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP
///     This symbol may be coalesced with other libraries vending the same
///     symbol. E.g., C++'s "operator new". This is called a "weak bind."
///
/// All accessors are read-only and const-qualified, so they can be used
/// through const references (for example, elements of a const vector).
struct ChainedFixupTarget {
public:
  /// \param LibOrdinal library ordinal of the symbol; may be one of the
  ///        MachO::BIND_SPECIAL_DYLIB_* values.
  /// \param Symbol name of the symbol; stored as a non-owning StringRef.
  /// \param Addend value returned by addend().
  /// \param WeakImport whether the symbol is a weak import (see above).
  ChainedFixupTarget(int LibOrdinal, StringRef Symbol, uint64_t Addend,
                     bool WeakImport)
      : LibOrdinal(LibOrdinal), SymbolName(Symbol), Addend(Addend),
        WeakImport(WeakImport) {}

  /// \return the library ordinal passed at construction.
  int libOrdinal() const { return LibOrdinal; }
  /// \return the symbol name passed at construction.
  StringRef symbolName() const { return SymbolName; }
  /// \return the addend passed at construction.
  uint64_t addend() const { return Addend; }
  /// \return true if this target is a "weak import".
  bool weakImport() const { return WeakImport; }
  /// \return true if this target is a "weak bind", i.e. its library ordinal
  /// is BIND_SPECIAL_DYLIB_WEAK_LOOKUP.
  bool weakBind() const {
    return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
  }

private:
  int LibOrdinal;
  StringRef SymbolName;
  uint64_t Addend;
  bool WeakImport;
};

/// MachOAbstractFixupEntry is an abstract class representing a fixup in a
/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also
/// subdivide into additional subtypes (weak, lazy, reexport).
///
/// The two concrete subclasses of MachOAbstractFixupEntry are:
///
///   MachORebaseBindEntry   - for dyld opcode-based tables, including threaded-
///                            rebase, where rebases are mixed in with other
///                            bind opcodes.
///   MachOChainedFixupEntry - for pointer chains embedded in data pages.
class MachOAbstractFixupEntry {
public:
  /// \param Err error out-parameter retained by the entry.
  /// \param O the object file this entry iterates over; its load commands
  ///        are scanned for the __TEXT segment at construction time.
  MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O);

  int32_t segmentIndex() const;
  uint64_t segmentOffset() const;
  uint64_t segmentAddress() const;
  StringRef segmentName() const;
  StringRef sectionName() const;
  StringRef typeName() const;
  StringRef symbolName() const;
  uint32_t flags() const;
  int64_t addend() const;
  int ordinal() const;

  /// \return the location of this fixup as a VM Address. For the VM
  /// Address this fixup is pointing to, use pointerValue().
  uint64_t address() const;

  /// \return the VM Address pointed to by this fixup. Use
  /// pointerValue() to compare against other VM Addresses, such as
  /// section addresses or segment vmaddrs.
  uint64_t pointerValue() const { return PointerValue; }

  /// \return the raw "on-disk" representation of the fixup. For
  /// Threaded rebases and Chained pointers these values are generally
  /// encoded into various different pointer formats. This value is
  /// exposed in API for tools that want to display and annotate the
  /// raw bits.
  uint64_t rawValue() const { return RawValue; }

  void moveNext();

protected:
  Error *E;
  const MachOObjectFile *O;
  uint64_t SegmentOffset = 0;
  int32_t SegmentIndex = -1;
  StringRef SymbolName;
  int32_t Ordinal = 0;
  uint32_t Flags = 0;
  int64_t Addend = 0;
  uint64_t PointerValue = 0;
  uint64_t RawValue = 0;
  bool Done = false;

  /// Resets the per-fixup position state and clears Done.
  void moveToFirst();
  /// Marks the entry as finished by setting Done.
  void moveToEnd();

  /// \return the vm address of the start of __TEXT segment, or 0 if no
  /// __TEXT segment was found when the entry was constructed.
  uint64_t textAddress() const { return TextAddress; }

private:
  // Default-initialized to 0: the constructor only assigns this when it finds
  // a __TEXT segment, and textAddress() must never read an indeterminate
  // value.
  uint64_t TextAddress = 0;
};

/// Fixup entry for chained fixups. It carries the list of ChainedFixupTarget
/// values obtained from the object file plus an index used when comparing
/// two entries. It serves as the content type of fixup_iterator.
class MachOChainedFixupEntry : public MachOAbstractFixupEntry {
public:
/// Kind selector supplied by the caller (e.g. MachOObjectFile::fixupTable)
/// and stored in the entry.
enum class FixupKind { All, Bind, WeakBind, Rebase };

/// \param Err error out-parameter; receives the error if loading the fixup
///        targets fails.
/// \param O the object file to read from.
/// \param Kind the kind selector to store in this entry.
/// \param Parse when true, the constructor loads the fixup targets from
///        \p O via getDyldChainedFixupTargets(); when false, no targets are
///        loaded.
MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, FixupKind Kind,
bool Parse);

/// Equality used by the iterator wrapper to detect the end of the range.
bool operator==(const MachOChainedFixupEntry &) const;

// These hide the same-named, non-virtual members of
// MachOAbstractFixupEntry.
void moveNext();
void moveToFirst();
void moveToEnd();

private:
// Targets loaded at construction when Parse was true; empty otherwise.
std::vector<ChainedFixupTarget> FixupTargets;
// Reset to 0 by moveToFirst() and compared in operator==.
uint32_t FixupIndex = 0;
FixupKind Kind;
};
using fixup_iterator = content_iterator<MachOChainedFixupEntry>;

class MachOObjectFile : public ObjectFile {
public:
struct LoadCommandInfo {
Expand Down Expand Up @@ -402,6 +522,10 @@ class MachOObjectFile : public ObjectFile {
/// For use iterating over all bind table entries.
iterator_range<bind_iterator> bindTable(Error &Err);

/// For iterating over all chained fixups.
iterator_range<fixup_iterator>
fixupTable(Error &Err, MachOChainedFixupEntry::FixupKind Kind);

/// For use iterating over all lazy bind table entries.
iterator_range<bind_iterator> lazyBindTable(Error &Err);

Expand Down Expand Up @@ -562,6 +686,7 @@ class MachOObjectFile : public ObjectFile {
ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const;
ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
SmallVector<uint64_t> getFunctionStarts() const;
ArrayRef<uint8_t> getUuid() const;
Expand Down Expand Up @@ -691,6 +816,7 @@ class MachOObjectFile : public ObjectFile {
const char *LinkOptHintsLoadCmd = nullptr;
const char *DyldInfoLoadCmd = nullptr;
const char *FuncStartsLoadCmd = nullptr;
const char *DyldChainedFixupsLoadCmd = nullptr;
const char *UuidLoadCmd = nullptr;
bool HasPageZeroSegment = false;
};
Expand Down
161 changes: 158 additions & 3 deletions llvm/lib/Object/MachOObjectFile.cpp
Expand Up @@ -1380,6 +1380,11 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
"LC_DYLD_INFO_ONLY", Elements)))
return;
} else if (Load.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) {
if ((Err = checkLinkeditDataCommand(
*this, Load, I, &DyldChainedFixupsLoadCmd,
"LC_DYLD_CHAINED_FIXUPS", Elements, "chained fixups")))
return;
} else if (Load.C.cmd == MachO::LC_UUID) {
if (Load.C.cmdsize != sizeof(MachO::uuid_command)) {
Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect "
Expand Down Expand Up @@ -1595,9 +1600,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
// Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported.
} else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) {
if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
&TwoLevelHintsLoadCmd, Elements)))
return;
if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
&TwoLevelHintsLoadCmd, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_IDENT) {
// Note: LC_IDENT is ignored.
continue;
Expand Down Expand Up @@ -3185,6 +3190,106 @@ iterator_range<export_iterator> MachOObjectFile::exports(Error &Err) const {
return exports(Err, getDyldInfoExportsTrie(), this);
}

/// Stores the error out-parameter and object file, then caches the vmaddr of
/// the first __TEXT segment found among the file's load commands.
///
/// \param E error out-parameter retained by the entry.
/// \param O the object file whose load commands are scanned.
MachOAbstractFixupEntry::MachOAbstractFixupEntry(Error *E,
                                                 const MachOObjectFile *O)
    : E(E), O(O), TextAddress(0) {
  // TextAddress starts at 0 so that textAddress() never returns an
  // indeterminate value when the file contains no __TEXT segment.

  // Cache the vmaddress of __TEXT
  for (const auto &Command : O->load_commands()) {
    if (Command.C.cmd == MachO::LC_SEGMENT) {
      MachO::segment_command SLC = O->getSegmentLoadCommand(Command);
      if (StringRef(SLC.segname) == StringRef("__TEXT")) {
        TextAddress = SLC.vmaddr;
        break;
      }
    } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
      MachO::segment_command_64 SLC_64 = O->getSegment64LoadCommand(Command);
      if (StringRef(SLC_64.segname) == StringRef("__TEXT")) {
        TextAddress = SLC_64.vmaddr;
        break;
      }
    }
  }
}

// Read-only accessors for the current fixup's state. Each one either returns
// a stored member directly or forwards the stored segment index/offset to a
// BindRebase* query on the owning MachOObjectFile.

/// \return the stored segment index (-1 after moveToFirst() resets it).
int32_t MachOAbstractFixupEntry::segmentIndex() const { return SegmentIndex; }

/// \return the stored offset within the current segment.
uint64_t MachOAbstractFixupEntry::segmentOffset() const {
return SegmentOffset;
}

/// \return the object file's BindRebaseAddress() for the current segment at
/// offset 0.
uint64_t MachOAbstractFixupEntry::segmentAddress() const {
return O->BindRebaseAddress(SegmentIndex, 0);
}

/// \return the object file's BindRebaseSegmentName() for the current segment.
StringRef MachOAbstractFixupEntry::segmentName() const {
return O->BindRebaseSegmentName(SegmentIndex);
}

/// \return the object file's BindRebaseSectionName() for the current segment
/// index and offset.
StringRef MachOAbstractFixupEntry::sectionName() const {
return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
}

/// \return the object file's BindRebaseAddress() for the current segment
/// index and offset.
uint64_t MachOAbstractFixupEntry::address() const {
return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
}

/// \return the stored symbol name.
StringRef MachOAbstractFixupEntry::symbolName() const { return SymbolName; }

/// \return the stored addend.
int64_t MachOAbstractFixupEntry::addend() const { return Addend; }

/// \return the stored flags.
uint32_t MachOAbstractFixupEntry::flags() const { return Flags; }

/// \return the stored ordinal.
int MachOAbstractFixupEntry::ordinal() const { return Ordinal; }

/// \return the fixed string "unknown"; the base class has no type
/// information of its own.
StringRef MachOAbstractFixupEntry::typeName() const { return "unknown"; }

/// Puts the entry back into its initial state: no segment selected, zeroed
/// offset/ordinal/flags/addend, and not finished.
void MachOAbstractFixupEntry::moveToFirst() {
  // Not finished yet.
  Done = false;

  // No segment is selected until a fixup is located.
  SegmentIndex = -1;
  SegmentOffset = 0;

  // Clear the per-fixup bind data.
  Addend = 0;
  Flags = 0;
  Ordinal = 0;
}

/// Marks the entry as finished by setting Done; no other state is touched.
void MachOAbstractFixupEntry::moveToEnd() { Done = true; }

/// Constructs a chained fixup entry for \p O.
///
/// \param E error out-parameter; set to the failure if loading the fixup
///        targets fails.
/// \param O the object file to read from.
/// \param Kind the kind selector stored in this entry.
/// \param Parse when true, load the fixup targets from \p O; when false,
///        leave the target list empty.
MachOChainedFixupEntry::MachOChainedFixupEntry(Error *E,
                                               const MachOObjectFile *O,
                                               FixupKind Kind, bool Parse)
    : MachOAbstractFixupEntry(E, O), Kind(Kind) {
  ErrorAsOutParameter e(E);
  if (!Parse)
    return;

  auto FixupTargetsOrErr = O->getDyldChainedFixupTargets();
  if (!FixupTargetsOrErr) {
    *E = FixupTargetsOrErr.takeError();
    return;
  }

  // The Expected is a local that is discarded right after this, so take
  // ownership of its vector instead of copying every target.
  FixupTargets = std::move(*FixupTargetsOrErr);
}

/// Resets the base-class state and the fixup index, then advances once via
/// moveNext().
void MachOChainedFixupEntry::moveToFirst() {
MachOAbstractFixupEntry::moveToFirst();
FixupIndex = 0;
// moveNext() currently just marks the entry as done, so after this call the
// entry is already in the finished state.
moveNext();
}

/// Puts the entry into the finished state by delegating to the base class,
/// which sets Done.
void MachOChainedFixupEntry::moveToEnd() {
MachOAbstractFixupEntry::moveToEnd();
}

/// Advances the entry. No fixups are walked here: the entry is simply marked
/// as done.
void MachOChainedFixupEntry::moveNext() { Done = true; }

/// Compares two entries for iterator equality.
///
/// \return true if both entries are finished, or if neither is finished and
/// both are at the same fixup index. A finished entry never equals an
/// unfinished one.
bool MachOChainedFixupEntry::operator==(
    const MachOChainedFixupEntry &Other) const {
  // All finished entries are interchangeable "end" positions.
  if (Done && Other.Done)
    return true;
  // Exactly one side is finished: they cannot be at the same position, even
  // if their indices happen to match.
  if (Done != Other.Done)
    return false;
  // Both are still iterating; equality is decided by the position.
  return FixupIndex == Other.FixupIndex;
}

MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit)
: E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
Expand Down Expand Up @@ -4193,6 +4298,18 @@ iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) {
MachOBindEntry::Kind::Weak);
}

/// Builds the [begin, end) iterator range over this file's chained fixups.
///
/// \param Err error out-parameter handed to both entries.
/// \param Kind the fixup kind selector stored in both entries.
iterator_range<fixup_iterator>
MachOObjectFile::fixupTable(Error &Err,
                            MachOChainedFixupEntry::FixupKind Kind) {
  // Only the begin entry loads the fixup targets; the end entry is a pure
  // sentinel and skips that work.
  MachOChainedFixupEntry Begin(&Err, this, Kind, /*Parse=*/true);
  Begin.moveToFirst();

  MachOChainedFixupEntry End(&Err, this, Kind, /*Parse=*/false);
  End.moveToEnd();

  return make_range(fixup_iterator(Begin), fixup_iterator(End));
}

MachOObjectFile::load_command_iterator
MachOObjectFile::begin_load_commands() const {
return LoadCommands.begin();
Expand Down Expand Up @@ -4648,6 +4765,44 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
}

/// Reads the LC_DYLD_CHAINED_FIXUPS load command and validates the chained
/// fixups header it points to.
///
/// \return an empty target list when the load command is absent, when its
/// data offset is zero, or when the header validates; an error when either
/// structure cannot be read or the header has an unknown version or imports
/// format.
Expected<std::vector<ChainedFixupTarget>>
MachOObjectFile::getDyldChainedFixupTargets() const {
  std::vector<ChainedFixupTarget> Targets;

  // Without the load command there is nothing to parse.
  if (DyldChainedFixupsLoadCmd == nullptr)
    return Targets;

  // Load the dyld chained fixups load command.
  auto LinkeditCmdOrErr = getStructOrErr<MachO::linkedit_data_command>(
      *this, DyldChainedFixupsLoadCmd);
  if (!LinkeditCmdOrErr)
    return LinkeditCmdOrErr.takeError();
  const MachO::linkedit_data_command &LinkeditCmd = *LinkeditCmdOrErr;

  // If the load command is present but the data offset has been zeroed out,
  // as is the case for dylib stubs, return an empty list of targets.
  const uint64_t HeaderOffset = LinkeditCmd.dataoff;
  if (HeaderOffset == 0)
    return Targets;

  // Load the dyld chained fixups header.
  auto HeaderOrErr = getStructOrErr<MachO::dyld_chained_fixups_header>(
      *this, getPtr(*this, HeaderOffset));
  if (!HeaderOrErr)
    return HeaderOrErr.takeError();
  const MachO::dyld_chained_fixups_header &Header = *HeaderOrErr;

  // Reject unknown chained fixup formats.
  if (Header.fixups_version != 0)
    return malformedError(Twine("bad chained fixups: unknown version: ") +
                          Twine(Header.fixups_version));

  const bool KnownImportsFormat =
      Header.imports_format >= 1 && Header.imports_format <= 3;
  if (!KnownImportsFormat)
    return malformedError(
        Twine("bad chained fixups: unknown imports format: ") +
        Twine(Header.imports_format));

  return Targets;
}

ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
if (!DyldInfoLoadCmd)
return None;
Expand Down

0 comments on commit a3bfb01

Please sign in to comment.