Skip to content

Commit

Permalink
[lld-macho] Extend SyntheticSections to cover all segment load commands
Browse files Browse the repository at this point in the history
Previously, the special segments `__PAGEZERO` and `__LINKEDIT` were
implemented as special LoadCommands. This diff instead implements them using
special sections that have an `isHidden()` attribute. We do not
emit section headers for hidden sections, but we use their addresses and
file offsets to determine those of their containing segments. In addition
to allowing us to share more segment-related code, this refactor is also
important for the next step of emitting dylibs:

1) dylibs don't have segments like __PAGEZERO, so we need an easy way of
   omitting them without messing up segment indices
2) Unlike the kernel, which is happy to run an executable with
   out-of-order segments, dyld requires dylibs to have their segment
   load commands arranged in increasing address order. The refactor
   makes it easier to implement sorting of sections and segments.

Differential Revision: https://reviews.llvm.org/D76839
  • Loading branch information
int3 authored and smeenai committed Apr 27, 2020
1 parent 64d44ae commit 6f63216
Show file tree
Hide file tree
Showing 11 changed files with 467 additions and 239 deletions.
12 changes: 0 additions & 12 deletions lld/MachO/Driver.cpp
Expand Up @@ -139,10 +139,6 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
return !errorCount();
}

getOrCreateOutputSegment("__TEXT", VM_PROT_READ | VM_PROT_EXECUTE);
getOrCreateOutputSegment("__DATA", VM_PROT_READ | VM_PROT_WRITE);
getOrCreateOutputSegment("__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE);

for (opt::Arg *arg : args) {
switch (arg->getOption().getID()) {
case OPT_INPUT:
Expand All @@ -167,14 +163,6 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
for (InputSection *sec : file->sections)
inputSections.push_back(sec);

// Add input sections to output segments.
for (InputSection *isec : inputSections) {
OutputSegment *os =
getOrCreateOutputSegment(isec->segname, VM_PROT_READ | VM_PROT_WRITE);
isec->parent = os;
os->sections[isec->name].push_back(isec);
}

// Write to an output file.
writeResult();

Expand Down
5 changes: 5 additions & 0 deletions lld/MachO/InputSection.cpp
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "InputSection.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
Expand All @@ -20,6 +21,10 @@ using namespace lld::macho;

std::vector<InputSection *> macho::inputSections;

// Computes where this section lands in the output file: the parent segment's
// file offset plus this section's address distance from the first section of
// that segment.
uint64_t InputSection::getFileOffset() const {
  const uint64_t segmentStartAddr = parent->firstSection()->addr;
  const uint64_t segmentFileOff = parent->fileOff;
  return segmentFileOff + addr - segmentStartAddr;
}

void InputSection::writeTo(uint8_t *buf) {
memcpy(buf, data.data(), data.size());

Expand Down
12 changes: 11 additions & 1 deletion lld/MachO/InputSection.h
Expand Up @@ -32,17 +32,27 @@ struct Reloc {
class InputSection {
public:
virtual ~InputSection() = default;
virtual void writeTo(uint8_t *buf);
virtual size_t getSize() const { return data.size(); }
virtual uint64_t getFileSize() const { return getSize(); }
uint64_t getFileOffset() const;
// Don't emit section_64 headers for hidden sections.
virtual bool isHidden() const { return false; }
// Unneeded sections are omitted entirely (header and body).
virtual bool isNeeded() const { return true; }
virtual void writeTo(uint8_t *buf);

InputFile *file = nullptr;
OutputSegment *parent = nullptr;
StringRef name;
StringRef segname;

ArrayRef<uint8_t> data;

// TODO these properties ought to live in an OutputSection class.
// Move them once available.
uint64_t addr = 0;
uint32_t align = 1;
uint32_t sectionIndex = 0;
uint32_t flags = 0;

std::vector<Reloc> relocs;
Expand Down
58 changes: 46 additions & 12 deletions lld/MachO/OutputSegment.cpp
Expand Up @@ -7,24 +7,58 @@
//===----------------------------------------------------------------------===//

#include "OutputSegment.h"
#include "InputSection.h"

#include "lld/Common/Memory.h"
#include "llvm/BinaryFormat/MachO.h"

using namespace llvm;
using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;

// Chooses the initial VM protection of a segment purely from its name.
// __TEXT is read+execute, __PAGEZERO has no access at all, __LINKEDIT is
// read-only, and every other segment is read+write.
static uint32_t initProt(StringRef name) {
  uint32_t prot = VM_PROT_READ | VM_PROT_WRITE;
  if (name == segment_names::text)
    prot = VM_PROT_READ | VM_PROT_EXECUTE;
  else if (name == segment_names::pageZero)
    prot = 0;
  else if (name == segment_names::linkEdit)
    prot = VM_PROT_READ;
  return prot;
}

// Chooses the maximum VM protection of a segment from its name: __PAGEZERO
// gets none, every other segment gets read+write+execute.
static uint32_t maxProt(StringRef name) {
  const bool isPageZero = name == segment_names::pageZero;
  return isPageZero ? 0 : (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
}

// Attaches `isec` to this segment, grouping it with the other input sections
// that share its name. The first input section of a given name bumps
// numNonHiddenSections unless that section is hidden.
void OutputSegment::addSection(InputSection *isec) {
  isec->parent = this;

  std::vector<InputSection *> &sameNamed = sections[isec->name];
  const bool startsVisibleSection = sameNamed.empty() && !isec->isHidden();
  if (startsVisibleSection)
    ++numNonHiddenSections;

  sameNamed.push_back(isec);
}

static llvm::DenseMap<StringRef, OutputSegment *> nameToOutputSegment;
std::vector<OutputSegment *> macho::outputSegments;

OutputSegment *macho::getOrCreateOutputSegment(StringRef name, uint32_t perms) {
for (OutputSegment *os : outputSegments)
if (os->name == name)
// TODO: assert that os->perms == perms, once we figure out what to do
// about default-created segments.
return os;

auto *os = make<OutputSegment>();
os->name = name;
os->perms = perms;
outputSegments.push_back(os);
return os;
// Looks up an already-created output segment by name; yields nullptr when no
// segment of that name has been registered.
OutputSegment *macho::getOutputSegment(StringRef name) {
  auto it = nameToOutputSegment.find(name);
  if (it == nameToOutputSegment.end())
    return nullptr;
  return it->second;
}

// Returns the output segment called `name`, creating and registering it on
// first use. A new segment gets its protections derived from its name and is
// appended to the global outputSegments list.
OutputSegment *macho::getOrCreateOutputSegment(StringRef name) {
  OutputSegment *&slot = nameToOutputSegment[name];
  if (slot == nullptr) {
    OutputSegment *seg = make<OutputSegment>();
    seg->name = name;
    seg->maxProt = maxProt(name);
    seg->initProt = initProt(name);

    slot = seg;
    outputSegments.push_back(seg);
  }
  return slot;
}
29 changes: 27 additions & 2 deletions lld/MachO/OutputSegment.h
Expand Up @@ -15,6 +15,14 @@
namespace lld {
namespace macho {

// Names of the segments that the linker treats specially, e.g. when choosing
// a segment's VM protections.
namespace segment_names {

// Segment that the Mach-O header section is placed in.
constexpr const char *text = "__TEXT";
// Segment created by the hidden page-zero section.
constexpr const char *pageZero = "__PAGEZERO";
// Segment holding link-edit data such as the binding opcodes.
constexpr const char *linkEdit = "__LINKEDIT";

} // namespace segment_names

class InputSection;

class OutputSegment {
Expand All @@ -23,15 +31,32 @@ class OutputSegment {

InputSection *lastSection() const { return sections.back().second.back(); }

bool isNeeded() const {
return !sections.empty() || name == segment_names::linkEdit;
}

void addSection(InputSection *);

const llvm::MapVector<StringRef, std::vector<InputSection *>> &
getSections() const {
return sections;
}

uint64_t fileOff = 0;
StringRef name;
uint32_t perms;
uint32_t numNonHiddenSections = 0;
uint32_t maxProt = 0;
uint32_t initProt = 0;
uint8_t index;

private:
llvm::MapVector<StringRef, std::vector<InputSection *>> sections;
};

extern std::vector<OutputSegment *> outputSegments;

OutputSegment *getOrCreateOutputSegment(StringRef name, uint32_t perms);
OutputSegment *getOutputSegment(StringRef name);
OutputSegment *getOrCreateOutputSegment(StringRef name);

} // namespace macho
} // namespace lld
Expand Down
98 changes: 98 additions & 0 deletions lld/MachO/SyntheticSections.cpp
Expand Up @@ -7,13 +7,60 @@
//===----------------------------------------------------------------------===//

#include "SyntheticSections.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "Writer.h"

#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support;

namespace lld {
namespace macho {

// Places the header section in __TEXT under the synthetic header name.
MachHeaderSection::MachHeaderSection() {
  name = section_names::header;
  // The header lives in __TEXT because dyld3's MachOLoaded::getSlide()
  // assumes that the __TEXT segment starts from the beginning of the file
  // (i.e. the header).
  segname = segment_names::text;
}

// Registers a load command to be emitted after the header and keeps the
// running total of load command bytes up to date.
void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
  sizeOfCmds += lc->getSize();
  loadCommands.push_back(lc);
}

// Size of the section: the fixed 64-bit Mach-O header followed by every load
// command added so far.
size_t MachHeaderSection::getSize() const {
  const size_t headerSize = sizeof(mach_header_64);
  return headerSize + sizeOfCmds;
}

void MachHeaderSection::writeTo(uint8_t *buf) {
auto *hdr = reinterpret_cast<mach_header_64 *>(buf);
hdr->magic = MH_MAGIC_64;
hdr->cputype = CPU_TYPE_X86_64;
hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64;
hdr->filetype = MH_EXECUTE;
hdr->ncmds = loadCommands.size();
hdr->sizeofcmds = sizeOfCmds;
hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;

uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
for (LoadCommand *lc : loadCommands) {
lc->writeTo(p);
p += lc->getSize();
}
}

// Names the section and assigns it to the __PAGEZERO segment.
PageZeroSection::PageZeroSection() {
  name = section_names::pageZero;
  segname = segment_names::pageZero;
}

GotSection::GotSection() {
segname = "__DATA_CONST";
name = "__got";
Expand All @@ -30,6 +77,57 @@ void GotSection::addEntry(DylibSymbol &sym) {
}
}

// Names the section and assigns it to the __LINKEDIT segment.
BindingSection::BindingSection() {
  name = section_names::binding;
  segname = segment_names::linkEdit;
}

bool BindingSection::isNeeded() const { return in.got->isNeeded(); }

// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
// interprets to update a record with the following fields:
// * segment index (of the segment to write the symbol addresses to, typically
// the __DATA_CONST segment which contains the GOT)
// * offset within the segment, indicating the next location to write a binding
// * symbol type
// * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
// * symbol name
// * addend
// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
// a symbol in the GOT, and increments the segment offset to point to the next
// entry. It does *not* clear the record state after doing the bind, so
// subsequent opcodes only need to encode the differences between bindings.
void BindingSection::finalizeContents() {
if (!isNeeded())
return;

raw_svector_ostream os{contents};
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
in.got->parent->index);
encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os);
for (const DylibSymbol *sym : in.got->getEntries()) {
// TODO: Implement compact encoding -- we only need to encode the
// differences between consecutive symbol entries.
if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
sym->file->ordinal);
} else {
error("TODO: Support larger dylib symbol ordinals");
continue;
}
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
<< sym->getName() << '\0'
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
}

os << static_cast<uint8_t>(BIND_OPCODE_DONE);
}

// Copies the previously generated bind opcode stream into the output buffer.
void BindingSection::writeTo(uint8_t *buf) {
  const char *opcodes = contents.data();
  const size_t numBytes = contents.size();
  memcpy(buf, opcodes, numBytes);
}

InStruct in;

} // namespace macho
Expand Down
55 changes: 54 additions & 1 deletion lld/MachO/SyntheticSections.h
Expand Up @@ -13,10 +13,45 @@
#include "Target.h"
#include "llvm/ADT/SetVector.h"

using namespace llvm::MachO;

namespace lld {
namespace macho {

// Names given to the linker's synthetic, hidden sections.
namespace section_names {

// Name of the hidden section that creates the __PAGEZERO segment.
constexpr const char *pageZero = "__pagezero";
// Name of the hidden section that holds the Mach-O header.
constexpr const char *header = "__mach_header";
// Name of the hidden section that holds the bind opcodes.
constexpr const char *binding = "__binding";

} // namespace section_names

class DylibSymbol;
class LoadCommand;

// The header of the Mach-O file, which must have a file offset of zero.
// It also owns the list of load commands, which are written out directly
// after the header itself.
class MachHeaderSection : public InputSection {
public:
  MachHeaderSection();

  // Appends a load command and accounts for its size.
  void addLoadCommand(LoadCommand *);

  // InputSection interface. The header is hidden, i.e. it gets no section
  // header of its own.
  size_t getSize() const override;
  bool isHidden() const override { return true; }
  void writeTo(uint8_t *buf) override;

private:
  std::vector<LoadCommand *> loadCommands;
  // Running total of the sizes of all entries in loadCommands.
  uint32_t sizeOfCmds = 0;
};

// A hidden section that exists solely for the purpose of creating the
// __PAGEZERO segment, which is used to catch null pointer dereferences.
// It spans ImageBase bytes of address space but takes up no room in the file.
class PageZeroSection : public InputSection {
public:
  PageZeroSection();

  size_t getSize() const override { return ImageBase; }
  uint64_t getFileSize() const override { return 0; }
  bool isHidden() const override { return true; }
};

// This section will be populated by dyld with addresses to non-lazily-loaded
// dylib symbols.
Expand All @@ -31,6 +66,8 @@ class GotSection : public InputSection {

size_t getSize() const override { return entries.size() * WordSize; }

bool isNeeded() const override { return !entries.empty(); }

void writeTo(uint8_t *buf) override {
// Nothing to write, GOT contains all zeros at link time; it's populated at
// runtime by dyld.
Expand All @@ -40,8 +77,24 @@ class GotSection : public InputSection {
llvm::SetVector<const DylibSymbol *> entries;
};

// Stores bind opcodes for telling dyld which symbols to load non-lazily.
class BindingSection : public InputSection {
public:
  BindingSection();

  // Generates the opcode stream into `contents`.
  void finalizeContents();

  size_t getSize() const override { return contents.size(); }
  bool isNeeded() const override;
  // Like other sections in __LINKEDIT, the binding section is special: its
  // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
  // section headers.
  bool isHidden() const override { return true; }
  void writeTo(uint8_t *buf) override;

  // Raw opcode bytes; copied verbatim into the output by writeTo().
  SmallVector<char, 128> contents;
};

// Holds pointers to the linker's synthetic sections; the single instance is
// the global `in`.
struct InStruct {
  // Null until the GOT section has been created.
  GotSection *got = nullptr;
};

extern InStruct in;
Expand Down

0 comments on commit 6f63216

Please sign in to comment.