Skip to content

Commit

Permalink
[PPC64] Long branch thunks.
Browse files Browse the repository at this point in the history
On PowerPC64, when a function call offset is too large to encode in a call
instruction, the address is stored in a table in the data segment. A thunk is
used to load the branch target address from the table relative to the
TOC-pointer and indirectly branch to the callee. When linking position-dependent
code the addresses are stored directly in the table; for position-independent
code the table is allocated and filled in at load time by the dynamic linker.

For position-independent code the branch targets could have gone in the .got.plt,
but using the .branch_lt section for both position-dependent and
position-independent binaries keeps it consistent and helps keep this
PPC64-specific logic separated from the target-independent code handling the
.got.plt.

Differential Revision: https://reviews.llvm.org/D53408

llvm-svn: 346877
  • Loading branch information
Sean Fertile committed Nov 14, 2018
1 parent 36eef92 commit 614dc11
Show file tree
Hide file tree
Showing 13 changed files with 532 additions and 62 deletions.
26 changes: 23 additions & 3 deletions lld/ELF/Arch/PPC64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class PPC64 final : public TargetInfo {
void writeGotHeader(uint8_t *Buf) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
Expand Down Expand Up @@ -709,9 +710,28 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {

// Decides whether a branch relocation of type `Type`, located at address
// `BranchAddr` and targeting symbol `S`, has to be routed through a thunk.
//
// Returns true when the callee lives in the PLT (the call must go through a
// call stub) or when the callee is out of reach of the branch instruction (a
// range-extending thunk is required). `Expr` and `File` are not consulted.
bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                       uint64_t BranchAddr, const Symbol &S) const {
  // The only call relocation we currently support is the REL24 type.
  if (Type != R_PPC64_REL24)
    return false;

  // If a function is in the Plt it needs to be called with a call-stub.
  if (S.isInPlt())
    return true;

  // If a symbol is a weak undefined and we are compiling an executable
  // it doesn't need a range-extending thunk since it can't be called.
  if (S.isUndefWeak() && !Config->Shared)
    return false;

  // If the offset exceeds the range of the branch type then it will need
  // a range-extending thunk.
  return !inBranchRange(Type, BranchAddr, S.getVA());
}

// Reports whether a branch placed at address `Src` can reach `Dst` directly,
// i.e. whether the signed displacement between the two fits in 26 bits.
// Only R_PPC64_REL24 branches are expected here.
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
  assert(Type == R_PPC64_REL24 && "Unexpected relocation type used in branch");
  const int64_t Displacement = Dst - Src;
  const bool Reachable = isInt<26>(Displacement);
  return Reachable;
}

RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ static void replaceWithDefined(Symbol &Sym, SectionBase *Sec, uint64_t Value,
Sym.PltIndex = Old.PltIndex;
Sym.GotIndex = Old.GotIndex;
Sym.VerdefIndex = Old.VerdefIndex;
Sym.PPC64BranchltIndex = Old.PPC64BranchltIndex;
Sym.IsPreemptible = true;
Sym.ExportDynamic = true;
Sym.IsUsedInRegularObj = true;
Expand Down
11 changes: 11 additions & 0 deletions lld/ELF/Symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ uint64_t Symbol::getGotPltOffset() const {
return (PltIndex + Target->GotPltHeaderEntriesNum) * Target->GotPltEntrySize;
}

// Byte offset of this symbol's slot from the start of the long-branch table.
// The symbol must already have been given a slot (index 0xffff means "none").
uint64_t Symbol::getPPC64LongBranchOffset() const {
  assert(PPC64BranchltIndex != 0xffff);
  const auto EntrySize = Target->GotPltEntrySize;
  return PPC64BranchltIndex * EntrySize;
}

uint64_t Symbol::getPltVA() const {
if (this->IsInIplt)
return In.Iplt->getVA() + PltIndex * Target->PltEntrySize;
Expand All @@ -149,6 +154,12 @@ uint64_t Symbol::getPltOffset() const {
return Target->getPltEntryOffset(PltIndex);
}

// Virtual address of this symbol's slot in the long-branch table: the
// address of the table section plus the slot's byte offset within it.
// The symbol must already have been given a slot (index 0xffff means "none").
uint64_t Symbol::getPPC64LongBranchTableVA() const {
  assert(PPC64BranchltIndex != 0xffff);
  const auto SlotOffset = PPC64BranchltIndex * Target->GotPltEntrySize;
  return In.PPC64LongBranchTarget->getVA() + SlotOffset;
}

uint64_t Symbol::getSize() const {
if (const auto *DR = dyn_cast<Defined>(this))
return DR->Size;
Expand Down
7 changes: 7 additions & 0 deletions lld/ELF/Symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class Symbol {
uint32_t DynsymIndex = 0;
uint32_t GotIndex = -1;
uint32_t PltIndex = -1;

uint32_t GlobalDynIndex = -1;

// This field is a index to the symbol's version definition.
Expand All @@ -87,6 +88,9 @@ class Symbol {
// Version definition index.
uint16_t VersionId;

// An index into the .branch_lt section on PPC64.
uint16_t PPC64BranchltIndex = -1;

// Symbol binding. This is not overwritten by replaceSymbol to track
// changes during resolution. In particular:
// - An undefined weak is still weak when it resolves to a shared library.
Expand Down Expand Up @@ -159,6 +163,7 @@ class Symbol {

bool isInGot() const { return GotIndex != -1U; }
bool isInPlt() const { return PltIndex != -1U; }
bool isInPPC64Branchlt() const { return PPC64BranchltIndex != 0xffff; }

uint64_t getVA(int64_t Addend = 0) const;

Expand All @@ -168,6 +173,8 @@ class Symbol {
uint64_t getGotPltVA() const;
uint64_t getPltVA() const;
uint64_t getPltOffset() const;
uint64_t getPPC64LongBranchTableVA() const;
uint64_t getPPC64LongBranchOffset() const;
uint64_t getSize() const;
OutputSection *getOutputSection() const;

Expand Down
47 changes: 47 additions & 0 deletions lld/ELF/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3070,6 +3070,53 @@ bool ThunkSection::assignOffsets() {
return Changed;
}

// Creates the synthetic ".branch_lt" section. The section type depends on the
// kind of link:
//  - position-dependent: the target addresses are stored in the output file,
//    so the section is SHT_PROGBITS;
//  - position-independent: the dynamic linker allocates and fills the table
//    at load time, so the section is SHT_NOBITS.
PPC64LongBranchTargetSection::PPC64LongBranchTargetSection()
    : SyntheticSection(
          SHF_ALLOC | SHF_WRITE,
          Config->Pic ? SHT_NOBITS : SHT_PROGBITS,
          8,
          ".branch_lt") {}

// Reserves the next free slot of the table for `Sym` and records the slot
// number on the symbol. A symbol must not be added more than once.
void PPC64LongBranchTargetSection::addEntry(Symbol &Sym) {
  assert(Sym.PPC64BranchltIndex == 0xffff);
  // The slot number is kept in a 16-bit field whose value 0xffff means "no
  // entry". A slot number of 0xffff or above would silently wrap or alias
  // that sentinel, so refuse it instead of truncating.
  assert(Entries.size() < 0xffff &&
         "too many .branch_lt entries for a 16-bit index");
  Sym.PPC64BranchltIndex = static_cast<uint16_t>(Entries.size());
  Entries.push_back(&Sym);
}

// Size of the table in bytes: 8 bytes for every recorded symbol.
size_t PPC64LongBranchTargetSection::getSize() const {
  const size_t SlotCount = Entries.size();
  return SlotCount * 8;
}

// Writes the table contents into `Buf`, one 8-byte slot per recorded symbol.
void PPC64LongBranchTargetSection::writeTo(uint8_t *Buf) {
  assert(Target->GotPltEntrySize == 8);
  // When linking position-independent code there is nothing to write: the
  // dynamic linker allocates the section and fills it in. Only a non-pic link
  // knows the final addresses of the targets and stores them directly.
  if (Config->Pic)
    return;

  uint8_t *Slot = Buf;
  for (const Symbol *Entry : Entries) {
    const uint64_t CalleeVA = Entry->getVA();
    assert(CalleeVA);
    // A long-branch is always a local call, so the slot has to hold the
    // address of the callee's local entry-point rather than its global one.
    write64(Slot,
            CalleeVA + getPPC64GlobalEntryToLocalEntryOffset(Entry->StOther));
    Slot += Target->GotPltEntrySize;
  }
}

// Reports whether the section can be dropped from the output.
bool PPC64LongBranchTargetSection::empty() const {
  // `removeUnusedSyntheticSections()` runs before thunks are allocated, which
  // is too early to know whether any entry will be added. Until Finalized is
  // set we therefore always claim to be non-empty so the section survives
  // until after thunk creation. Finalized only becomes true once
  // `finalizeSections()` is called after thunk creation. As a consequence, a
  // link that creates no long-branch thunks still ends up with an empty
  // .branch_lt section in the binary.
  if (!Finalized)
    return false;
  return Entries.empty();
}

InStruct elf::In;

template GdbIndexSection *GdbIndexSection::create<ELF32LE>();
Expand Down
20 changes: 20 additions & 0 deletions lld/ELF/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,25 @@ class ThunkSection : public SyntheticSection {
size_t Size = 0;
};

// This section is used to store the addresses of functions that are called
// in range-extending thunks on PowerPC64. When producing position-dependent
// code the addresses are link-time constants and the table is written out to
// the binary. When producing position-independent code the table is allocated
// and filled in by the dynamic linker.
class PPC64LongBranchTargetSection final : public SyntheticSection {
public:
PPC64LongBranchTargetSection();
// Appends Sym to the table and records its slot index on the symbol.
void addEntry(Symbol &Sym);
// Size of the table in bytes (8 bytes per entry).
size_t getSize() const override;
// Writes the target addresses for a non-pic link; writes nothing for pic.
void writeTo(uint8_t *Buf) override;
// True only once the section has been finalized and holds no entries.
bool empty() const override;
// Marks the section as finalized so that empty() may start reporting true.
void finalizeContents() override { Finalized = true; }

private:
// Symbols that own a slot, in slot order.
std::vector<const Symbol *> Entries;
// Set by finalizeContents(); until then empty() always returns false.
bool Finalized = false;
};

InputSection *createInterpSection();
MergeInputSection *createCommentSection();
template <class ELFT> void splitSections();
Expand All @@ -990,6 +1009,7 @@ struct InStruct {
GotSection *Got;
GotPltSection *GotPlt;
IgotPltSection *IgotPlt;
PPC64LongBranchTargetSection *PPC64LongBranchTarget;
MipsGotSection *MipsGot;
MipsRldMapSection *MipsRldMap;
PltSection *Plt;
Expand Down
83 changes: 71 additions & 12 deletions lld/ELF/Thunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,46 @@ class PPC64PltCallStub final : public Thunk {
void addSymbols(ThunkSection &IS) override;
};

// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
// alignment. This gives a possible 26 bits of 'reach'. If the call offset is
// larger than that we need to emit a long-branch thunk. The target address
// of the callee is stored in a table to be accessed TOC-relative. Since the
// call must be local (a non-local call will have a PltCallStub instead) the
// table stores the address of the callee's local entry point. For
// position-independent code a corresponding relative dynamic relocation is
// used.
class PPC64LongBranchThunk : public Thunk {
public:
// The thunk occupies 16 bytes (four 32-bit instructions).
uint32_t size() override { return 16; }
// Emits the thunk's instructions into Buf.
void writeTo(uint8_t *Buf) override;
// Adds the "__long_branch_<callee>" symbol for this thunk to IS.
void addSymbols(ThunkSection &IS) override;

protected:
// Protected so that only the position-independent / position-dependent
// subclasses can construct a long-branch thunk.
PPC64LongBranchThunk(Symbol &Dest) : Thunk(Dest) {}
};

// Long-branch thunk used when linking position-independent code. The first
// thunk created for a destination gives it a slot in the long-branch table
// and registers a relative dynamic relocation against that slot; later thunks
// for the same destination reuse the existing slot.
class PPC64PILongBranchThunk final : public PPC64LongBranchThunk {
public:
  PPC64PILongBranchThunk(Symbol &Dest) : PPC64LongBranchThunk(Dest) {
    // A long-branch is a local call, so the destination cannot be preemptible.
    assert(!Dest.IsPreemptible);
    if (!Dest.isInPPC64Branchlt()) {
      In.PPC64LongBranchTarget->addEntry(Dest);
      // The last element adjusts the target from the global to the local
      // entry point (presumably used as the relocation addend -- confirm
      // against the DynamicReloc constructor).
      In.RelaDyn->addReloc(
          {Target->RelativeRel, In.PPC64LongBranchTarget,
           Dest.getPPC64LongBranchOffset(), true, &Dest,
           getPPC64GlobalEntryToLocalEntryOffset(Dest.StOther)});
    }
  }
};

// Long-branch thunk used when linking position-dependent code. It only has to
// make sure the destination owns a slot in the long-branch table; no dynamic
// relocation is registered here.
class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk {
public:
  PPC64PDLongBranchThunk(Symbol &Dest) : PPC64LongBranchThunk(Dest) {
    // An earlier thunk for the same destination already claimed a slot.
    if (Dest.isInPPC64Branchlt())
      return;
    In.PPC64LongBranchTarget->addEntry(Dest);
  }
};

} // end anonymous namespace

Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value,
Expand Down Expand Up @@ -573,17 +613,21 @@ InputSection *MicroMipsR6Thunk::getTargetInputSection() const {
return dyn_cast<InputSection>(DR.Section);
}

void PPC64PltCallStub::writeTo(uint8_t *Buf) {
int64_t Off = Destination.getGotPltVA() - getPPC64TocBase();
// Need to add 0x8000 to offset to account for the low bits being signed.
uint16_t OffHa = (Off + 0x8000) >> 16;
uint16_t OffLo = Off;
static void writePPCLoadAndBranch(uint8_t *Buf, int64_t Offset) {
uint16_t OffHa = (Offset + 0x8000) >> 16;
uint16_t OffLo = Offset & 0xffff;

write32(Buf + 0, 0xf8410018); // std r2,24(r1)
write32(Buf + 4, 0x3d820000 | OffHa); // addis r12,r2, X@plt@to@ha
write32(Buf + 8, 0xe98c0000 | OffLo); // ld r12,X@plt@toc@l(r12)
write32(Buf + 12, 0x7d8903a6); // mtctr r12
write32(Buf + 16, 0x4e800420); // bctr
write32(Buf + 0, 0x3d820000 | OffHa); // addis r12, r2, OffHa
write32(Buf + 4, 0xe98c0000 | OffLo); // ld r12, OffLo(r12)
write32(Buf + 8, 0x7d8903a6); // mtctr r12
write32(Buf + 12, 0x4e800420); // bctr
}

// Emits the PLT call stub: save r2 to the stack, then load the callee's
// address from its .got.plt entry (addressed relative to the TOC base) and
// branch to it.
void PPC64PltCallStub::writeTo(uint8_t *Buf) {
  const int64_t GotPltDelta = Destination.getGotPltVA() - getPPC64TocBase();
  // The TOC pointer goes into the save-slot reserved in the call frame.
  write32(Buf + 0, 0xf8410018); // std r2,24(r1)
  writePPCLoadAndBranch(Buf + 4, GotPltDelta);
}

void PPC64PltCallStub::addSymbols(ThunkSection &IS) {
Expand All @@ -592,6 +636,16 @@ void PPC64PltCallStub::addSymbols(ThunkSection &IS) {
S->NeedsTocRestore = true;
}

// Emits the long-branch thunk: load the callee's address from its slot in the
// long-branch table (addressed relative to the TOC base) and branch to it.
void PPC64LongBranchThunk::writeTo(uint8_t *Buf) {
  const int64_t SlotDelta =
      Destination.getPPC64LongBranchTableVA() - getPPC64TocBase();
  writePPCLoadAndBranch(Buf, SlotDelta);
}

// Defines a function symbol named "__long_branch_<callee>" at offset 0 of the
// thunk in section `IS`.
void PPC64LongBranchThunk::addSymbols(ThunkSection &IS) {
  StringRef ThunkName = Saver.save("__long_branch_" + Destination.getName());
  addSymbol(ThunkName, STT_FUNC, 0, IS);
}

// Binds the thunk to its destination symbol D; Offset starts out as 0.
Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {}

// Defaulted destructor, defined out of line.
Thunk::~Thunk() = default;
Expand Down Expand Up @@ -675,9 +729,14 @@ static Thunk *addThunkMips(RelType Type, Symbol &S) {
}

// Creates the PPC64 thunk needed to reach `S` from an R_PPC64_REL24 branch.
//
// A symbol that lives in the PLT gets a PLT call stub. Any other symbol gets
// a long-branch thunk, in its position-independent or position-dependent
// flavour depending on Config->Pic.
static Thunk *addThunkPPC64(RelType Type, Symbol &S) {
  assert(Type == R_PPC64_REL24 && "unexpected relocation type for thunk");
  if (S.isInPlt())
    return make<PPC64PltCallStub>(S);

  if (Config->Pic)
    return make<PPC64PILongBranchThunk>(S);

  return make<PPC64PDLongBranchThunk>(S);
}

Thunk *addThunk(RelType Type, Symbol &S) {
Expand Down
6 changes: 6 additions & 0 deletions lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,11 @@ template <class ELFT> static void createSyntheticSections() {
Add(In.Got);
}

if (Config->EMachine == EM_PPC64) {
In.PPC64LongBranchTarget = make<PPC64LongBranchTargetSection>();
Add(In.PPC64LongBranchTarget);
}

In.GotPlt = make<GotPltSection>();
Add(In.GotPlt);
In.IgotPlt = make<IgotPltSection>();
Expand Down Expand Up @@ -1756,6 +1761,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {

// maybeAddThunks may have added local symbols to the static symbol table.
finalizeSynthetic(In.SymTab);
finalizeSynthetic(In.PPC64LongBranchTarget);

// Fill other section headers. The dynamic table is finalized
// at the end because some tags like RELSZ depend on result
Expand Down
Loading

0 comments on commit 614dc11

Please sign in to comment.