Skip to content
Permalink
Browse files

[ELF][PPC64] Support long branch thunks with addends

Fixes PPC64 part of PR40438

  // clang -target ppc64le -c a.cc
  // .text.unlikely may be placed in a separate output section (via -z keep-text-section-prefix)
  // The distance between bar in .text.unlikely and foo in .text may be larger than 32MiB.
  static void foo() {}
  __attribute__((section(".text.unlikely"))) static int bar() { foo(); return 0; }
  __attribute__((used)) static int dummy = bar();

This patch makes such thunks with addends work for PPC64.

AArch64: .text -> `__AArch64ADRPThunk_ (adrp x16, ...; add x16, x16, ...; br x16)` -> target
PPC64: .text -> `__long_branch_ (addis 12, 2, ...; ld 12, ...(12); mtctr 12; bctr)` -> target

AArch64 can leverage ADRP to jump to the target directly, but PPC64
needs to load an address from .branch_lt. Before Power ISA v3.0, the
PC-relative ADDPCIS was not available. .branch_lt was invented to work
around the limitation.

Symbol::ppc64BranchltIndex is replaced by
PPC64LongBranchTargetSection::entry_index, which takes addends into
consideration.

The tests are rewritten: ppc64-long-branch.s tests -no-pie and
ppc64-long-branch-pi.s tests -pie and -shared.

Reviewed By: sfertile

Differential Revision: https://reviews.llvm.org/D70937
  • Loading branch information
MaskRay committed Dec 2, 2019
1 parent e503fee commit c8f0d3e130d336f49c204b9ee317bf99be192a82
@@ -899,7 +899,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
}

bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const {
uint64_t branchAddr, const Symbol &s, int64_t a) const {
if (type != R_PPC64_REL14 && type != R_PPC64_REL24)
return false;

@@ -916,7 +916,7 @@ bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
// a range-extending thunk.
// See the comment in getRelocTargetVA() about R_PPC64_CALL.
return !inBranchRange(type, branchAddr,
s.getVA() +
s.getVA(a) +
getPPC64GlobalEntryToLocalEntryOffset(s.stOther));
}

@@ -1818,7 +1818,7 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) {
return true;
rel.sym = &t->destination;
// TODO Restore addend on all targets.
if (config->emachine == EM_AARCH64)
if (config->emachine == EM_AARCH64 || config->emachine == EM_PPC64)
rel.addend = t->addend;
if (rel.sym->isInPlt())
rel.expr = toPlt(rel.expr);
@@ -1897,12 +1897,14 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
rel.sym = t->getThunkTargetSym();
rel.expr = fromPlt(rel.expr);

// On AArch64, a jump/call relocation may be encoded as STT_SECTION
// + non-zero addend, clear the addend after redirection.
// On AArch64 and PPC64, a jump/call relocation may be encoded as
// STT_SECTION + non-zero addend, clear the addend after
// redirection.
//
// The addend of R_PPC_PLTREL24 should be ignored after changing to
// R_PC.
if (config->emachine == EM_AARCH64 ||
config->emachine == EM_PPC64 ||
(config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24))
rel.addend = 0;
}
@@ -162,11 +162,6 @@ uint64_t Symbol::getGotPltOffset() const {
return (pltIndex + target->gotPltHeaderEntriesNum) * config->wordsize;
}

// Returns the byte offset of this symbol's slot from the start of the
// .branch_lt section: the slot index scaled by config->wordsize.
uint64_t Symbol::getPPC64LongBranchOffset() const {
  // 0xffff is the value the index holds while no slot has been assigned, so
  // asking for the offset before an entry was added is a caller bug.
  assert(ppc64BranchltIndex != 0xffff);
  const uint64_t slotOffset = ppc64BranchltIndex * config->wordsize;
  return slotOffset;
}

uint64_t Symbol::getPltVA() const {
PltSection *plt = isInIplt ? in.iplt : in.plt;
uint64_t outVA =
@@ -179,12 +174,6 @@ uint64_t Symbol::getPltVA() const {
return outVA;
}

// Returns the virtual address of this symbol's slot in the .branch_lt
// section: the section's VA plus the slot index scaled by config->wordsize.
uint64_t Symbol::getPPC64LongBranchTableVA() const {
  // 0xffff is the value the index holds while no slot has been assigned.
  assert(ppc64BranchltIndex != 0xffff);
  const auto tableVA = in.ppc64LongBranchTarget->getVA();
  return tableVA + ppc64BranchltIndex * config->wordsize;
}

uint64_t Symbol::getSize() const {
if (const auto *dr = dyn_cast<Defined>(this))
return dr->size;
@@ -87,9 +87,6 @@ class Symbol {
// Version definition index.
uint16_t versionId;

// An index into the .branch_lt section on PPC64.
uint16_t ppc64BranchltIndex = -1;

// Symbol binding. This is not overwritten by replace() to track
// changes during resolution. In particular:
// - An undefined weak is still weak when it resolves to a shared library.
@@ -181,7 +178,6 @@ class Symbol {

bool isInGot() const { return gotIndex != -1U; }
bool isInPlt() const { return pltIndex != -1U; }
bool isInPPC64Branchlt() const { return ppc64BranchltIndex != 0xffff; }

uint64_t getVA(int64_t addend = 0) const;

@@ -190,8 +186,6 @@ class Symbol {
uint64_t getGotPltOffset() const;
uint64_t getGotPltVA() const;
uint64_t getPltVA() const;
uint64_t getPPC64LongBranchTableVA() const;
uint64_t getPPC64LongBranchOffset() const;
uint64_t getSize() const;
OutputSection *getOutputSection() const;

@@ -3426,10 +3426,19 @@ PPC64LongBranchTargetSection::PPC64LongBranchTargetSection()
config->isPic ? SHT_NOBITS : SHT_PROGBITS, 8,
".branch_lt") {}

void PPC64LongBranchTargetSection::addEntry(Symbol &sym) {
assert(sym.ppc64BranchltIndex == 0xffff);
sym.ppc64BranchltIndex = entries.size();
entries.push_back(&sym);
// Returns the virtual address of the .branch_lt slot that holds the
// long-branch target for the (sym, addend) pair.
//
// The pair must already be present in entry_index (entries are added by
// addEntry()); asking for a pair that was never added is a caller bug.
uint64_t PPC64LongBranchTargetSection::getEntryVA(const Symbol *sym,
                                                  int64_t addend) {
  auto it = entry_index.find({sym, addend});
  // Dereferencing end() is undefined behavior, so state the invariant
  // explicitly instead of silently reading a bogus slot index.
  assert(it != entry_index.end() &&
         "no .branch_lt entry for this (symbol, addend) pair");
  // Each slot is 8 bytes wide.
  return getVA() + it->second * 8;
}

// Registers a long-branch target identified by (sym, addend).
//
// Returns the index of the newly created slot, or None when the same
// (sym, addend) pair already has a slot, in which case nothing is added.
Optional<uint32_t> PPC64LongBranchTargetSection::addEntry(const Symbol *sym,
                                                          int64_t addend) {
  // The next free slot index is the current number of entries; try_emplace
  // leaves the map untouched if the key is already present.
  auto inserted =
      entry_index.try_emplace(std::make_pair(sym, addend), entries.size());
  if (inserted.second) {
    // First time this pair is seen: record it so the slot order matches the
    // indices handed out above.
    entries.emplace_back(sym, addend);
    return inserted.first->second;
  }
  return None;
}

size_t PPC64LongBranchTargetSection::getSize() const {
@@ -3443,12 +3452,14 @@ void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) {
if (config->isPic)
return;

for (const Symbol *sym : entries) {
for (auto entry : entries) {
const Symbol *sym = entry.first;
int64_t addend = entry.second;
assert(sym->getVA());
// Need calls to branch to the local entry-point since a long-branch
// must be a local-call.
write64(buf,
sym->getVA() + getPPC64GlobalEntryToLocalEntryOffset(sym->stOther));
write64(buf, sym->getVA(addend) +
getPPC64GlobalEntryToLocalEntryOffset(sym->stOther));
buf += 8;
}
}
@@ -1062,14 +1062,16 @@ class PPC32Got2Section final : public SyntheticSection {
class PPC64LongBranchTargetSection final : public SyntheticSection {
public:
PPC64LongBranchTargetSection();
void addEntry(Symbol &sym);
uint64_t getEntryVA(const Symbol *sym, int64_t addend);
llvm::Optional<uint32_t> addEntry(const Symbol *sym, int64_t addend);
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
bool isNeeded() const override;
void finalizeContents() override { finalized = true; }

private:
std::vector<const Symbol *> entries;
std::vector<std::pair<const Symbol *, int64_t>> entries;
llvm::DenseMap<std::pair<const Symbol *, int64_t>, uint32_t> entry_index;
bool finalized = false;
};

@@ -288,29 +288,29 @@ class PPC64LongBranchThunk : public Thunk {
void addSymbols(ThunkSection &isec) override;

protected:
PPC64LongBranchThunk(Symbol &dest) : Thunk(dest, 0) {}
PPC64LongBranchThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
};

class PPC64PILongBranchThunk final : public PPC64LongBranchThunk {
public:
PPC64PILongBranchThunk(Symbol &dest) : PPC64LongBranchThunk(dest) {
PPC64PILongBranchThunk(Symbol &dest, int64_t addend)
: PPC64LongBranchThunk(dest, addend) {
assert(!dest.isPreemptible);
if (dest.isInPPC64Branchlt())
return;

in.ppc64LongBranchTarget->addEntry(dest);
mainPart->relaDyn->addReloc(
{target->relativeRel, in.ppc64LongBranchTarget,
dest.getPPC64LongBranchOffset(), true, &dest,
getPPC64GlobalEntryToLocalEntryOffset(dest.stOther)});
if (Optional<uint32_t> index =
in.ppc64LongBranchTarget->addEntry(&dest, addend)) {
mainPart->relaDyn->addReloc(
{target->relativeRel, in.ppc64LongBranchTarget, *index * UINT64_C(8),
true, &dest,
addend + getPPC64GlobalEntryToLocalEntryOffset(dest.stOther)});
}
}
};

class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk {
public:
PPC64PDLongBranchThunk(Symbol &dest) : PPC64LongBranchThunk(dest) {
if (!dest.isInPPC64Branchlt())
in.ppc64LongBranchTarget->addEntry(dest);
PPC64PDLongBranchThunk(Symbol &dest, int64_t addend)
: PPC64LongBranchThunk(dest, addend) {
in.ppc64LongBranchTarget->addEntry(&dest, addend);
}
};

@@ -785,7 +785,8 @@ void PPC64PltCallStub::addSymbols(ThunkSection &isec) {
}

void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
int64_t offset = destination.getPPC64LongBranchTableVA() - getPPC64TocBase();
int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
getPPC64TocBase();
writePPCLoadAndBranch(buf, offset);
}

@@ -901,15 +902,15 @@ static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel,
return make<PPC32PltCallStub>(isec, rel, s);
}

static Thunk *addThunkPPC64(RelType type, Symbol &s) {
static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
assert(type == R_PPC64_REL24 && "unexpected relocation type for thunk");
if (s.isInPlt())
return make<PPC64PltCallStub>(s);

if (config->picThunk)
return make<PPC64PILongBranchThunk>(s);
return make<PPC64PILongBranchThunk>(s, a);

return make<PPC64PDLongBranchThunk>(s);
return make<PPC64PDLongBranchThunk>(s, a);
}

Thunk *addThunk(const InputSection &isec, Relocation &rel) {
@@ -929,7 +930,7 @@ Thunk *addThunk(const InputSection &isec, Relocation &rel) {
return addThunkPPC32(isec, rel, s);

if (config->emachine == EM_PPC64)
return addThunkPPC64(rel.type, s);
return addThunkPPC64(rel.type, s, a);

llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC");
}
@@ -0,0 +1,89 @@
# REQUIRES: ppc
# RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o
# RUN: echo 'SECTIONS { \
# RUN: .text_low 0x2000: { *(.text_low) } \
# RUN: .text_high 0x2002000 : { *(.text_high) } \
# RUN: }' > %t.script
# RUN: ld.lld -pie -T %t.script %t.o -o %t
# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SEC-PIE %s
# RUN: llvm-readobj -r %t | FileCheck --check-prefix=RELOC %s
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s

# RUN: ld.lld -shared -T %t.script %t.o -o %t.so
# RUN: llvm-readelf -S %t.so | FileCheck --check-prefix=SEC-SHARED %s
# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s

# SEC-PIE: Name Type Address Off Size ES Flg Lk Inf Al
# SEC-PIE: .got PROGBITS 00000000020020e0 20120e0 000008 00 WA 0 0 8
# SEC-PIE: .branch_lt NOBITS 00000000020020f0 20120f0 000020 00 WA 0 0 8

# SEC-SHARED: Name Type Address Off Size ES Flg Lk Inf Al
# SEC-SHARED: .got PROGBITS 00000000020020d0 20120d0 000008 00 WA 0 0 8
# SEC-SHARED: .branch_lt NOBITS 00000000020020e0 20120e0 000020 00 WA 0 0 8

# RELOC: .rela.dyn {
# RELOC-NEXT: 0x20020E8 R_PPC64_RELATIVE - 0x8000
# RELOC-NEXT: 0x20020F0 R_PPC64_RELATIVE - 0x2002000
# RELOC-NEXT: 0x20020F8 R_PPC64_RELATIVE - 0x2002008
# RELOC-NEXT: 0x2002100 R_PPC64_RELATIVE - 0x200200C
# RELOC-NEXT: 0x2002108 R_PPC64_RELATIVE - 0x2000
# RELOC-NEXT: }

# CHECK: _start:
# CHECK-NEXT: 2000: bl .+16
# CHECK-NEXT: bl .+33554428
# CHECK-NEXT: bl .+24
# CHECK-NEXT: bl .+36

## &.branch_lt[0] - .TOC. = .branch_lt - (.got+0x8000) = -32752
# CHECK: __long_branch_:
# CHECK-NEXT: 2010: addis 12, 2, 0
# CHECK-NEXT: ld 12, -32752(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr

## &.branch_lt[1] - .TOC. = .branch_lt+8 - (.got+0x8000) = -32744
# CHECK: __long_branch_:
# CHECK-NEXT: 2020: addis 12, 2, 0
# CHECK-NEXT: ld 12, -32744(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr

## &.branch_lt[2] - .TOC. = .branch_lt+16 - (.got+0x8000) = -32736
# CHECK: __long_branch_:
# CHECK-NEXT: 2030: addis 12, 2, 0
# CHECK-NEXT: ld 12, -32736(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr

.section .text_low, "ax", %progbits
.globl _start
_start:
bl .text_high # Need a thunk
bl .text_high
bl .text_high+8 # Need a thunk
bl .text_high+0xc # Need a thunk

# CHECK: high_target:
# CHECK-NEXT: 2002000: bl .-33554428
# CHECK-NEXT: bl .-33554432
# CHECK-NEXT: bl .+8

## &.branch_lt[3] - .TOC. = .branch_lt+24 - (.got+0x8000) = -32728
# CHECK: __long_branch_:
# CHECK-NEXT: 2002010: addis 12, 2, 0
# CHECK-NEXT: ld 12, -32728(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr

.section .text_high, "ax", %progbits
high_target:
bl .text_low+4
bl .text_low+4
bl .text_low # Need a thunk
blr

## Force creation of .got
## The R_PPC64_RELATIVE makes sure .rela.dyn survives removeUnusedSyntheticSections.
.section .data
.quad .TOC.@tocbase

0 comments on commit c8f0d3e

Please sign in to comment.
You can’t perform that action at this time.