Skip to content

Commit

Permalink
[PowerPC] Implement R_PPC64_REL24_NOTOC local calls, callee requires …
Browse files Browse the repository at this point in the history
…a TOC

The PC Relative code now allows for calls that are marked with the relocation
R_PPC64_REL24_NOTOC. This indicates that the caller does not have a valid TOC
pointer in R2 and does not require R2 to be restored after the call.

This patch adds support for local calls to callees that require a TOC pointer.

Reviewed By: sfertile, MaskRay, nemanjai, stefanp

Differential Revision: https://reviews.llvm.org/D83504
  • Loading branch information
VictorHuangIBM committed Jul 20, 2020
1 parent 3a108ab commit 91cce1a
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 15 deletions.
22 changes: 8 additions & 14 deletions lld/ELF/Arch/PPC64.cpp
Expand Up @@ -106,6 +106,11 @@ bool elf::isPPC64SmallCodeModelTocReloc(RelType type) {
return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS;
}

// Stores the 8-byte prefixed instruction `insn` at `loc`. The prefix word must
// always end up in lower memory than the suffix word, so on little-endian
// targets the two 32-bit halves are exchanged before the 64-bit store.
void elf::writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
  if (config->isLE)
    insn = insn << 32 | insn >> 32;
  write64(loc, insn);
}

static bool addOptional(StringRef name, uint64_t value,
std::vector<Defined *> &defined) {
Symbol *sym = symtab->find(name);
Expand Down Expand Up @@ -376,15 +381,6 @@ static uint32_t readFromHalf16(const uint8_t *loc) {
return read32(config->isLE ? loc : loc - 2);
}

// A prefixed instruction is a 4 byte prefix followed by a 4 byte instruction,
// so the prefix always sits in lower memory than the instruction regardless
// of endianness. On little endian machines the two 32-bit words therefore
// have to be exchanged before the value is stored.
static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
  const uint64_t swapped = insn << 32 | insn >> 32;
  write64(loc, config->isLE ? swapped : insn);
}

static uint64_t readPrefixedInstruction(const uint8_t *loc) {
uint64_t fullInstr = read64(loc);
return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr;
Expand Down Expand Up @@ -1048,17 +1044,15 @@ bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
if (s.isInPlt())
return true;

// FIXME: Remove the fatal error once the call protocol is implemented.
if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
fatal("unimplemented feature: local function call with the reltype"
" R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup");

// This check looks at the st_other bits of the callee with relocation
// R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee
// clobbers the TOC and we need an R2 save stub.
if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1)
return true;

if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
return true;

// If a symbol is a weak undefined and we are compiling an executable
// it doesn't need a range-extending thunk since it can't be called.
if (s.isUndefWeak() && !config->shared)
Expand Down
5 changes: 5 additions & 0 deletions lld/ELF/Target.h
Expand Up @@ -213,6 +213,11 @@ unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther);
// the .toc section.
bool isPPC64SmallCodeModelTocReloc(RelType type);

// Write a prefixed instruction, which is a 4-byte prefix followed by a 4-byte
// instruction (regardless of endianness). Therefore, the prefix is always in
// lower memory than the instruction.
void writePrefixedInstruction(uint8_t *loc, uint64_t insn);

void addPPC64SaveRestore();
uint64_t getPPC64TocBase();
uint64_t getAArch64Page(uint64_t expr);
Expand Down
35 changes: 34 additions & 1 deletion lld/ELF/Thunks.cpp
Expand Up @@ -293,6 +293,18 @@ class PPC64R2SaveStub final : public Thunk {
void addSymbols(ThunkSection &isec) override;
};

// PPC64 R12 Setup Stub
// When a caller that does not maintain a toc-pointer performs a local call to
// a callee which requires a toc-pointer, we need this stub to place the
// callee's global entry point into r12 without a save of R2.
class PPC64R12SetupStub final : public Thunk {
public:
// Forwards dest to the Thunk base with 0 as the second argument
// (presumably the addend -- confirm against the Thunk constructor).
PPC64R12SetupStub(Symbol &dest) : Thunk(dest, 0) {}
// The stub occupies 16 bytes: writeTo() emits an 8-byte prefixed paddi
// followed by a 4-byte mtctr and a 4-byte bctr.
uint32_t size() override { return 16; }
// Writes the stub's instruction sequence into buf.
void writeTo(uint8_t *buf) override;
// Adds the stub's "__gep_setup_<callee>" function symbol to isec.
void addSymbols(ThunkSection &isec) override;
};

// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
// alignment. This gives a possible 26 bits of 'reach'. If the call offset is
// larger then that we need to emit a long-branch thunk. The target address
Expand Down Expand Up @@ -851,6 +863,23 @@ void PPC64R2SaveStub::addSymbols(ThunkSection &isec) {
s->needsTocRestore = true;
}

// Emits the R12 setup stub: a pc-relative paddi that loads the destination's
// address into r12, followed by an indirect branch through CTR.
void PPC64R12SetupStub::writeTo(uint8_t *buf) {
  // Displacement from this thunk's target symbol to the destination.
  const int64_t disp = destination.getVA() - getThunkTargetSym()->getVA();
  if (!isInt<34>(disp))
    fatal("offset must fit in 34 bits to encode in the instruction");

  // The 34-bit immediate is split across the two words of the prefixed
  // instruction: the upper 18 bits go into the prefix word (the high half of
  // the 64-bit value) and the lower 16 bits into the suffix word.
  const uint64_t immHi = (disp >> 16) & 0x3ffff;
  const uint64_t immLo = disp & 0xffff;
  const uint64_t paddi = PADDI_R12_NO_DISP | (immHi << 32) | immLo;

  writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1
  write32(buf + 8, MTCTR_R12);              // mtctr r12
  write32(buf + 12, BCTR);                  // bctr
}

// Defines a function symbol named "__gep_setup_<destination>" at offset 0 of
// this thunk inside isec.
void PPC64R12SetupStub::addSymbols(ThunkSection &isec) {
  // The name is built on the fly, so it is interned through saver before
  // being handed to addSymbol().
  const auto stubName = saver.save("__gep_setup_" + destination.getName());
  addSymbol(stubName, STT_FUNC, 0, isec);
}

void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
getPPC64TocBase();
Expand Down Expand Up @@ -974,7 +1003,8 @@ static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel,
}

static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
assert((type == R_PPC64_REL14 || type == R_PPC64_REL24) &&
assert((type == R_PPC64_REL14 || type == R_PPC64_REL24 ||
type == R_PPC64_REL24_NOTOC) &&
"unexpected relocation type for thunk");
if (s.isInPlt())
return make<PPC64PltCallStub>(s);
Expand All @@ -984,6 +1014,9 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
if ((s.stOther >> 5) == 1)
return make<PPC64R2SaveStub>(s);

if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)
return make<PPC64R12SetupStub>(s);

if (config->picThunk)
return make<PPC64PILongBranchThunk>(s, a);

Expand Down
67 changes: 67 additions & 0 deletions lld/test/ELF/ppc64-pcrel-call-to-toc.s
@@ -0,0 +1,67 @@
# REQUIRES: ppc
# RUN: echo 'SECTIONS { \
# RUN: .text_func 0x10010000 : { *(.text_func) } \
# RUN: .text_callee 0x10020000 : { *(.text_callee) } \
# RUN: .text_caller 0x10030000 : { *(.text_caller) } \
# RUN: }' > %t.script

# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
# RUN: ld.lld -T %t.script %t.o -o %t
# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s

# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
# RUN: ld.lld -T %t.script %t.o -o %t
# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL
# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s

## When a caller that does not maintain a TOC pointer makes a local call to a
## callee that requires one, an r12 setup stub is inserted.

# SYMBOL: 1: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 2 callee
# SYMBOL-NEXT: 2: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 3 caller
# SYMBOL-NEXT: 3: 0000000010010000 0 NOTYPE LOCAL DEFAULT 1 func
# SYMBOL: 6: 000000001003000c 16 FUNC LOCAL DEFAULT 3 __gep_setup_callee

# CHECK-LABEL: <func>:
# CHECK-NEXT: blr

# CHECK-LABEL: <callee>:
# CHECK: bl 0x10010000
# CHECK-NEXT: addis 4, 2, -1
# CHECK-NEXT: lwz 4, 32744(4)
# CHECK-NEXT: blr

# CHECK-LABEL: <caller>:
# CHECK-NEXT: bl 0x1003000c
# CHECK-NEXT: blr

# CHECK-LABEL: <__gep_setup_callee>:
# CHECK-NEXT: paddi 12, 0, -65548, 1
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr

.section .text_func, "ax", %progbits
func:
blr

.section .text_callee, "ax", %progbits
callee:
.Lfunc_gep1:
addis 2, 12, .TOC.-.Lfunc_gep1@ha
addi 2, 2, .TOC.-.Lfunc_gep1@l
.Lfunc_lep1:
.localentry callee, .Lfunc_lep1-.Lfunc_gep1
bl func
addis 4, 2, global@toc@ha
lwz 4, global@toc@l(4)
blr

.section .text_caller, "ax", %progbits
caller:
.localentry caller, 1
bl callee@notoc
blr
global:
.long 0
.size global, 4
6 changes: 6 additions & 0 deletions llvm/include/llvm/Object/ELF.h
Expand Up @@ -48,6 +48,12 @@ static inline Error createError(const Twine &Err) {
return make_error<StringError>(Err, object_error::parse_failed);
}

// Fixed PowerPC instruction encodings used when emitting the R12 setup stub.
enum PPCInstrMasks : uint64_t {
  // Prefixed "paddi r12, 0, <disp>, 1" with the displacement fields zeroed:
  // prefix word 0x06100000 in the upper half, suffix word 0x39800000 in the
  // lower half. The caller ORs the displacement bits in.
  PADDI_R12_NO_DISP = 0x06100000'39800000,
  // mtctr r12
  MTCTR_R12 = 0x7D8903A6,
  // bctr
  BCTR = 0x4E800420,
};

template <class ELFT> class ELFFile;

template <class ELFT>
Expand Down

0 comments on commit 91cce1a

Please sign in to comment.