Skip to content

Commit

Permalink
[PPC32] Improve the 32-bit PowerPC port
Browse files Browse the repository at this point in the history
Many -static/-no-pie/-shared/-pie applications linked against glibc or musl
should work with this patch. This also helps FreeBSD PowerPC64 to migrate
their lib32 (PR40888).

* Fix default image base and max page size.
* Support new-style Secure PLT (see below). Old-style BSS PLT is not
  implemented, so the output is not yet suitable for FreeBSD, whose rtld does
  not support Secure PLT.
* Support more initial relocation types:
  R_PPC_ADDR32, R_PPC_REL16*, R_PPC_LOCAL24PC, R_PPC_PLTREL24, and R_PPC_GOT16.
  The addend of R_PPC_PLTREL24 is special: it decides the call stub PLT type
  but it should be ignored for the computation of target symbol VA.
* Support GNU ifunc
* Support .glink used for lazy PLT resolution in glibc
* Add a new thunk type: PPC32PltCallStub that is similar to PPC64PltCallStub.
  It is used by R_PPC_REL24 and R_PPC_PLTREL24.

A PLT stub used in -fPIE/-fPIC usually loads an address relative to
.got2+0x8000 (-fpie/-fpic code uses _GLOBAL_OFFSET_TABLE_ relative
addresses).
Two .got2 sections in two object files have different addresses, thus a PLT stub
can't be shared by two object files. To handle this incompatibility,
change the parameters of Thunk::isCompatibleWith to
`const InputSection &, const Relocation &`.

PowerPC psABI specified an old-style .plt (BSS PLT) that is both
writable and executable. Linkers don't make separate RW- and RWE segments,
which causes all initially writable memory (think .data) to become executable.
This is a big security concern, so a new PLT scheme (secure PLT) was developed
to address it.

TLS will be implemented in D62940.

glibc older than ~2012 requires .rela.dyn to include .rela.plt; it cannot
handle the DT_RELA+DT_RELASZ == DT_JMPREL case correctly. A hack
(not included in this patch) in LinkerScript.cpp addOrphanSections() can
work around the issue:

    if (Config->EMachine == EM_PPC) {
      // Older glibc assumes .rela.dyn includes .rela.plt
      Add(In.RelaDyn);
      if (In.RelaPlt->isLive() && !In.RelaPlt->Parent)
        In.RelaDyn->getParent()->addSection(In.RelaPlt);
    }

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62464

llvm-svn: 362721
  • Loading branch information
MaskRay committed Jun 6, 2019
1 parent f1b8c6a commit 82442ad
Show file tree
Hide file tree
Showing 26 changed files with 934 additions and 329 deletions.
181 changes: 170 additions & 11 deletions lld/ELF/Arch/PPC.cpp
Expand Up @@ -6,7 +6,9 @@
//
//===----------------------------------------------------------------------===//

#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"
Expand All @@ -21,53 +23,210 @@ namespace {
// TargetInfo implementation for 32-bit PowerPC.
//
// PLT entries are not emitted through the generic writePltHeader()/writePlt()
// hooks; the .glink contents are produced by writePPC32GlinkSection(), so both
// hooks are unreachable for this target.
class PPC final : public TargetInfo {
public:
  PPC();

  // Maps a relocation type to the expression kind used to compute its value.
  RelExpr getRelExpr(RelType Type, const Symbol &S,
                     const uint8_t *Loc) const override;

  // Writes the reserved entries at the start of .got.
  void writeGotHeader(uint8_t *Buf) const override;

  void writePltHeader(uint8_t *Buf) const override {
    llvm_unreachable("should call writePPC32GlinkSection() instead");
  }
  void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
                int32_t Index, unsigned RelOff) const override {
    llvm_unreachable("should call writePPC32GlinkSection() instead");
  }

  // Writes the initial value of the .plt slot that belongs to symbol S.
  void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;

  // Thunk (call stub) support for branch relocations.
  bool needsThunk(RelExpr Expr, RelType RelocType, const InputFile *File,
                  uint64_t BranchAddr, const Symbol &S) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;

  // Applies relocation Type with the already computed value Val at Loc.
  // Declared exactly once: a second declaration of the same member function
  // inside the class body is ill-formed.
  void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
};
} // namespace

// Low 16 bits of V (the @l half of a 32-bit value).
static uint16_t lo(uint32_t V) {
  return static_cast<uint16_t>(V & 0xFFFFu);
}
// High-adjusted 16 bits of V (the @ha half): the upper half-word after adding
// 0x8000, which compensates for the sign extension of the low half when the
// two halves are later added together. The addition wraps modulo 2^32.
static uint16_t ha(uint32_t V) {
  const uint32_t Rounded = V + 0x8000u;
  return static_cast<uint16_t>(Rounded >> 16);
}

// Emits the PPC32 .glink section into Buf: NumEntries `b PLTresolve` branch
// instructions (4 bytes each) followed by the PLTresolve routine, which is
// padded with nops to a fixed 64-byte footprint. Buf must therefore have room
// for 4 * NumEntries + 64 bytes.
void elf::writePPC32GlinkSection(uint8_t *Buf, size_t NumEntries) {
// On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an
// absolute address from a specific .plt slot (usually called .got.plt on
// other targets) and jumps there.
//
// a) With immediate binding (BIND_NOW), the .plt entry is resolved at load
// time. The .glink section is not used.
// b) With lazy binding, the .plt entry points to a `b PLTresolve`
// instruction in .glink, filled in by PPC::writeGotPlt().

// Write N `b PLTresolve` first. Entry I sits 4 * (NumEntries - I) bytes before
// PLTresolve, so that distance is the branch displacement.
for (size_t I = 0; I != NumEntries; ++I)
write32(Buf + 4 * I, 0x48000000 | 4 * (NumEntries - I));
Buf += 4 * NumEntries;

// Then write PLTresolve(), which has two forms: PIC and non-PIC. PLTresolve()
// computes the PLT index (by computing the distance from the landing b to
// itself) and calls _dl_runtime_resolve() (in glibc).
uint32_t GOT = In.Got->getVA();
uint32_t Glink = In.Plt->getVA(); // VA of .glink
// PLTresolve occupies 64 bytes in total; End is where the nop padding stops.
const uint8_t *End = Buf + 64;
if (Config->Pic) {
// AfterBcl is the offset from the start of .glink to label 1 (the instruction
// at Buf + 12, right after bcl).
// NOTE(review): this presumes In.Plt->getSize() covers the branch table plus
// the PltHeaderSize bytes of PLTresolve -- confirm against the section code.
uint32_t AfterBcl = In.Plt->getSize() - Target->PltHeaderSize + 12;
// Distance from label 1 to _GLOBAL_OFFSET_TABLE_[1].
uint32_t GotBcl = GOT + 4 - (Glink + AfterBcl);
write32(Buf + 0, 0x3d6b0000 | ha(AfterBcl)); // addis r11,r11,1f-glink@ha
write32(Buf + 4, 0x7c0802a6); // mflr r0
write32(Buf + 8, 0x429f0005); // bcl 20,31,.+4
write32(Buf + 12, 0x396b0000 | lo(AfterBcl)); // 1: addi r11,r11,1b-glink@l
write32(Buf + 16, 0x7d8802a6); // mflr r12
write32(Buf + 20, 0x7c0803a6); // mtlr r0
write32(Buf + 24, 0x7d6c5850); // sub r11,r11,r12
write32(Buf + 28, 0x3d8c0000 | ha(GotBcl)); // addis r12,r12,GOT+4-1b@ha
// If GOT+4 and GOT+8 share the same @ha part, both words can be loaded with
// plain lwz; otherwise lwzu updates r12 so the second load can use offset 4.
if (ha(GotBcl) == ha(GotBcl + 4)) {
write32(Buf + 32, 0x800c0000 | lo(GotBcl)); // lwz r0,GOT+4-1b@l(r12)
write32(Buf + 36,
0x818c0000 | lo(GotBcl + 4)); // lwz r12,GOT+8-1b@l(r12)
} else {
write32(Buf + 32, 0x840c0000 | lo(GotBcl)); // lwzu r0,GOT+4-1b@l(r12)
write32(Buf + 36, 0x818c0000 | 4); // lwz r12,4(r12)
}
write32(Buf + 40, 0x7c0903a6); // mtctr r0
write32(Buf + 44, 0x7c0b5a14); // add r0,r11,r11 (r0 = 2 * r11)
write32(Buf + 48, 0x7d605a14); // add r11,r0,r11 (r11 = 3 * r11)
write32(Buf + 52, 0x4e800420); // bctr
Buf += 56;
} else {
write32(Buf + 0, 0x3d800000 | ha(GOT + 4)); // lis r12,GOT+4@ha
write32(Buf + 4, 0x3d6b0000 | ha(-Glink)); // addis r11,r11,-Glink@ha
// Same lwz/lwzu choice as in the PIC form, based on whether GOT+4 and GOT+8
// share their @ha part.
if (ha(GOT + 4) == ha(GOT + 8))
write32(Buf + 8, 0x800c0000 | lo(GOT + 4)); // lwz r0,GOT+4@l(r12)
else
write32(Buf + 8, 0x840c0000 | lo(GOT + 4)); // lwzu r0,GOT+4@l(r12)
write32(Buf + 12, 0x396b0000 | lo(-Glink)); // addi r11,r11,-Glink@l
write32(Buf + 16, 0x7c0903a6); // mtctr r0
write32(Buf + 20, 0x7c0b5a14); // add r0,r11,r11 (r0 = 2 * r11)
if (ha(GOT + 4) == ha(GOT + 8))
write32(Buf + 24, 0x818c0000 | lo(GOT + 8)); // lwz r12,GOT+8@l(r12)
else
write32(Buf + 24, 0x818c0000 | 4); // lwz r12,4(r12)
write32(Buf + 28, 0x7d605a14); // add r11,r0,r11 (r11 = 3 * r11)
write32(Buf + 32, 0x4e800420); // bctr
Buf += 36;
}

// Pad with nop. They should not be executed.
for (; Buf < End; Buf += 4)
write32(Buf, 0x60000000);
}

// Fills in the target parameters for 32-bit PowerPC.
PPC::PPC() {
  // Address-space defaults.
  DefaultImageBase = 0x10000000;
  DefaultMaxPageSize = 65536;

  // Dynamic relocation types.
  NoneRel = R_PPC_NONE;
  RelativeRel = R_PPC_RELATIVE;
  IRelativeRel = R_PPC_IRELATIVE;
  GotRel = R_PPC_GLOB_DAT;
  PltRel = R_PPC_JMP_SLOT;

  // GOT layout: three reserved header entries in .got, none in .got.plt.
  GotBaseSymInGotPlt = false;
  GotHeaderEntriesNum = 3;
  GotPltHeaderEntriesNum = 0;

  // .glink layout: a 64-byte PLTresolve routine plus one 4-byte branch per
  // PLT entry.
  PltHeaderSize = 64;
  PltEntrySize = 4;

  // Calls through the PLT are routed via call stubs (thunks).
  NeedsThunks = true;

  // Instruction word written into TrapInstr.
  const uint32_t TrapWord = 0x7fe00008;
  write32(TrapInstr.data(), TrapWord);
}

// Initializes the reserved header of .got. Only the first word is written
// here:
//   _GLOBAL_OFFSET_TABLE_[0] = _DYNAMIC
// glibc stores _dl_runtime_resolve in _GLOBAL_OFFSET_TABLE_[1] and link_map in
// _GLOBAL_OFFSET_TABLE_[2].
void PPC::writeGotHeader(uint8_t *Buf) const {
  const auto DynamicVA = In.Dynamic->getVA();
  write32(Buf, DynamicVA);
}

// Writes the initial content of the .plt slot for S: the address of the
// symbol's resolver stub in .glink, i.e. the S.PltIndex-th 4-byte entry
// counted from the start of the section.
void PPC::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
  const auto GlinkVA = In.Plt->getVA();
  write32(Buf, GlinkVA + 4 * S.PltIndex);
}

// Decides whether the branch relocation (Expr, Type) located at BranchAddr and
// targeting S has to be routed through a thunk.
bool PPC::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                     uint64_t BranchAddr, const Symbol &S) const {
  // Only the 24-bit branch relocations are candidates.
  const bool IsBranch = Type == R_PPC_REL24 || Type == R_PPC_PLTREL24;
  if (!IsBranch)
    return false;

  // Calls to symbols that have a PLT entry always go through a call stub.
  if (S.isInPlt())
    return true;

  // Undefined weak symbols never get a thunk.
  if (S.isUndefWeak())
    return false;

  // A direct PC-relative branch can stay as-is only if the target is in
  // range; every other case needs a thunk.
  if (Expr != R_PC)
    return true;
  return !PPC::inBranchRange(Type, BranchAddr, S.getVA());
}

// Spacing between thunk sections: 0x2000000 bytes (32 MiB).
uint32_t PPC::getThunkSectionSpacing() const {
  const uint32_t Spacing = 0x2000000;
  return Spacing;
}

// Returns true if a branch relocated with Type at address Src can reach Dst
// directly. The supported branch relocations encode a signed 26-bit byte
// displacement.
//
// Each relocation constant has to be compared against Type explicitly:
// `Type == R_PPC_REL24 || R_PPC_PLTREL24` is always true because the second
// operand is a non-zero constant, which made the unreachable below dead code.
// R_PPC_LOCAL24PC is accepted as well since relocateOne() applies it as the
// same 26-bit branch.
bool PPC::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
  // Unsigned subtraction wraps; isInt<26> interprets the result as signed.
  uint64_t Offset = Dst - Src;
  if (Type == R_PPC_LOCAL24PC || Type == R_PPC_REL24 ||
      Type == R_PPC_PLTREL24)
    return isInt<26>(Offset);
  llvm_unreachable("unsupported relocation type used in branch");
}

// Maps a PPC32 relocation type to the expression kind used to compute its
// value. Every case label appears exactly once: R_PPC_REL24 is a call that
// may go through the PLT, and R_PPC_PLTREL24 gets its own expression because
// its addend selects the call stub type instead of adjusting the target.
// Relocation types not listed here are treated as absolute.
RelExpr PPC::getRelExpr(RelType Type, const Symbol &S,
                        const uint8_t *Loc) const {
  switch (Type) {
  case R_PPC_REL14:
  case R_PPC_REL32:
  case R_PPC_LOCAL24PC:
  case R_PPC_REL16_LO:
  case R_PPC_REL16_HI:
  case R_PPC_REL16_HA:
    return R_PC;
  case R_PPC_GOT16:
    return R_GOT_OFF;
  case R_PPC_REL24:
    return R_PLT_PC;
  case R_PPC_PLTREL24:
    return R_PPC32_PLTREL;
  default:
    return R_ABS;
  }
}

void PPC::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
switch (Type) {
case R_PPC_ADDR16:
case R_PPC_GOT16:
checkInt(Loc, Val, 16, Type);
write16(Loc, Val);
break;
case R_PPC_ADDR16_HA:
write16be(Loc, (Val + 0x8000) >> 16);
case R_PPC_REL16_HA:
write16(Loc, ha(Val));
break;
case R_PPC_ADDR16_HI:
write16be(Loc, Val >> 16);
case R_PPC_REL16_HI:
write16(Loc, Val >> 16);
break;
case R_PPC_ADDR16_LO:
write16be(Loc, Val);
case R_PPC_REL16_LO:
write16(Loc, Val);
break;
case R_PPC_ADDR32:
case R_PPC_GLOB_DAT:
case R_PPC_REL32:
write32be(Loc, Val);
write32(Loc, Val);
break;
case R_PPC_REL14:
write32be(Loc, read32be(Loc) | (Val & 0xFFFC));
case R_PPC_REL14: {
uint32_t Mask = 0x0000FFFC;
checkInt(Loc, Val, 16, Type);
checkAlignment(Loc, Val, 4, Type);
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
break;
case R_PPC_PLTREL24:
}
case R_PPC_REL24:
write32be(Loc, read32be(Loc) | (Val & 0x3FFFFFC));
case R_PPC_LOCAL24PC:
case R_PPC_PLTREL24: {
uint32_t Mask = 0x03FFFFFC;
checkInt(Loc, Val, 26, Type);
checkAlignment(Loc, Val, 4, Type);
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
break;
}
default:
error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
}
Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/InputFiles.h
Expand Up @@ -117,6 +117,10 @@ class InputFile {
// True if this is an argument for --just-symbols. Usually false.
bool JustSymbols = false;

// OutSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE
// to compute offsets in PLT call stubs.
uint32_t PPC32Got2OutSecOff = 0;

// On PPC64 we need to keep track of which files contain small code model
// relocations that access the .toc section. To minimize the chance of a
// relocation overflow, files that do contain said relocations should have
Expand Down
7 changes: 7 additions & 0 deletions lld/ELF/InputSection.cpp
Expand Up @@ -718,6 +718,8 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
Dest = getARMUndefinedRelativeWeakVA(Type, A, P);
else if (Config->EMachine == EM_AARCH64)
Dest = getAArch64UndefinedRelativeWeakVA(Type, A, P);
else if (Config->EMachine == EM_PPC)
Dest = P;
else
Dest = Sym.getVA(A);
} else {
Expand All @@ -730,6 +732,11 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
case R_PLT_PC:
case R_PPC64_CALL_PLT:
return Sym.getPltVA() + A - P;
case R_PPC32_PLTREL:
// R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30
// stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for
// target VA computation.
return Sym.getPltVA() - P;
case R_PPC64_CALL: {
uint64_t SymVA = Sym.getVA(A);
// If we have an undefined weak symbol, we might get here with a symbol
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Options.td
Expand Up @@ -537,6 +537,7 @@ def: F<"no-warn-mismatch">;
def: Flag<["-"], "p">;
def: Separate<["--", "-"], "rpath-link">;
def: J<"rpath-link=">;
def: F<"secure-plt">;
def: F<"sort-common">;
def: F<"stats">;
def: F<"warn-execstack">;
Expand Down
42 changes: 26 additions & 16 deletions lld/ELF/Relocations.cpp
Expand Up @@ -363,7 +363,7 @@ static bool isAbsoluteValue(const Symbol &Sym) {

// Returns true if Expr refers to a PLT entry. R_PPC32_PLTREL is included so
// that PPC32 R_PPC_PLTREL24 calls get a PLT entry; the function has a single
// return statement so that the full list is the one actually evaluated.
static bool needsPlt(RelExpr Expr) {
  return oneof<R_PLT_PC, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PLT>(Expr);
}

// Returns true if Expr refers a GOT entry. Note that this function
Expand Down Expand Up @@ -399,8 +399,8 @@ static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL, R_MIPS_GOT_OFF,
R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD,
R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC64_CALL_PLT,
R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC32_PLTREL,
R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E))
return true;

Expand Down Expand Up @@ -469,6 +469,7 @@ static RelExpr fromPlt(RelExpr Expr) {
// reference to the symbol itself.
switch (Expr) {
case R_PLT_PC:
case R_PPC32_PLTREL:
return R_PC;
case R_PPC64_CALL_PLT:
return R_PPC64_CALL;
Expand Down Expand Up @@ -1105,6 +1106,9 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
getLocation(Sec, Sym, Offset));
}

// Read an addend.
int64_t Addend = computeAddend<ELFT>(Rel, End, Sec, Expr, Sym.isLocal());

// Relax relocations.
//
// If we know that a PLT entry will be resolved within the same ELF module, we
Expand All @@ -1114,10 +1118,15 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
// runtime, because the main executable is always at the beginning of a search
// list. We can leverage that fact.
if (!Sym.IsPreemptible && (!Sym.isGnuIFunc() || Config->ZIfuncNoplt)) {
if (Expr == R_GOT_PC && !isAbsoluteValue(Sym))
if (Expr == R_GOT_PC && !isAbsoluteValue(Sym)) {
Expr = Target->adjustRelaxExpr(Type, RelocatedAddr, Expr);
else
} else {
// Addend of R_PPC_PLTREL24 is used to choose call stub type. It should be
// ignored if optimized to R_PC.
if (Config->EMachine == EM_PPC && Expr == R_PPC32_PLTREL)
Addend = 0;
Expr = fromPlt(Expr);
}
}

// If the relocation does not emit a GOT or GOTPLT entry but its computation
Expand All @@ -1131,9 +1140,6 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
In.Got->HasGotOffRel = true;
}

// Read an addend.
int64_t Addend = computeAddend<ELFT>(Rel, End, Sec, Expr, Sym.isLocal());

// Process some TLS relocations, including relaxing TLS relocations.
// Note that this function does not handle all TLS relocations.
if (unsigned Processed =
Expand Down Expand Up @@ -1618,27 +1624,27 @@ static bool isThunkSectionCompatible(InputSection *Source,
return true;
}

std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *IS, Symbol &Sym,
RelType Type, uint64_t Src) {
std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *IS,
Relocation &Rel, uint64_t Src) {
std::vector<Thunk *> *ThunkVec = nullptr;

// We use (section, offset) pair to find the thunk position if possible so
// that we create only one thunk for aliased symbols or ICFed sections.
if (auto *D = dyn_cast<Defined>(&Sym))
if (auto *D = dyn_cast<Defined>(Rel.Sym))
if (!D->isInPlt() && D->Section)
ThunkVec = &ThunkedSymbolsBySection[{D->Section->Repl, D->Value}];
if (!ThunkVec)
ThunkVec = &ThunkedSymbols[&Sym];
ThunkVec = &ThunkedSymbols[Rel.Sym];

// Check existing Thunks for Sym to see if they can be reused
for (Thunk *T : *ThunkVec)
if (isThunkSectionCompatible(IS, T->getThunkTargetSym()->Section) &&
T->isCompatibleWith(Type) &&
Target->inBranchRange(Type, Src, T->getThunkTargetSym()->getVA()))
T->isCompatibleWith(*IS, Rel) &&
Target->inBranchRange(Rel.Type, Src, T->getThunkTargetSym()->getVA()))
return std::make_pair(T, false);

// No existing compatible Thunk in range, create a new one
Thunk *T = addThunk(Type, Sym);
Thunk *T = addThunk(*IS, Rel);
ThunkVec->push_back(T);
return std::make_pair(T, true);
}
Expand Down Expand Up @@ -1717,7 +1723,7 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> OutputSections) {

Thunk *T;
bool IsNew;
std::tie(T, IsNew) = getThunk(IS, *Rel.Sym, Rel.Type, Src);
std::tie(T, IsNew) = getThunk(IS, Rel, Src);

if (IsNew) {
// Find or create a ThunkSection for the new Thunk
Expand All @@ -1733,6 +1739,10 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> OutputSections) {
// Redirect relocation to Thunk, we never go via the PLT to a Thunk
Rel.Sym = T->getThunkTargetSym();
Rel.Expr = fromPlt(Rel.Expr);

// Addend of R_PPC_PLTREL24 should be ignored after changing to R_PC.
if (Config->EMachine == EM_PPC && Rel.Type == R_PPC_PLTREL24)
Rel.Addend = 0;
}

for (auto &P : ISD->ThunkSections)
Expand Down

0 comments on commit 82442ad

Please sign in to comment.