Skip to content

Commit

Permalink
[PPC64] toc-indirect to toc-relative relaxation
Browse files Browse the repository at this point in the history
This is based on D54720 by Sean Fertile.

When accessing a global symbol which is not defined in the translation unit,
compilers will generate instructions that load the address from the toc entry.

If the symbol is defined, non-preemptable, and addressable with a 32-bit
signed offset from the toc pointer, the address can be computed
directly. e.g.

    addis 3, 2, .LC0@toc@ha  # R_PPC64_TOC16_HA
    ld    3, .LC0@toc@l(3)   # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
    ld/lwa 3, 0(3)           # load the value from the address

    .section .toc,"aw",@progbits
    .LC0: .tc var[TC],var

can be relaxed to

    addis 3,2,var@toc@ha     # this may be relaxed to a nop,
    addi  3,3,var@toc@l      # then this becomes addi 3,2,var@toc
    ld/lwa 3, 0(3)           # load the value from the address

We can delete the test ppc64-got-indirect.s, as its purpose is covered by the
newly added ppc64-toc-relax.s and ppc64-toc-relax-constants.s.

Reviewed By: ruiu, sfertile

Differential Revision: https://reviews.llvm.org/D60958

llvm-svn: 360112
  • Loading branch information
MaskRay authored and MrSidims committed May 17, 2019
1 parent 46a69bd commit d654ecb
Show file tree
Hide file tree
Showing 15 changed files with 403 additions and 147 deletions.
109 changes: 107 additions & 2 deletions lld/ELF/Arch/PPC64.cpp
Expand Up @@ -103,6 +103,88 @@ bool elf::isPPC64SmallCodeModelTocReloc(RelType Type) {
return Type == R_PPC64_TOC16 || Type == R_PPC64_TOC16_DS;
}

// Locate the R_PPC64_ADDR64 entry of .rela.toc whose r_offset equals Offset.
//
// Returns the symbol that relocation targets (null when it is not a Defined)
// paired with the relocation's addend. A value-initialized pair is returned
// when TocSec has no relocations or when no relocation matches Offset.
template <typename ELFT>
static std::pair<Defined *, int64_t>
getRelaTocSymAndAddend(InputSectionBase *TocSec, uint64_t Offset) {
  // Bail out before touching the relocation array: there is nothing to index.
  if (TocSec->NumRelocations == 0)
    return {};

  // .rela.toc holds only R_PPC64_ADDR64 relocations, ordered by r_offset
  // (0, 8, 16, ...), so Offset / 8 is normally the index we want.
  //
  // Occasionally a TOC slot stores a plain constant and therefore has no
  // R_PPC64_ADDR64; its r_offset is absent and Offset / 8 then lands on a
  // relocation with a larger r_offset. In that case walk backwards. Constants
  // in .toc are extremely uncommon, so the number of extra array accesses can
  // be treated as a small constant.
  ArrayRef<typename ELFT::Rela> TocRelas = TocSec->template relas<ELFT>();

  // Clamp the starting guess so it never points past the last relocation.
  uint64_t Pos = std::min<uint64_t>(Offset / 8, TocRelas.size() - 1);
  while (TocRelas[Pos].r_offset != Offset) {
    // Either we walked past where Offset would have been, or we ran out of
    // entries: this TOC slot has no relocation.
    if (TocRelas[Pos].r_offset < Offset || Pos == 0)
      return {};
    --Pos;
  }

  const typename ELFT::Rela &Match = TocRelas[Pos];
  Symbol &RelocSym = TocSec->getFile<ELFT>()->getRelocTargetSym(Match);
  return {dyn_cast<Defined>(&RelocSym), getAddend<ELFT>(Match)};
}

// For a symbol defined in another translation unit, compilers reserve a .toc
// entry, create a local label for it and emit toc-indirect instructions:
//
// addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA
// ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
// ld/lwa 3, 0(3) # load the value from the address
//
// .section .toc,"aw",@progbits
// .LC0: .tc var[TC],var
//
// When var is defined, non-preemptable and reachable with a 32-bit signed
// offset from the toc base, its address can instead be formed by adding an
// offset to the toc base, which saves a load:
//
// addis 3,2,var@toc@ha # this may be relaxed to a nop,
// addi 3,3,var@toc@l # then this becomes addi 3,2,var@toc
// ld/lwa 3, 0(3) # load the value from the address
//
// Type is the relocation type being applied, Rel the relocation record and
// BufLoc the output location of the instruction to patch.
//
// Returns true if the relaxation is performed; false leaves BufLoc untouched.
bool elf::tryRelaxPPC64TocIndirection(RelType Type, const Relocation &Rel,
                                      uint8_t *BufLoc) {
  assert(Config->TocOptimize);

  // The addend is used as an offset into .toc, so it cannot be negative.
  if (Rel.Addend < 0)
    return false;

  // A toc-indirection is a relocation against the .toc section symbol itself.
  auto *TocSym = dyn_cast<Defined>(Rel.Sym);
  const bool RefersToToc =
      TocSym && TocSym->isSection() && TocSym->Section->Name == ".toc";
  if (!RefersToToc)
    return false;

  // Find what the referenced .toc entry points at, using the relocation
  // layout that matches the configured endianness.
  auto *TocISB = cast<InputSectionBase>(TocSym->Section);
  std::pair<Defined *, int64_t> Entry =
      Config->IsLE ? getRelaTocSymAndAddend<ELF64LE>(TocISB, Rel.Addend)
                   : getRelaTocSymAndAddend<ELF64BE>(TocISB, Rel.Addend);

  // Only non-preemptable defined symbols can be relaxed.
  Defined *D = Entry.first;
  if (!D || D->IsPreemptible)
    return false;

  // Two instructions can materialize a 32-bit signed offset from the toc base.
  uint64_t TocRelative = D->getVA(Entry.second) - getPPC64TocBase();
  if (isInt<32>(TocRelative)) {
    // Add PPC64TocOffset that will be subtracted by relocateOne().
    Target->relaxGot(BufLoc, Type, TocRelative + PPC64TocOffset);
    return true;
  }
  return false;
}

namespace {
class PPC64 final : public TargetInfo {
public:
Expand All @@ -121,6 +203,7 @@ class PPC64 final : public TargetInfo {
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
Expand Down Expand Up @@ -270,6 +353,27 @@ uint32_t PPC64::calcEFlags() const {
return 2;
}

// Rewrite one instruction of a toc-indirect sequence into its toc-relative
// form. Loc is the relocated location, Type the original relocation type and
// Val the value to apply. Only R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS are
// expected here.
void PPC64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const {
  // Convert "addis reg, 2, .LC0@toc@ha" to "addis reg, 2, var@toc@ha" or
  // "nop". The instruction keeps its relocation type; only the value differs.
  if (Type == R_PPC64_TOC16_HA) {
    relocateOne(Loc, Type, Val);
    return;
  }

  if (Type != R_PPC64_TOC16_LO_DS)
    llvm_unreachable("unexpected relocation type");

  // Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or
  // "addi reg, 2, var@toc".
  const uint32_t OperandBits = 0x03FFFFFF; // everything below the top 6 bits
  const uint32_t AddiOpcodeBits = 0x38000000; // top 6 bits selecting addi
  uint32_t Insn = readInstrFromHalf16(Loc);
  if (getPrimaryOpCode(Insn) != LD)
    error("expected a 'ld' for got-indirect to toc-relative relaxing");

  // Keep the operand fields, swap in the new primary opcode, then apply the
  // low 16 bits with the non-DS relocation type.
  writeInstrFromHalf16(Loc, (Insn & OperandBits) | AddiOpcodeBits);
  relocateOne(Loc, R_PPC64_TOC16_LO, Val);
}

void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.
// The general dynamic code sequence for a global `x` will look like:
Expand Down Expand Up @@ -439,11 +543,12 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
return R_GOT_OFF;
case R_PPC64_TOC16:
case R_PPC64_TOC16_DS:
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_HI:
case R_PPC64_TOC16_LO:
case R_PPC64_TOC16_LO_DS:
return R_GOTREL;
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_LO_DS:
return Config->TocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL;
case R_PPC64_TOC:
return R_PPC_TOC;
case R_PPC64_REL14:
Expand Down
4 changes: 2 additions & 2 deletions lld/ELF/Arch/X86_64.cpp
Expand Up @@ -38,7 +38,7 @@ class X86_64 : public TargetInfo {

RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxGot(uint8_t *Loc, uint64_t Val) const override;
void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
Expand Down Expand Up @@ -453,7 +453,7 @@ static void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
write32le(Loc, Val);
}

void X86_64::relaxGot(uint8_t *Loc, uint64_t Val) const {
void X86_64::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const {
const uint8_t Op = Loc[-2];
const uint8_t ModRm = Loc[-1];

Expand Down
7 changes: 6 additions & 1 deletion lld/ELF/InputSection.cpp
Expand Up @@ -631,6 +631,7 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
case R_GOTPLTONLY_PC:
return In.GotPlt->getVA() + A - P;
case R_GOTREL:
case R_PPC64_RELAX_TOC:
return Sym.getVA(A) - In.Got->getVA();
case R_GOTPLTREL:
return Sym.getVA(A) - In.GotPlt->getVA();
Expand Down Expand Up @@ -894,7 +895,11 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
switch (Expr) {
case R_RELAX_GOT_PC:
case R_RELAX_GOT_PC_NOPIC:
Target->relaxGot(BufLoc, TargetVA);
Target->relaxGot(BufLoc, Type, TargetVA);
break;
case R_PPC64_RELAX_TOC:
if (!tryRelaxPPC64TocIndirection(Type, Rel, BufLoc))
Target->relocateOne(BufLoc, Type, TargetVA);
break;
case R_RELAX_TLS_IE_TO_LE:
Target->relaxTlsIeToLe(BufLoc, Type, TargetVA);
Expand Down
12 changes: 7 additions & 5 deletions lld/ELF/Relocations.cpp
Expand Up @@ -383,7 +383,7 @@ static bool needsGot(RelExpr Expr) {
// file (PC, or GOT for example).
static bool isRelExpr(RelExpr Expr) {
return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC_CALL,
R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr);
R_PPC64_RELAX_TOC, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr);
}

// Returns true if a given relocation can be computed at link-time.
Expand All @@ -403,7 +403,7 @@ static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD,
R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT,
R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT,
R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT,
R_TLSIE_HINT>(E))
return true;

Expand Down Expand Up @@ -1079,7 +1079,7 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
// The 4 types that relative GOTPLT are all x86 and x86-64 specific.
if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(Expr)) {
In.GotPlt->HasGotPltOffRel = true;
} else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC_TOC>(Expr)) {
} else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC_TOC, R_PPC64_RELAX_TOC>(Expr)) {
In.Got->HasGotOffRel = true;
}

Expand Down Expand Up @@ -1240,8 +1240,10 @@ static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
for (auto I = Rels.begin(), End = Rels.end(); I != End;)
scanReloc<ELFT>(Sec, GetOffset, I, End);

// Sort relocations by offset to binary search for R_RISCV_PCREL_HI20
if (Config->EMachine == EM_RISCV)
// Sort relocations by offset for more efficient searching for
// R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
if (Config->EMachine == EM_RISCV ||
(Config->EMachine == EM_PPC64 && Sec.Name == ".toc"))
llvm::stable_sort(Sec.Relocations,
[](const Relocation &LHS, const Relocation &RHS) {
return LHS.Offset < RHS.Offset;
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Relocations.h
Expand Up @@ -93,6 +93,7 @@ enum RelExpr {
R_PPC_CALL,
R_PPC_CALL_PLT,
R_PPC_TOC,
R_PPC64_RELAX_TOC,
R_RISCV_PC_INDIRECT,
};

Expand Down
2 changes: 1 addition & 1 deletion lld/ELF/Target.cpp
Expand Up @@ -149,7 +149,7 @@ RelExpr TargetInfo::adjustRelaxExpr(RelType Type, const uint8_t *Data,
return Expr;
}

void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const {
void TargetInfo::relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const {
llvm_unreachable("Should not have claimed to be relaxable");
}

Expand Down
9 changes: 6 additions & 3 deletions lld/ELF/Target.h
Expand Up @@ -124,7 +124,7 @@ class TargetInfo {

virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const;
virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
virtual void relaxGot(uint8_t *Loc, RelType Type, uint64_t Val) const;
virtual void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const;
virtual void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
virtual void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const;
Expand Down Expand Up @@ -164,8 +164,11 @@ static inline std::string getErrorLocation(const uint8_t *Loc) {
return getErrorPlace(Loc).Loc;
}

// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first is
// a global entry point (GEP) which typically is used to intiailzie the TOC
bool tryRelaxPPC64TocIndirection(RelType Type, const Relocation &Rel,
uint8_t *BufLoc);

// In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first
// is a global entry point (GEP) which typically is used to initialize the TOC
// pointer in general purpose register 2. The second is a local entry
// point (LEP) which bypasses the TOC pointer initialization code. The
// offset between GEP and LEP is encoded in a function's st_other flags.
Expand Down
7 changes: 7 additions & 0 deletions lld/test/ELF/Inputs/ppc64-toc-relax-shared.s
@@ -0,0 +1,7 @@
# Test input: defines a single global data object named `shared` in .data.
.data

# `shared` is declared as a global object symbol.
.type shared,@object
.globl shared
shared:
# 4-byte payload with the value 8; the symbol size below matches it.
.long 8
.size shared, 4
15 changes: 15 additions & 0 deletions lld/test/ELF/Inputs/ppc64-toc-relax.s
@@ -0,0 +1,15 @@
# Test input: defines the data symbols `default`, `hidden` and `hidden2`.
.data

# Both symbols are global; `hidden` additionally gets hidden visibility while
# `default` keeps the default visibility.
.globl default, hidden
.hidden hidden

# The two labels name the same 4-byte zero word.
default:
hidden:
.long 0

# Padding: 4 + 65532 = 65536, so hidden2 starts exactly 65536 bytes after
# default/hidden.
.space 65532

# `hidden2` is a global symbol with hidden visibility naming a 4-byte zero word.
.globl hidden2
.hidden hidden2
hidden2:
.long 0
2 changes: 1 addition & 1 deletion lld/test/ELF/ppc64-func-entry-points.s
Expand Up @@ -75,6 +75,6 @@ glob:
// CHECK: foo_external_diff:
// CHECK-NEXT: 10010080: {{.*}} addis 2, 12, 1
// CHECK-NEXT: 10010084: {{.*}} addi 2, 2, 32640
// CHECK-NEXT: 10010088: {{.*}} nop
// CHECK-NEXT: 10010088: {{.*}} addis 5, 2, 1
// CHECK: foo_external_same:
// CHECK-NEXT: 100100b0: {{.*}} add 3, 4, 3

0 comments on commit d654ecb

Please sign in to comment.