Skip to content

Commit

Permalink
Add isRelExprOneOf helper
Browse files Browse the repository at this point in the history
In various places in LLD's hot loops, we have expressions of the form
"E == R_FOO || E == R_BAR || ..." (E is a RelExpr).

Some of these expressions are quite long, and even though they usually go just
a very small number of ways and so should be well predicted, they can still
occupy branch predictor resources harming other parts of the code, or they
won't be predicted well if they overflow branch predictor resources or if the
branches are too dense and the branch predictor can't track them all (the
compiler can in theory avoid this, at a cost in text size). And some of these
expressions are so large and executed so frequently that even when
well-predicted they probably still have a nontrivial cost.

This speedup should be pretty portable. The cost of these simple bit tests is
independent of:

- the target we are linking for
- the distribution of RelExpr's for a given link (which can depend on how the
  input files were compiled)
- what compiler was used to compile LLD (it is just a simple bit test;
  hopefully the compiler gets it right!)
- adding new target-dependent relocations (e.g. needsPlt doesn't pay any extra
  cost checking R_PPC_PLT_OPD on x86-64 builds)

I did some rough measurements on clang-fsds and this patch gives a speedup of
slightly over 4% for a regular -O1 link, about 2.5% for -O3 --gc-sections and over 5%
for -O0. Sorry, I don't have my current machine set up for doing really
accurate measurements right now.

This is also just a bit cleaner. Thanks to Joerg for suggesting
this approach.

Differential Revision: https://reviews.llvm.org/D27156

llvm-svn: 288314
  • Loading branch information
chisophugis committed Dec 1, 2016
1 parent e5f23fb commit 2eed759
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 21 deletions.
39 changes: 18 additions & 21 deletions lld/ELF/Relocations.cpp
Expand Up @@ -62,12 +62,10 @@ namespace lld {
namespace elf {

// Returns true if Expr is any of the RelExpr values enumerated in the body.
// NOTE(review): this is a diff view, so the body shows two returns: the
// removed `==` chain followed by the added isRelExprOneOf<> form. Both
// enumerate the same fourteen expressions.
static bool refersToGotEntry(RelExpr Expr) {
return Expr == R_GOT || Expr == R_GOT_OFF || Expr == R_MIPS_GOT_LOCAL_PAGE ||
Expr == R_MIPS_GOT_OFF || Expr == R_MIPS_GOT_OFF32 ||
Expr == R_MIPS_TLSGD || Expr == R_MIPS_TLSLD ||
Expr == R_GOT_PAGE_PC || Expr == R_GOT_PC || Expr == R_GOT_FROM_END ||
Expr == R_TLSGD || Expr == R_TLSGD_PC || Expr == R_TLSDESC ||
Expr == R_TLSDESC_PAGE;
return isRelExprOneOf<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF,
R_MIPS_GOT_OFF32, R_MIPS_TLSGD, R_MIPS_TLSLD,
R_GOT_PAGE_PC, R_GOT_PC, R_GOT_FROM_END, R_TLSGD,
R_TLSGD_PC, R_TLSDESC, R_TLSDESC_PAGE>(Expr);
}

static bool isPreemptible(const SymbolBody &Body, uint32_t Type) {
Expand Down Expand Up @@ -302,16 +300,16 @@ template <class ELFT> static bool isAbsoluteValue(const SymbolBody &Body) {
}

// Returns true if Expr is any of the RelExpr values enumerated in the body.
// NOTE(review): this is a diff view, so the body shows two returns: the
// removed `==` chain followed by the added isRelExprOneOf<> form. Both
// enumerate the same five expressions.
static bool needsPlt(RelExpr Expr) {
return Expr == R_PLT_PC || Expr == R_PPC_PLT_OPD || Expr == R_PLT ||
Expr == R_PLT_PAGE_PC || Expr == R_THUNK_PLT_PC;
return isRelExprOneOf<R_PLT_PC, R_PPC_PLT_OPD, R_PLT, R_PLT_PAGE_PC,
R_THUNK_PLT_PC>(Expr);
}

// True if this expression is of the form Sym - X, where X is a position in the
// file (PC, or GOT for example).
// NOTE(review): this is a diff view, so the body shows two returns: the
// removed `==` chain followed by the added isRelExprOneOf<> form. Both
// enumerate the same eight expressions.
static bool isRelExpr(RelExpr Expr) {
return Expr == R_PC || Expr == R_GOTREL || Expr == R_GOTREL_FROM_END ||
Expr == R_MIPS_GOTREL || Expr == R_PAGE_PC || Expr == R_RELAX_GOT_PC ||
Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC;
return isRelExprOneOf<R_PC, R_GOTREL, R_GOTREL_FROM_END, R_MIPS_GOTREL,
R_PAGE_PC, R_RELAX_GOT_PC, R_THUNK_PC, R_THUNK_PLT_PC>(
Expr);
}

template <class ELFT>
Expand All @@ -320,12 +318,11 @@ static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type,
InputSectionBase<ELFT> &S,
typename ELFT::uint RelOff) {
// These expressions always compute a constant
if (E == R_SIZE || E == R_GOT_FROM_END || E == R_GOT_OFF ||
E == R_MIPS_GOT_LOCAL_PAGE || E == R_MIPS_GOT_OFF ||
E == R_MIPS_GOT_OFF32 || E == R_MIPS_TLSGD || E == R_GOT_PAGE_PC ||
E == R_GOT_PC || E == R_PLT_PC || E == R_TLSGD_PC || E == R_TLSGD ||
E == R_PPC_PLT_OPD || E == R_TLSDESC_CALL || E == R_TLSDESC_PAGE ||
E == R_HINT || E == R_THUNK_PC || E == R_THUNK_PLT_PC)
if (isRelExprOneOf<R_SIZE, R_GOT_FROM_END, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE,
R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_TLSGD,
R_GOT_PAGE_PC, R_GOT_PC, R_PLT_PC, R_TLSGD_PC, R_TLSGD,
R_PPC_PLT_OPD, R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT,
R_THUNK_PC, R_THUNK_PLT_PC>(E))
return true;

// These never do, except if the entire file is position dependent or if
Expand Down Expand Up @@ -659,12 +656,12 @@ static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) {

// Ignore "hint" and TLS Descriptor call relocation because they are
// only markers for relaxation.
if (Expr == R_HINT || Expr == R_TLSDESC_CALL)
if (isRelExprOneOf<R_HINT, R_TLSDESC_CALL>(Expr))
continue;

if (needsPlt(Expr) || Expr == R_THUNK_ABS || Expr == R_THUNK_PC ||
Expr == R_THUNK_PLT_PC || refersToGotEntry(Expr) ||
!isPreemptible(Body, Type)) {
if (needsPlt(Expr) ||
isRelExprOneOf<R_THUNK_ABS, R_THUNK_PC, R_THUNK_PLT_PC>(Expr) ||
refersToGotEntry(Expr) || !isPreemptible(Body, Type)) {
// If the relocation points to something in the file, we can process it.
bool Constant =
isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, C, RI.r_offset);
Expand Down
29 changes: 29 additions & 0 deletions lld/ELF/Relocations.h
Expand Up @@ -73,6 +73,35 @@ enum RelExpr {
R_TLSLD_PC,
};

// Build a bitmask with one bit set for each RelExpr in `Exprs`.
//
// Constexpr function arguments can't be used in static asserts, so the
// expressions are passed as template arguments, which lets each one be
// range-checked at compile time.
// Function templates cannot be partially specialized (which the base case of
// the recursion needs), so the recursion lives in a helper struct instead.
//
// build() is constexpr so that the mask is a genuine constant expression
// instead of depending on the optimizer to fold the recursive calls.

// Base case: an empty list of expressions yields an empty mask.
template <RelExpr... Exprs> struct RelExprMaskBuilder {
  static constexpr uint64_t build() { return 0; }
};

// Recursive case: set the bit for `Head`, then OR in the mask for `Tail`.
template <RelExpr Head, RelExpr... Tail>
struct RelExprMaskBuilder<Head, Tail...> {
  static constexpr uint64_t build() {
    static_assert(0 <= Head && Head < 64,
                  "RelExpr is too large for 64-bit mask!");
    return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build();
  }
};

// Return true if `Expr` is one of `Exprs`.
// There are fewer than 64 RelExpr values, so any set of them fits in a
// 64-bit constant mask, and the membership test is a shift plus an AND.
template <RelExpr... Exprs> bool isRelExprOneOf(RelExpr Expr) {
  assert(0 <= Expr && Expr < 64 && "RelExpr is too large for 64-bit mask!");
  const uint64_t Mask = RelExprMaskBuilder<Exprs...>::build();
  const uint64_t Bit = uint64_t(1) << Expr;
  return (Bit & Mask) != 0;
}

// Architecture-neutral representation of relocation.
struct Relocation {
RelExpr Expr;
Expand Down

0 comments on commit 2eed759

Please sign in to comment.