Skip to content

Commit

Permalink
[ELF] Fix R_RISCV_ALIGN relocations
Browse files Browse the repository at this point in the history
Fixes #419
  • Loading branch information
rui314 committed Apr 15, 2022
1 parent d0e4eee commit 0daf623
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 89 deletions.
131 changes: 42 additions & 89 deletions elf/arch-riscv64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {

for (i64 i = 0; i < rels.size(); i++) {
const ElfRel<E> &rel = rels[i];
if (rel.r_type == R_RISCV_NONE || rel.r_type == R_RISCV_RELAX)
if (rel.r_type == R_RISCV_NONE || rel.r_type == R_RISCV_RELAX ||
rel.r_type == R_RISCV_ALIGN)
continue;

Symbol<E> &sym = *file.symbols[rel.r_sym];
Expand Down Expand Up @@ -344,8 +345,6 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
case R_RISCV_SUB64:
*(u64 *)loc -= S + A;
break;
case R_RISCV_ALIGN:
break;
case R_RISCV_RVC_BRANCH:
write_cbtype((u16 *)loc, S + A - P);
break;
Expand All @@ -355,8 +354,6 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
case R_RISCV_RVC_LUI:
Error(ctx) << *this << ": unsupported relocation: " << rel;
break;
case R_RISCV_RELAX:
break;
case R_RISCV_SUB6:
*loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111);
break;
Expand Down Expand Up @@ -510,18 +507,12 @@ void InputSection<E>::copy_contents_riscv(Context<E> &ctx, u8 *buf) {
i64 delta = r_deltas[i + 1] - r_deltas[i];
if (delta == 0)
continue;
assert(delta < 0);

const ElfRel<E> &r = rels[i];
memcpy(buf, contents.data() + pos, r.r_offset - pos);
buf += r.r_offset - pos;
pos = r.r_offset;

if (delta < 0) {
pos -= delta;
} else {
memset(buf, 0, delta);
buf += delta;
}
pos = r.r_offset - delta;
}

memcpy(buf, contents.data() + pos, contents.size() - pos);
Expand Down Expand Up @@ -672,46 +663,11 @@ static void initialize_storage(Context<E> &ctx) {
});
}

// Processes the R_RISCV_ALIGN relocations of `isec`. For every such
// relocation whose r_offset differs from align_to(r_offset, r_addend),
// the difference is added to a running total. Along the way:
//
//  - r_deltas[i] receives the total accumulated before relocation i,
//    and r_deltas[rels.size()] receives the final total;
//  - each symbol is shifted by the total that was in effect at the
//    first adjusting relocation located at or after the symbol's value
//    (symbols past the last such relocation get the final total);
//  - isec.sh_size grows by the final total.
//
// This function may enlarge input sections but never shrinks them.
static void align_contents(Context<E> &ctx, InputSection<E> &isec) {
  std::span<i32> r_deltas = isec.get_r_deltas();
  // Symbols that have not been shifted yet. The front-of-span test below
  // presumably relies on this list being ordered by value.
  std::span<Symbol<E> *> pending = isec.get_sorted_symbols();
  i64 total = 0;

  std::span<ElfRel<E>> rels = isec.get_rels(ctx);

  for (i64 idx = 0; idx < rels.size(); idx++) {
    const ElfRel<E> &rel = rels[idx];
    r_deltas[idx] = total;

    if (rel.r_type == R_RISCV_ALIGN) {
      i64 padding = align_to(rel.r_offset, rel.r_addend) - rel.r_offset;

      if (padding != 0) {
        // Symbols at or before this relocation are only affected by the
        // adjustments made so far, not by the one about to be added.
        for (; !pending.empty() && pending[0]->value <= rel.r_offset;
             pending = pending.subspan(1))
          pending[0]->value += total;

        total += padding;
      }
    }
  }

  // Everything left lies after the last adjusting relocation.
  for (Symbol<E> *sym : pending)
    sym->value += total;

  r_deltas[rels.size()] = total;
  isec.sh_size += total;
}

// Returns the distance between a relocated place and a symbol.
static i64 compute_distance(Context<E> &ctx, Symbol<E> &sym,
InputSection<E> &isec, const ElfRel<E> &rel) {
// We handle absolute symbols as if they were infinitely far away
// because `relax_call` may increase a distance between a branch
// because `relax_section` may increase a distance between a branch
// instruction and an absolute symbol. Branching to an absolute
// location is extremely rare in real code, though.
if (sym.is_absolute())
Expand All @@ -729,7 +685,7 @@ static i64 compute_distance(Context<E> &ctx, Symbol<E> &sym,
}

// Relax R_RISCV_CALL and R_RISCV_CALL_PLT relocations.
static void relax_call(Context<E> &ctx, InputSection<E> &isec) {
static void relax_section(Context<E> &ctx, InputSection<E> &isec) {
std::span<i32> r_deltas = isec.get_r_deltas();
std::span<Symbol<E> *> syms = isec.get_sorted_symbols();
i64 delta = 0;
Expand All @@ -743,21 +699,37 @@ static void relax_call(Context<E> &ctx, InputSection<E> &isec) {
r_deltas[i] += delta;

switch (r.r_type) {
case R_RISCV_ALIGN:
delta2 = align_to(r.r_offset, r.r_addend) - r.r_offset;
case R_RISCV_ALIGN: {
// R_RISCV_ALIGN refers to NOP instructions. We need to eliminate
// some or all of the instructions so that the instruction that
// immediately follows the NOPs is aligned to a specified
// alignment boundary.
u64 loc = isec.get_addr() + r.r_offset + delta;

// The total size of the NOPs in bytes is stored in r_addend, so the
// next instruction is r_addend bytes away.
u64 next_loc = loc + r.r_addend;

u64 alignment = (std::popcount<u64>(r.r_addend) == 1)
? r.r_addend : next_power_of_two(r.r_addend);

if (next_loc % alignment)
delta2 = align_to(loc, alignment) - next_loc;
break;
case R_RISCV_CALL:
case R_RISCV_CALL_PLT: {
if (i == rels.size() - 1 || rels[i + 1].r_type != R_RISCV_RELAX)
break;

// If the jump target is within ±1 MiB, we can replace AUIPC+JALR
// with JAL, saving 4 bytes.
Symbol<E> &sym = *isec.file.symbols[r.r_sym];
i64 dist = compute_distance(ctx, sym, isec, r);
if (dist % 2 == 0 && -(1 << 20) <= dist && dist < (1 << 20))
delta2 = -4;
}
case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
if (ctx.arg.relax) {
if (i == rels.size() - 1 || rels[i + 1].r_type != R_RISCV_RELAX)
break;

// If the jump target is within ±1 MiB, we can replace AUIPC+JALR
// with JAL, saving 4 bytes.
Symbol<E> &sym = *isec.file.symbols[r.r_sym];
i64 dist = compute_distance(ctx, sym, isec, r);
if (dist % 2 == 0 && -(1 << 20) <= dist && dist < (1 << 20))
delta2 = -4;
}
}

if (delta2 == 0)
Expand Down Expand Up @@ -819,38 +791,19 @@ static void relax_call(Context<E> &ctx, InputSection<E> &isec) {
// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN relocation is a
// directive to the linker to align the location referred to by the
// relocation to a specified byte boundary. We at least have to
// interpret them to satisfy the constraints imposed by R_RISCV_ALIGN,
// and that means we may change section sizes anyway.
// interpret them to satisfy the constraints imposed by R_RISCV_ALIGN
// relocations.
i64 riscv_resize_sections(Context<E> &ctx) {
Timer t(ctx, "riscv_resize_sections");

initialize_storage(ctx);

// First, interpret R_RISCV_ALIGN relocations. This may enlarge
// sections.
{
Timer t(ctx, "align_contents");
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
if (is_resizable(ctx, isec.get()))
align_contents(ctx, *isec);
});
}

// Re-compute section offsets.
compute_section_sizes(ctx);
set_osec_offsets(ctx);

// Find R_RISCV_CALL and R_RISCV_CALL_PLT that can be relaxed.
// This step should only shrink sections.
{
Timer t(ctx, "relax_call");
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
if (is_resizable(ctx, isec.get()))
relax_call(ctx, *isec);
});
}
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
if (is_resizable(ctx, isec.get()))
relax_section(ctx, *isec);
});

// Re-compute section offsets again to finalize them.
compute_section_sizes(ctx);
Expand Down
46 changes: 46 additions & 0 deletions test/elf/large-alignment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Checks that functions declared with a 32768-byte alignment keep that
# alignment after linking: the test program prints each function's
# address modulo 32768, and both remainders must be 0.
export LC_ALL=C
set -e

# Tools and target machine; each can be overridden from the environment.
CC="${CC:-cc}"
CXX="${CXX:-c++}"
GCC="${GCC:-gcc}"
GXX="${GXX:-g++}"
OBJDUMP="${OBJDUMP:-objdump}"
MACHINE="${MACHINE:-$(uname -m)}"

testname=$(basename "$0" .sh)
printf 'Testing %s ... ' "$testname"

# Work from the directory two levels above this script's directory and
# keep this test's artifacts in a per-test output directory.
cd "$(dirname "$0")/../.."
mold="$(pwd)/mold"
t=out/test/elf/$testname
mkdir -p $t

# This test is not run on i386 and arm.
if [ $MACHINE = i386 -o $MACHINE = arm ]; then
  echo skipped
  exit
fi

# Compile the test program from the here-document below. The two functions
# are placed in their own sections and request a 32768-byte alignment.
$CC -o $t/a.o -c -xc - -ffunction-sections <<EOF
#include <stdio.h>
#include <stdint.h>
void hello() __attribute__((aligned(32768), section(".hello")));
void world() __attribute__((aligned(32768), section(".world")));
void hello() {
printf("Hello");
}
void world() {
printf(" world");
}
int main() {
hello();
world();
printf(" %lu %lu\n",
(unsigned long)((uintptr_t)hello % 32768),
(unsigned long)((uintptr_t)world % 32768));
}
EOF

# Link with -B. (presumably so the compiler driver picks up the linker
# under test from the current directory), then run the result.
# $QEMU is intentionally unquoted: when it is unset or empty it expands to
# nothing and the executable is run directly.
$CC -B. -o $t/exe $t/a.o
$QEMU $t/exe | grep -q 'Hello world 0 0'

echo OK

0 comments on commit 0daf623

Please sign in to comment.