Skip to content

Commit

Permalink
Handle LLVM-generated TLSDESC code sequence
Browse files Browse the repository at this point in the history
It had been assumed that the TLSDESC code sequence looks like this:

  lea    0(%rip), %rax
      R_X86_64_GOTPC32_TLSDESC    foo
  call   *(%rax)
      R_X86_64_TLSDESC_CALL       foo

However, LLVM seems to also emit something like this:

  lea    0(%rip), %reg
      R_X86_64_GOTPC32_TLSDESC    foo
  ...
  mov    %reg, %rax
  ...
  call   *(%rax)
      R_X86_64_TLSDESC_CALL       foo

That means when we rewrite the LEA instruction to relax the code
sequence, we need to handle destination registers other than %rax.

The wrong assumption caused a program crash, as reported at
https://bugs.gentoo.org/914849.
  • Loading branch information
rui314 committed Nov 26, 2023
1 parent c910552 commit 000ce0e
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 12 deletions.
51 changes: 47 additions & 4 deletions elf/arch-i386.cc
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,34 @@ static void relax_ld_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
}
}

// Maps the first two bytes of a TLSDESC `lea 0(%ebx), %reg` to the first
// two bytes of the equivalent `mov 0(%ebx), %reg`, keeping the destination
// register unchanged.
//
// `loc` points at the opcode byte; loc[1] is the ModR/M byte. The result
// holds the new opcode in bits 15..8 and the new ModR/M byte in bits 7..0.
// Returns 0 if the bytes are not a recognized `lea` form.
static u32 relax_tlsdesc_to_ie(u8 *loc) {
  u8 opcode = loc[0];
  u8 modrm = loc[1];

  // Require LEA (0x8d) with mod=10 (disp32) and r/m=011 (%ebx). The reg
  // field (bits 5..3) is masked out so any of the eight destination
  // registers is accepted.
  if (opcode != 0x8d || (modrm & 0xc7) != 0x83)
    return 0;

  // LEA and MOV r32, r/m32 (0x8b) share the same ModR/M byte, so only the
  // opcode needs to change.
  return 0x8b00 | modrm;
}

// Maps the first two bytes of a TLSDESC `lea 0(%ebx), %reg` to the two
// bytes `nop; mov $imm32, %reg` for the same destination register.
//
// `loc` points at the opcode byte; loc[1] is the ModR/M byte. The result
// holds 0x90 (nop) in bits 15..8 and the `mov` opcode (0xb8 + register
// number) in bits 7..0. Returns 0 if the bytes are not a recognized `lea`
// form.
static u32 relax_tlsdesc_to_le(u8 *loc) {
  u8 opcode = loc[0];
  u8 modrm = loc[1];

  // Require LEA (0x8d) with mod=10 (disp32) and r/m=011 (%ebx); the
  // destination register in the reg field may be anything.
  if (opcode != 0x8d || (modrm & 0xc7) != 0x83)
    return 0;

  // `mov $imm32, %reg` is one byte shorter than the `lea`, hence the
  // leading nop. The register number moves from ModR/M bits 5..3 into the
  // low three bits of the 0xb8 opcode.
  u32 reg = (modrm >> 3) & 7;
  return 0x90b8 | reg;
}

template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
std::span<const ElfRel<E>> rels = get_rels(ctx);
Expand Down Expand Up @@ -373,15 +401,30 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
//
// mov $foo@TPOFF, %eax
// nop
//
// We allow the following alternative code sequence too because
// LLVM emits such code.
//
// lea 0(%ebx), %reg
// R_386_TLS_GOTDESC foo
// mov %reg, %eax
// call *(%eax)
// R_386_TLS_DESC_CALL foo
if (sym.has_tlsdesc(ctx)) {
*(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT;
} else if (sym.has_gottp(ctx)) {
loc[-2] = 0x8b;
loc[-1] = 0x83;
u32 insn = relax_tlsdesc_to_ie(loc - 2);
if (!insn)
Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
loc[-2] = insn >> 8;
loc[-1] = insn;
*(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
} else {
loc[-2] = 0x90;
loc[-1] = 0xb8;
u32 insn = relax_tlsdesc_to_le(loc - 2);
if (!insn)
Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
loc[-2] = insn >> 8;
loc[-1] = insn;
*(ul32 *)loc = S + A - ctx.tp_addr;
}
break;
Expand Down
73 changes: 65 additions & 8 deletions elf/arch-x86-64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,50 @@ static u32 relax_gottpoff(u8 *loc) {
return 0;
}

// Maps the first three bytes of a TLSDESC `lea 0(%rip), %reg` to the first
// three bytes of the equivalent `mov 0(%rip), %reg`, keeping the
// destination register unchanged.
//
// `loc` points at the REX prefix; loc[1] is the opcode and loc[2] the
// ModR/M byte. The result packs the three replacement bytes with the REX
// prefix in bits 23..16. Returns 0 if the bytes are not a recognized
// `lea` form.
static u32 relax_tlsdesc_to_ie(u8 *loc) {
  u8 rex = loc[0];
  u8 opcode = loc[1];
  u8 modrm = loc[2];

  // REX.W (0x48) selects %rax..%rdi; REX.W+R (0x4c) selects %r8..%r15.
  if (rex != 0x48 && rex != 0x4c)
    return 0;

  // Require LEA (0x8d) with mod=00 and r/m=101 (RIP-relative). The reg
  // field (bits 5..3) is masked out so any destination is accepted.
  if (opcode != 0x8d || (modrm & 0xc7) != 0x05)
    return 0;

  // MOV r64, r/m64 (0x8b) takes the same REX prefix and ModR/M byte as the
  // LEA, so only the opcode byte changes.
  return ((u32)rex << 16) | 0x8b00 | modrm;
}

// Maps the first three bytes of a TLSDESC `lea 0(%rip), %reg` to the first
// three bytes of `mov $imm32, %reg` for the same destination register.
//
// `loc` points at the REX prefix; loc[1] is the opcode and loc[2] the
// ModR/M byte. The result packs the three replacement bytes with the REX
// prefix in bits 23..16. Returns 0 if the bytes are not a recognized
// `lea` form.
static u32 relax_tlsdesc_to_le(u8 *loc) {
  u8 rex = loc[0];
  u8 opcode = loc[1];
  u8 modrm = loc[2];

  // REX.W (0x48) selects %rax..%rdi; REX.W+R (0x4c) selects %r8..%r15.
  if (rex != 0x48 && rex != 0x4c)
    return 0;

  // Require LEA (0x8d) with mod=00 and r/m=101 (RIP-relative).
  if (opcode != 0x8d || (modrm & 0xc7) != 0x05)
    return 0;

  // In `mov $imm32, r/m64` (0xc7 /0) the destination is encoded in the r/m
  // field rather than the reg field. The extended registers therefore need
  // REX.B (0x49) instead of REX.R (0x4c).
  u32 new_rex = (rex == 0x48) ? 0x48 : 0x49;
  u32 reg = (modrm >> 3) & 7;
  return (new_rex << 16) | 0xc7c0 | reg;
}

// Rewrite a function call to __tls_get_addr to a cheaper instruction
// sequence. We can do this when we know the thread-local variable's TP-
// relative address at link-time.
Expand Down Expand Up @@ -501,19 +545,32 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
//
// mov $foo@TPOFF, %rax
// nop
//
// We allow the following alternative code sequence too because
// LLVM emits such code.
//
// lea 0(%rip), %reg
// R_X86_64_GOTPC32_TLSDESC foo
// mov %reg, %rax
// call *(%rax)
// R_X86_64_TLSDESC_CALL foo
if (sym.has_tlsdesc(ctx)) {
write32s(sym.get_tlsdesc_addr(ctx) + A - P);
} else if (sym.has_gottp(ctx)) {
// mov foo@gottpoff(%rip), %rax
loc[-3] = 0x48;
loc[-2] = 0x8b;
loc[-1] = 0x05;
u32 insn = relax_tlsdesc_to_ie(loc - 3);
if (!insn)
Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
loc[-3] = insn >> 16;
loc[-2] = insn >> 8;
loc[-1] = insn;
write32s(sym.get_gottp_addr(ctx) + A - P);
} else {
// mov $foo@tpoff, %rax
loc[-3] = 0x48;
loc[-2] = 0xc7;
loc[-1] = 0xc0;
u32 insn = relax_tlsdesc_to_le(loc - 3);
if (!insn)
Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
loc[-3] = insn >> 16;
loc[-2] = insn >> 8;
loc[-1] = insn;
write32s(S - ctx.tp_addr);
}
break;
Expand Down
File renamed without changes.
48 changes: 48 additions & 0 deletions test/elf/i686_tlsdesc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Regression test for the i386 TLSDESC code sequence in which the
# descriptor address is first loaded into a register other than %eax
# (here %ebx) and only then copied to %eax before the TLSCALL.
. $(dirname $0)/common.inc

# supports_tlsdesc and skip are not defined in this script; presumably
# they come from common.inc -- confirm there.
supports_tlsdesc || skip

# a.o: hand-written get_foo() that returns the thread-local `foo`. It uses
# `leal foo@TLSDESC(%ebx), %ebx` followed by `movl %ebx, %eax`, i.e. the
# destination of the lea is not %eax. %ebx is saved and restored around
# the body. The heredoc delimiter is quoted so that the shell does not
# expand `$_GLOBAL_OFFSET_TABLE_` or `$8`.
cat <<'EOF' | $GCC -c -o $t/a.o -xassembler -
.globl get_foo
.type get_foo, @function
get_foo:
pushl %ebx
call __x86.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
subl $8, %esp
leal foo@TLSDESC(%ebx), %ebx
movl %ebx, %eax
call *foo@TLSCALL(%eax)
movl %gs:(%eax), %eax
addl $8, %esp
popl %ebx
ret
EOF

# b.o: defines the thread-local `foo`, sets it to 42 and prints the value
# that get_foo() reads back.
cat <<EOF | $GCC -fPIC -c -o $t/b.o -xc - $tlsdesc_opt
#include <stdio.h>
_Thread_local int foo;
int get_foo();
int main() {
foo = 42;
printf("%d\n", get_foo());
}
EOF

# Case 1: both objects linked into one executable with default options.
$CC -B. -o $t/exe1 $t/a.o $t/b.o
$QEMU $t/exe1 | grep -q 42

# Case 2: same link with -no-relax passed to the linker.
$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,-no-relax
$QEMU $t/exe2 | grep -q 42

# Case 3: get_foo() lives in a shared library that the executable links
# against.
$CC -B. -shared -o $t/c.so $t/a.o
$CC -B. -o $t/exe3 $t/b.o $t/c.so
$QEMU $t/exe3 | grep -q 42

# Case 4: as case 3, with -no-relax for both the shared library and the
# executable.
$CC -B. -shared -o $t/c.so $t/a.o -Wl,-no-relax
$CC -B. -o $t/exe4 $t/b.o $t/c.so -Wl,-no-relax
$QEMU $t/exe4 | grep -q 42
47 changes: 47 additions & 0 deletions test/elf/x86_64_tlsdesc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Regression test for the x86-64 TLSDESC code sequence in which the
# descriptor address is first loaded into a register other than %rax
# (here %rbx) and only then copied to %rax before the TLSCALL.
. $(dirname $0)/common.inc

# supports_tlsdesc and skip are not defined in this script; presumably
# they come from common.inc -- confirm there.
supports_tlsdesc || skip

# a.o: hand-written get_foo() that returns the thread-local `foo`. It uses
# `leaq foo@TLSDESC(%rip), %rbx` followed by `movq %rbx, %rax`, i.e. the
# destination of the lea is not %rax.
#
# %rbx is callee-saved in the System V AMD64 ABI, so it is pushed on entry
# and popped before returning. The extra `subq $8, %rsp` keeps the stack
# 16-byte aligned at the call. The heredoc delimiter is quoted so that the
# shell does not expand `$8`.
cat <<'EOF' | $GCC -c -o $t/a.o -xassembler -
.globl get_foo
.type get_foo, @function
get_foo:
pushq %rbp
movq %rsp, %rbp
pushq %rbx
subq $8, %rsp
leaq foo@TLSDESC(%rip), %rbx
movq %rbx, %rax
call *foo@TLSCALL(%rax)
movq %fs:0, %rdx
addq %rdx, %rax
movl (%rax), %eax
addq $8, %rsp
popq %rbx
popq %rbp
ret
EOF

# b.o: defines the thread-local `foo`, sets it to 42 and prints the value
# that get_foo() reads back.
cat <<EOF | $GCC -fPIC -c -o $t/b.o -xc - $tlsdesc_opt
#include <stdio.h>
_Thread_local int foo;
int get_foo();
int main() {
foo = 42;
printf("%d\n", get_foo());
}
EOF

# Case 1: both objects linked into one executable with default options.
$CC -B. -o $t/exe1 $t/a.o $t/b.o
$QEMU $t/exe1 | grep -q 42

# Case 2: same link with -no-relax passed to the linker.
$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,-no-relax
$QEMU $t/exe2 | grep -q 42

# Case 3: get_foo() lives in a shared library that the executable links
# against.
$CC -B. -shared -o $t/c.so $t/a.o
$CC -B. -o $t/exe3 $t/b.o $t/c.so
$QEMU $t/exe3 | grep -q 42

# Case 4: as case 3, with -no-relax for both the shared library and the
# executable.
$CC -B. -shared -o $t/c.so $t/a.o -Wl,-no-relax
$CC -B. -o $t/exe4 $t/b.o $t/c.so -Wl,-no-relax
$QEMU $t/exe4 | grep -q 42

0 comments on commit 000ce0e

Please sign in to comment.