Skip to content

Commit

Permalink
[ELF][RISCV] Relax local-exec TLS model
Browse files Browse the repository at this point in the history
In -mrelax mode, GCC/Clang may generate a local-exec TLS code sequence like:
```
# R_RISCV_TPREL_HI20, R_RISCV_RELAX
lui rd, %tprel_hi(x)
# R_RISCV_TPREL_ADD, R_RISCV_RELAX
add rd, rd, tp, %tprel_add(x)
# (R_RISCV_TPREL_LO12_I || R_RISCV_TPREL_LO12_S), R_RISCV_RELAX
addi rd, rd, %tprel_lo(x) || sw rs, %tprel_lo(x)(rd)
```

Note: st_value(x) for TLS should be in the range [0,p_memsz(PT_TLS)).
When st_value(x) < 2048 (i.e. hi20(x) == 0), the linker can relax the code
sequence to:
```
addi rd, tp, st_value(x) || sw rs, st_value(x)(tp)
```

Differential Revision: https://reviews.llvm.org/D129425
  • Loading branch information
MaskRay committed Jul 15, 2022
1 parent bc08c3c commit f77b77e
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 34 deletions.
82 changes: 67 additions & 15 deletions lld/ELF/Arch/RISCV.cpp
Expand Up @@ -57,6 +57,7 @@ enum Op {

enum Reg {
X_RA = 1,
X_TP = 4,
X_T0 = 5,
X_T1 = 6,
X_T2 = 7,
Expand All @@ -76,6 +77,19 @@ static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) {
return op | (rd << 7) | (imm << 12);
}

// Return the bit field v[begin:end] (both bounds inclusive, begin is the high
// bit) shifted down so that bit `end` lands at bit 0. The caller must pass
// begin < 63, since the mask is built with a 64-bit shift by (begin + 1).
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  // Mask covering bits [begin:0].
  const uint64_t lowMask = (1ULL << (begin + 1)) - 1;
  const uint64_t field = (v & lowMask) >> end;
  return field;
}

// Replace the immediate field in bits [31:20] of `insn` with `imm`, keeping
// the low 20 bits of the instruction word unchanged. Bits of `imm` above
// bit 11 are shifted out of the 32-bit result.
static uint32_t setLO12_I(uint32_t insn, uint32_t imm) {
  const uint32_t keptBits = insn & 0xfffff;
  const uint32_t immField = imm << 20;
  return keptBits | immField;
}
// Replace the split 12-bit immediate of `insn`: imm[11:5] goes to bits
// [31:25] and imm[4:0] goes to bits [11:7]. All other bits of the
// instruction word (mask 0x1fff07f) are preserved.
static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
  const uint32_t immUpper = extractBits(imm, 11, 5) << 25;
  const uint32_t immLower = extractBits(imm, 4, 0) << 7;
  const uint32_t keptBits = insn & 0x1fff07f;
  return keptBits | immUpper | immLower;
}

RISCV::RISCV() {
copyRel = R_RISCV_COPY;
pltRel = R_RISCV_JUMP_SLOT;
Expand Down Expand Up @@ -270,10 +284,9 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_TPREL_LO12_I:
case R_RISCV_TPREL_LO12_S:
return R_TPREL;
case R_RISCV_TPREL_ADD:
return R_NONE;
case R_RISCV_ALIGN:
return R_RELAX_HINT;
case R_RISCV_TPREL_ADD:
case R_RISCV_RELAX:
return config->relax ? R_RELAX_HINT : R_NONE;
default:
Expand All @@ -283,11 +296,6 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
}
}

// Extract the inclusive bit range v[begin:end] and return it right-aligned.
// Precondition: begin < 63 (the mask is formed by shifting 1ULL by begin + 1).
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  // Drop every bit above `begin`, then shift the field down to bit 0.
  v &= (1ULL << (begin + 1)) - 1;
  v >>= end;
  return v;
}

void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
const unsigned bits = config->wordsize * 8;

Expand Down Expand Up @@ -404,7 +412,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_LO12_I: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
write32le(loc, (read32le(loc) & 0xFFFFF) | ((lo & 0xFFF) << 20));
write32le(loc, setLO12_I(read32le(loc), lo & 0xfff));
return;
}

Expand All @@ -413,9 +421,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_LO12_S: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
uint32_t imm11_5 = extractBits(lo, 11, 5) << 25;
uint32_t imm4_0 = extractBits(lo, 4, 0) << 7;
write32le(loc, (read32le(loc) & 0x1FFF07F) | imm11_5 | imm4_0);
write32le(loc, setLO12_S(read32le(loc), lo));
return;
}

Expand Down Expand Up @@ -567,6 +573,35 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
}
}

// Relax one relocation of a local-exec TLS sequence when hi20 of the symbol
// value is zero. In that case the lui/add pair is deleted and the final
// addi/store is rewritten to use tp as its base register directly.
//
// sec    - section owning the relocation; its relaxAux records the result.
// i      - index of the relocation within the section.
// loc    - unused by this routine.
// r      - the relocation being examined.
// remove - set to 4 when the instruction at r.offset should be deleted.
static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc,
                       Relocation &r, uint32_t &remove) {
  const uint64_t val = r.sym->getVA(r.addend);
  if (hi20(val) != 0)
    return;

  auto &aux = *sec.relaxAux;
  if (r.type == R_RISCV_TPREL_HI20 || r.type == R_RISCV_TPREL_ADD) {
    // lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x) are both
    // removed; R_RISCV_RELAX marks the relocation as one to be ignored.
    aux.relocTypes[i] = R_RISCV_RELAX;
    remove = 4;
    return;
  }

  const bool isStore = r.type == R_RISCV_TPREL_LO12_S;
  if (!isStore && r.type != R_RISCV_TPREL_LO12_I)
    return;

  // Replace the base register field (bits [19:15]) with tp.
  uint32_t insn = read32le(sec.rawData.data() + r.offset);
  insn = (insn & ~(0x1fu << 15)) | (static_cast<uint32_t>(X_TP) << 15);

  // R_RISCV_32 tags the relocation as "queued 32-bit word to write".
  //   addi rd, rd, %tprel_lo(x)  => addi rd, tp, st_value(x)
  //   sw rs, %tprel_lo(x)(rd)    => sw rs, st_value(x)(tp)
  aux.relocTypes[i] = R_RISCV_32;
  aux.writes.push_back(isStore ? setLO12_S(insn, val) : setLO12_I(insn, val));
}

static bool relax(InputSection &sec) {
const uint64_t secAddr = sec.getVA();
auto &aux = *sec.relaxAux;
Expand Down Expand Up @@ -612,6 +647,14 @@ static bool relax(InputSection &sec) {
sec.relocations[i + 1].type == R_RISCV_RELAX)
relaxCall(sec, i, loc, r, remove);
break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_TPREL_LO12_S:
if (i + 1 != sec.relocations.size() &&
sec.relocations[i + 1].type == R_RISCV_RELAX)
relaxTlsLe(sec, i, loc, r, remove);
break;
}

// For all anchors whose offsets are <= r.offset, they are preceded by
Expand Down Expand Up @@ -697,7 +740,7 @@ void elf::riscvFinalizeRelax(int passes) {
for (size_t i = 0, e = rels.size(); i != e; ++i) {
uint32_t remove = aux.relocDeltas[i] - delta;
delta = aux.relocDeltas[i];
if (remove == 0)
if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE)
continue;

// Copy from last location to the current relocated location.
Expand All @@ -723,15 +766,24 @@ void elf::riscvFinalizeRelax(int passes) {
}
}
} else if (RelType newType = aux.relocTypes[i]) {
const uint32_t insn = aux.writes[writesIdx++];
switch (newType) {
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
break;
case R_RISCV_RVC_JUMP:
skip = 2;
write16le(p, insn);
write16le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_JAL:
skip = 4;
write32le(p, insn);
write32le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_32:
// Used by relaxTlsLe to write a uint32_t then suppress the handling
// in relocateAlloc.
skip = 4;
write32le(p, aux.writes[writesIdx++]);
aux.relocTypes[i] = R_RISCV_NONE;
break;
default:
llvm_unreachable("unsupported type");
Expand Down
81 changes: 62 additions & 19 deletions lld/test/ELF/riscv-tls-le.s
@@ -1,48 +1,91 @@
# REQUIRES: riscv

## Additionally test that (a) -no-pie/-pie have the same behavior
## (b) --no-relax/--relax have the same behavior when R_RISCV_RELAX is suppressed.
# RUN: llvm-mc -filetype=obj -triple=riscv32 %s -o %t.32.o
# RUN: ld.lld %t.32.o -o %t.32
# RUN: ld.lld --relax %t.32.o -o %t.32
# RUN: llvm-nm -p %t.32 | FileCheck --check-prefixes=NM %s
# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s
# RUN: ld.lld -pie %t.32.o -o %t.32
# RUN: ld.lld -pie --no-relax %t.32.o -o %t.32
# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s

# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.64.o
# RUN: ld.lld %t.64.o -o %t.64
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax %s -o %t.64.o
# RUN: ld.lld --no-relax %t.64.o -o %t.64
# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s
# RUN: ld.lld -pie %t.64.o -o %t.64
# RUN: ld.lld -pie --no-relax %t.64.o -o %t.64
# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s
# RUN: ld.lld %t.64.o -o %t.64.relax
# RUN: llvm-objdump -d --no-show-raw-insn %t.64.relax | FileCheck --check-prefixes=LE-RELAX %s

# RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:

# ERR: error: relocation R_RISCV_TPREL_HI20 against .LANCHOR0 cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_LO12_I against .LANCHOR0 cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_LO12_I against a cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared

# NM: {{0*}}00000008 b .LANCHOR0
# NM: {{0*}}0000000c B a
# NM: {{0*}}00000800 B a

## .LANCHOR0@tprel = 8
## a@tprel = 12
# LE: lui a5, 0
# LE-NEXT: add a5, a5, tp
# LE-NEXT: addi a5, a5, 8
# LE-NEXT: lui a5, 0
# LE-NEXT: add a5, a5, tp
# LE-NEXT: sw a0, 12(a5)
# LE: lui a1, 0
# LE-NEXT: add a1, a1, tp
# LE-NEXT: addi a1, a1, 8
# LE-NEXT: lui a2, 0
# LE-NEXT: add a2, a2, tp
# LE-NEXT: addi a2, a2, 2044
# LE-NEXT: lui a3, 0
# LE-NEXT: addi a0, a0, 1
# LE-NEXT: add a3, a3, tp
# LE-NEXT: addi a0, a0, 2
# LE-NEXT: sw a0, 2044(a3)
# LE-NEXT: lui a4, 1
# LE-NEXT: add a4, a4, tp
# LE-NEXT: sw a0, -2048(a4)
# LE-EMPTY:

# LE-RELAX: <.text>:
# LE-RELAX-NEXT: addi a1, tp, 8
# LE-RELAX-NEXT: addi a2, tp, 2044
# LE-RELAX-NEXT: addi a0, a0, 1
# LE-RELAX-NEXT: addi a0, a0, 2
# LE-RELAX-NEXT: sw a0, 2044(tp)
# LE-RELAX-NEXT: lui a4, 1
# LE-RELAX-NEXT: add a4, a4, tp
# LE-RELAX-NEXT: sw a0, -2048(a4)
# LE-RELAX-EMPTY:

lui a5, %tprel_hi(.LANCHOR0)
add a5, a5, tp, %tprel_add(.LANCHOR0)
addi a5, a5, %tprel_lo(.LANCHOR0)
lui a1, %tprel_hi(.LANCHOR0)
add a1, a1, tp, %tprel_add(.LANCHOR0)
addi a1, a1, %tprel_lo(.LANCHOR0)

lui a5, %tprel_hi(a)
add a5, a5, tp, %tprel_add(a)
sw a0, %tprel_lo(a)(a5)
## hi20(a-4) = hi20(0x7fc) = 0. relaxable
lui a2, %tprel_hi(a-4)
add a2, a2, tp, %tprel_add(a-4)
addi a2, a2, %tprel_lo(a-4)

## hi20(a-4) = hi20(0x7fc) = 0. relaxable
## Test non-adjacent instructions.
lui a3, %tprel_hi(a-4)
addi a0, a0, 1
add a3, a3, tp, %tprel_add(a-4)
addi a0, a0, 2
sw a0, %tprel_lo(a-4)(a3)

## hi20(a) = hi20(0x800) = 1. not relaxable
lui a4, %tprel_hi(a)
add a4, a4, tp, %tprel_add(a)
sw a0, %tprel_lo(a)(a4)

.section .tbss
.space 8
.LANCHOR0:
.zero 4
.space 0x800-8
.globl a
a:
.zero 4

0 comments on commit f77b77e

Please sign in to comment.