Skip to content

Commit

Permalink
[ELF] Optimize Arm PLT sequences
Browse files Browse the repository at this point in the history
A more efficient PLT sequence can be used when the distance between the
.plt and the end of the .got.plt is less than 128 Megabytes, which is
frequently true. We fall back to the old sequence when the offset is larger
than 128 Megabytes. This gives us an alternative to forcing the longer
entries with --long-plt as we gracefully fall back to it as needed.

See ELF for the ARM Architecture Appendix A for details of the PLT sequence.

Differential Revision: https://reviews.llvm.org/D41246

llvm-svn: 320987
  • Loading branch information
smithp35 committed Dec 18, 2017
1 parent 1909791 commit 3c73a41
Show file tree
Hide file tree
Showing 10 changed files with 458 additions and 183 deletions.
79 changes: 70 additions & 9 deletions lld/ELF/Arch/ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ ARM::ARM() {
GotEntrySize = 4;
GotPltEntrySize = 4;
PltEntrySize = 16;
PltHeaderSize = 20;
PltHeaderSize = 32;
TrapInstr = 0xd4d4d4d4;
// ARM uses Variant 1 TLS
TcbSize = 8;
Expand Down Expand Up @@ -184,32 +184,65 @@ void ARM::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
write32le(Buf, S.getVA());
}

void ARM::writePltHeader(uint8_t *Buf) const {
// Long form PLT Header that does not have any restrictions on the
// displacement of the .plt from the .got.plt.
static void writePltHeaderLong(uint8_t *Buf) {
const uint8_t PltData[] = {
0x04, 0xe0, 0x2d, 0xe5, // str lr, [sp,#-4]!
0x04, 0xe0, 0x9f, 0xe5, // ldr lr, L2
0x0e, 0xe0, 0x8f, 0xe0, // L1: add lr, pc, lr
0x08, 0xf0, 0xbe, 0xe5, // ldr pc, [lr, #8]
0x00, 0x00, 0x00, 0x00, // L2: .word &(.got.plt) - L1 - 8
};
0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
0xd4, 0xd4, 0xd4, 0xd4};
memcpy(Buf, PltData, sizeof(PltData));
uint64_t GotPlt = InX::GotPlt->getVA();
uint64_t L1 = InX::Plt->getVA() + 8;
write32le(Buf + 16, GotPlt - L1 - 8);
}

// Writes the 32-byte PLT header into Buf.
//
// The compact form is used when .got.plt lies above .plt and the displacement
// between them fits in an unsigned 27-bit value (under 128 MiB). Any other
// displacement is handed to writePltHeaderLong(), which has no range limit.
void ARM::writePltHeader(uint8_t *Buf) const {
  // Displacement that the add/add/ldr triple has to materialise. Computed in
  // unsigned arithmetic, so a .got.plt placed below .plt wraps to a huge value
  // and fails the range check below.
  const uint64_t Disp = InX::GotPlt->getVA() - InX::Plt->getVA() - 4;
  if (!llvm::isUInt<27>(Disp)) {
    // Not encodable in the short sequence, emit the long form instead.
    writePltHeaderLong(Buf);
    return;
  }

  // Same idea as the sequence in writePlt(), except that the calling
  // convention makes us use lr rather than ip. The header pushes lr onto the
  // stack; the dynamic loader is responsible for reloading it.
  const uint32_t StrLr = 0xe52de004;    // L1: str lr, [sp,#-4]!
  const uint32_t AddLrPc = 0xe28fe600;  //     add lr, pc, #0x0NN00000
  const uint32_t AddLrLr = 0xe28eea00;  //     add lr, lr, #0x000NN000
  const uint32_t LdrPcLr = 0xe5bef000;  //     ldr pc, [lr, #0x00000NNN]

  // Split the displacement across the three immediate fields.
  const uint32_t High = (Disp >> 20) & 0xff;
  const uint32_t Mid = (Disp >> 12) & 0xff;
  const uint32_t Low = Disp & 0xfff;

  write32le(Buf, StrLr);
  write32le(Buf + 4, AddLrPc | High);
  write32le(Buf + 8, AddLrLr | Mid);
  write32le(Buf + 12, LdrPcLr | Low);

  // Fill the remaining four words with trap instructions so that the header
  // occupies 32 bytes.
  for (unsigned Pos = 16; Pos != 32; Pos += 4)
    write32le(Buf + Pos, TrapInstr);
}

// Attaches two synthetic local symbols to the PLT header section: "$a" at
// offset 0 and "$d" at offset 16.
void ARM::addPltHeaderSymbols(InputSectionBase *ISD) const {
  InputSection *HeaderSec = cast<InputSection>(ISD);
  // "$a" covers the instruction words at the start of the header.
  addSyntheticLocal("$a", STT_NOTYPE, 0, 0, HeaderSec);
  // "$d" starts at the fifth word of the header.
  addSyntheticLocal("$d", STT_NOTYPE, 16, 0, HeaderSec);
}

void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {
// FIXME: Using simple code sequence with simple relocations.
// There is a more optimal sequence but it requires support for the group
// relocations. See ELF for the ARM Architecture Appendix A.3
// Long form PLT entries that do not have any restrictions on the displacement
// of the .plt from the .got.plt.
static void writePltLong(uint8_t *Buf, uint64_t GotPltEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) {
const uint8_t PltData[] = {
0x04, 0xc0, 0x9f, 0xe5, // ldr ip, L2
0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc
Expand All @@ -221,6 +254,34 @@ void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
write32le(Buf + 12, GotPltEntryAddr - L1 - 8);
}

// Writes one 16-byte PLT entry into Buf.
//
// The compact form is used when the .got.plt slot lies above the PLT entry and
// the displacement fits in an unsigned 27-bit value (under 128 MiB). Any other
// displacement is handed to writePltLong(), which has no range limit.
void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
                   uint64_t PltEntryAddr, int32_t Index,
                   unsigned RelOff) const {
  // Displacement from L1 (the first instruction of the entry) to the
  // .got.plt slot, less 8. Computed in unsigned arithmetic, so a slot below
  // the entry wraps to a huge value and fails the range check below.
  const uint64_t Disp = GotPltEntryAddr - PltEntryAddr - 8;
  if (!llvm::isUInt<27>(Disp)) {
    // Not encodable in the short sequence, emit the long form instead.
    writePltLong(Buf, GotPltEntryAddr, PltEntryAddr, Index, RelOff);
    return;
  }

  // Modelled on the example in Appendix A of ELF for the Arm Architecture.
  // Rather than using Group Relocations to pick the optimal rotation for the
  // 8-bit immediates of the add instructions, the most compact rotations are
  // hard coded for simplicity. This needs one load fewer than the long entry.
  const uint32_t AddIpPc = 0xe28fc600;  // L1: add ip, pc, #0x0NN00000
  const uint32_t AddIpIp = 0xe28cca00;  //     add ip, ip, #0x000NN000
  const uint32_t LdrPcIp = 0xe5bcf000;  //     ldr pc, [ip, #0x00000NNN]

  // Split the displacement across the three immediate fields.
  const uint32_t High = (Disp >> 20) & 0xff;
  const uint32_t Mid = (Disp >> 12) & 0xff;
  const uint32_t Low = Disp & 0xfff;

  write32le(Buf, AddIpPc | High);
  write32le(Buf + 4, AddIpIp | Mid);
  write32le(Buf + 8, LdrPcIp | Low);
  // The fourth word is a trap instruction that pads the entry to 16 bytes.
  write32le(Buf + 12, TrapInstr);
}

void ARM::addPltSymbols(InputSectionBase *ISD, uint64_t Off) const {
auto *IS = cast<InputSection>(ISD);
addSyntheticLocal("$a", STT_NOTYPE, Off, 0, IS);
Expand Down
12 changes: 6 additions & 6 deletions lld/test/ELF/arm-branch-undef-weak-plt-thunk.s
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ _start:
// CHECK-NEXT: 11000: 00 00 00 ea b #0 <__ARMv7ABSLongThunk_undefined_weak_we_expect_a_plt_entry_for>
// CHECK-NEXT: 11004: 02 00 00 eb bl #8 <__ARMv7ABSLongThunk_bar2>
// CHECK: __ARMv7ABSLongThunk_undefined_weak_we_expect_a_plt_entry_for:
// CHECK-NEXT: 11008: 34 c0 01 e3 movw r12, #4148
// CHECK-NEXT: 1100c: 01 c2 40 e3 movt r12, #513
// CHECK-NEXT: 11010: 1c ff 2f e1 bx r12
// CHECK-NEXT: 11008: 40 c0 01 e3 movw r12, #4160
// CHECK-NEXT: 1100c: 01 c2 40 e3 movt r12, #513
// CHECK-NEXT: 11010: 1c ff 2f e1 bx r12
// CHECK: __ARMv7ABSLongThunk_bar2:
// CHECK-NEXT: 11014: 44 c0 01 e3 movw r12, #4164
// CHECK-NEXT: 11018: 01 c2 40 e3 movt r12, #513
// CHECK-NEXT: 1101c: 1c ff 2f e1 bx r12
// CHECK-NEXT: 11014: 50 c0 01 e3 movw r12, #4176
// CHECK-NEXT: 11018: 01 c2 40 e3 movt r12, #513
// CHECK-NEXT: 1101c: 1c ff 2f e1 bx r12
4 changes: 2 additions & 2 deletions lld/test/ELF/arm-exidx-shared.s
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ __aeabi_unwind_cpp_pr0:
// CHECK-NEXT: 0x200C R_ARM_JUMP_SLOT __gxx_personality_v0

// CHECK-EXTAB: Contents of section .ARM.extab:
// 014c + 0ed8 = 0x1024 = __gxx_personality_v0(PLT)
// CHECK-EXTAB-NEXT: 014c d80e0000 b0b0b000 00000000
// 014c + 0ee4 = 0x1030 = __gxx_personality_v0(PLT)
// CHECK-EXTAB-NEXT: 014c e40e0000 b0b0b000 00000000
54 changes: 27 additions & 27 deletions lld/test/ELF/arm-gnu-ifunc-plt.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,49 +33,49 @@
// DISASM-NEXT: 11000: 1e ff 2f e1 bx lr
// DISASM: bar:
// DISASM-NEXT: 11004: 1e ff 2f e1 bx lr
// DISASM: _start:
// DISASM: _start:
// DISASM-NEXT: 11008: 14 00 00 eb bl #80
// DISASM-NEXT: 1100c: 17 00 00 eb bl #92
// DISASM: 11010: 00 00 00 00 .word 0x00000000
// DISASM: $d.1:
// DISASM-NEXT: 11010: 00 00 00 00 .word 0x00000000
// DISASM-NEXT: 11014: 04 00 00 00 .word 0x00000004
// DISASM: 11018: 05 00 00 eb bl #20
// DISASM-NEXT: 1101c: 08 00 00 eb bl #32
// DISASM: 11018: 08 00 00 eb bl #32
// DISASM-NEXT: 1101c: 0b 00 00 eb bl #44
// DISASM-NEXT: Disassembly of section .plt:
// DISASM-NEXT: $a:
// DISASM-NEXT: 11020: 04 e0 2d e5 str lr, [sp, #-4]!
// DISASM-NEXT: 11024: 04 e0 9f e5 ldr lr, [pc, #4]
// DISASM-NEXT: 11028: 0e e0 8f e0 add lr, pc, lr
// DISASM-NEXT: 1102c: 08 f0 be e5 ldr pc, [lr, #8]!
// DISASM-NEXT: 11024: 00 e6 8f e2 add lr, pc, #0, #12
// DISASM-NEXT: 11028: 00 ea 8e e2 add lr, lr, #0, #20
// DISASM-NEXT: 1102c: dc ff be e5 ldr pc, [lr, #4060]!
// DISASM: $d:
// DISASM-NEXT: 11030: d0 0f 00 00 .word 0x00000fd0
// DISASM-NEXT: 11030: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM-NEXT: 11034: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM-NEXT: 11038: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM-NEXT: 1103c: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM: $a:
// DISASM-NEXT: 11034: 04 c0 9f e5 ldr r12, [pc, #4]
// DISASM-NEXT: 11038: 0f c0 8c e0 add r12, r12, pc
// DISASM-NEXT: 1103c: 00 f0 9c e5 ldr pc, [r12]
// DISASM-NEXT: 11040: 00 c6 8f e2 add r12, pc, #0, #12
// DISASM-NEXT: 11044: 00 ca 8c e2 add r12, r12, #0, #20
// DISASM-NEXT: 11048: c4 ff bc e5 ldr pc, [r12, #4036]!
// DISASM: $d:
// DISASM-NEXT: 11040: cc 0f 00 00 .word 0x00000fcc
// DISASM-NEXT: 1104c: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM: $a:
// DISASM-NEXT: 11044: 04 c0 9f e5 ldr r12, [pc, #4]
// DISASM-NEXT: 11048: 0f c0 8c e0 add r12, r12, pc
// DISASM-NEXT: 1104c: 00 f0 9c e5 ldr pc, [r12]
// DISASM-NEXT: 11050: 00 c6 8f e2 add r12, pc, #0, #12
// DISASM-NEXT: 11054: 00 ca 8c e2 add r12, r12, #0, #20
// DISASM-NEXT: 11058: b8 ff bc e5 ldr pc, [r12, #4024]!
// DISASM: $d:
// DISASM-NEXT: 11050: c0 0f 00 00 .word 0x00000fc0
// Alignment to 16 byte boundary not strictly necessary on ARM, but harmless
// DISASM-NEXT: 11054: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM-NEXT: 11058: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM-NEXT: 1105c: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM: $a:
// DISASM-NEXT: 11060: 04 c0 9f e5 ldr r12, [pc, #4]
// DISASM-NEXT: 11064: 0f c0 8c e0 add r12, r12, pc
// DISASM-NEXT: 11068: 00 f0 9c e5 ldr pc, [r12]
// DISASM-NEXT: 11060: 00 c6 8f e2 add r12, pc, #0, #12
// DISASM-NEXT: 11064: 02 ca 8c e2 add r12, r12, #8192
// DISASM-NEXT: 11068: 18 f0 bc e5 ldr pc, [r12, #24]!
// DISASM: $d:
// DISASM-NEXT: 1106c: 14 20 00 00 .word 0x00002014
// DISASM-NEXT: 1106c: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM: $a:
// DISASM-NEXT: 11070: 04 c0 9f e5 ldr r12, [pc, #4]
// DISASM-NEXT: 11074: 0f c0 8c e0 add r12, r12, pc
// DISASM-NEXT: 11078: 00 f0 9c e5 ldr pc, [r12]
// DISASM-NEXT: 11070: 00 c6 8f e2 add r12, pc, #0, #12
// DISASM-NEXT: 11074: 02 ca 8c e2 add r12, r12, #8192
// DISASM-NEXT: 11078: 0c f0 bc e5 ldr pc, [r12, #12]!
// DISASM: $d:
// DISASM-NEXT: 1107c: 08 20 00 00 .word 0x00002008
// DISASM-NEXT: 1107c: d4 d4 d4 d4 .word 0xd4d4d4d4

.syntax unified
.text
Expand Down
41 changes: 20 additions & 21 deletions lld/test/ELF/arm-gnu-ifunc.s
Original file line number Diff line number Diff line change
Expand Up @@ -111,30 +111,29 @@ _start:

// DISASM: Disassembly of section .text:
// DISASM-NEXT: foo:
// DISASM-NEXT: 11000: 1e ff 2f e1 bx lr
// DISASM: bar:
// DISASM-NEXT: 11004: 1e ff 2f e1 bx lr
// DISASM: _start:
// DISASM-NEXT: 11008: 04 00 00 eb bl #16
// DISASM-NEXT: 1100c: 07 00 00 eb bl #28
// DISASM-NEXT: 11000: 1e ff 2f e1 bx lr
// DISASM: bar:
// DISASM-NEXT: 11004: 1e ff 2f e1 bx lr
// DISASM: _start:
// DISASM-NEXT: 11008: 04 00 00 eb bl #16
// DISASM-NEXT: 1100c: 07 00 00 eb bl #28
// 1 * 65536 + 244 = 0x100f4 __rel_iplt_start
// DISASM-NEXT: 11010: f4 00 00 e3 movw r0, #244
// DISASM-NEXT: 11014: 01 00 40 e3 movt r0, #1
// DISASM-NEXT: 11010: f4 00 00 e3 movw r0, #244
// DISASM-NEXT: 11014: 01 00 40 e3 movt r0, #1
// 1 * 65536 + 260 = 0x10104 __rel_iplt_end
// DISASM-NEXT: 11018: 04 01 00 e3 movw r0, #260
// DISASM-NEXT: 1101c: 01 00 40 e3 movt r0, #1
// DISASM-NEXT: 11018: 04 01 00 e3 movw r0, #260
// DISASM-NEXT: 1101c: 01 00 40 e3 movt r0, #1
// DISASM-NEXT: Disassembly of section .plt:
// DISASM: $a:
// DISASM-NEXT: 11020: 04 c0 9f e5 ldr r12, [pc, #4]
// DISASM-NEXT: 11024: 0f c0 8c e0 add r12, r12, pc
// 11024 + 8 + fd4 = 0x12000
// DISASM-NEXT: 11028: 00 f0 9c e5 ldr pc, [r12]
// DISASM-NEXT: $a:
// DISASM-NEXT: 11020: 00 c6 8f e2 add r12, pc, #0, #12
// DISASM-NEXT: 11024: 00 ca 8c e2 add r12, r12, #0, #20
// DISASM-NEXT: 11028: d8 ff bc e5 ldr pc, [r12, #4056]!
// DISASM: $d:
// DISASM-NEXT: 1102c: d4 0f 00 00 .word 0x00000fd4
// DISASM-NEXT: 1102c: d4 d4 d4 d4 .word 0xd4d4d4d4
// DISASM: $a:
// DISASM-NEXT: 11030: 04 c0 9f e5 ldr r12, [pc, #4]
// DISASM-NEXT: 11034: 0f c0 8c e0 add r12, r12, pc
// 11034 + 8 + fc8 = 0x12004
// DISASM-NEXT: 11038: 00 f0 9c e5 ldr pc, [r12]
// DISASM-NEXT: 11030: 00 c6 8f e2 add r12, pc, #0, #12
// DISASM-NEXT: 11034: 00 ca 8c e2 add r12, r12, #0, #20
// DISASM-NEXT: 11038: cc ff bc e5 ldr pc, [r12, #4044]!
// DISASM: $d:
// DISASM-NEXT: 1103c: c8 0f 00 00 .word 0x00000fc8
// DISASM-NEXT: 1103c: d4 d4 d4 d4 .word 0xd4d4d4d4

Loading

0 comments on commit 3c73a41

Please sign in to comment.