From 4450a2a23df0e7081ca7fee3ec641774afedc2bc Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Wed, 2 Feb 2022 06:08:05 +0000 Subject: [PATCH] [lld][ELF] Add support for ADRP+ADD optimization for AArch64 This diff adds support for ADRP+ADD optimization for AArch64 described in https://github.com/ARM-software/abi-aa/commit/d2ca58c54b8e955cfef25c71822f837ae0439d73 i.e. under appropriate constraints ADRP x0, symbol ADD x0, x0, :lo12: symbol can be turned into NOP ADR x0, symbol Test plan: make check-all Differential revision: https://reviews.llvm.org/D117614 --- lld/ELF/Arch/AArch64.cpp | 50 ++++++++ lld/ELF/InputSection.cpp | 8 ++ lld/ELF/Target.h | 2 + lld/test/ELF/aarch64-adrp-add.s | 107 ++++++++++++++++++ lld/test/ELF/aarch64-adrp-ldr-got.s | 21 +++- lld/test/ELF/aarch64-copy.s | 2 +- .../ELF/aarch64-gnu-ifunc-nonpreemptable.s | 4 +- 7 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 lld/test/ELF/aarch64-adrp-add.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 784d578312d791..5789bc935b638b 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -591,6 +591,55 @@ AArch64Relaxer::AArch64Relaxer(ArrayRef relocs) { safeToRelaxAdrpLdr = i == size; } +bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel, + const Relocation &addRel, uint64_t secAddr, + uint8_t *buf) const { + // When the address of sym is within the range of ADR then + // we may relax + // ADRP xn, sym + // ADD xn, xn, :lo12: sym + // to + // NOP + // ADR xn, sym + if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 || + addRel.type != R_AARCH64_ADD_ABS_LO12_NC) + return false; + // Check if the relocations apply to consecutive instructions. + if (adrpRel.offset + 4 != addRel.offset) + return false; + if (adrpRel.sym != addRel.sym) + return false; + if (adrpRel.addend != 0 || addRel.addend != 0) + return false; + + uint32_t adrpInstr = read32le(buf + adrpRel.offset); + uint32_t addInstr = read32le(buf + addRel.offset); + // Check if the first instruction is ADRP and the second instruction is ADD. + if ((adrpInstr & 0x9f000000) != 0x90000000 || + (addInstr & 0xffc00000) != 0x91000000) + return false; + uint32_t adrpDestReg = adrpInstr & 0x1f; + uint32_t addDestReg = addInstr & 0x1f; + uint32_t addSrcReg = (addInstr >> 5) & 0x1f; + if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg) + return false; + + Symbol &sym = *adrpRel.sym; + // Check if the address difference is within 1MiB range. + int64_t val = sym.getVA() - (secAddr + addRel.offset); + if (val < -1024 * 1024 || val >= 1024 * 1024) + return false; + + Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset, + /*addend=*/0, &sym}; + // nop + write32le(buf + adrpRel.offset, 0xd503201f); + // adr x_ + write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg); + target->relocate(buf + adrRel.offset, adrRel, val); + return true; +} + bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel, uint64_t secAddr, uint8_t *buf) const { @@ -657,6 +706,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, getAArch64Page(secAddr + adrpSymRel.offset), 64)); target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64)); + tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf); return true; } diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 4e5b0f68592273..4b047f75ad69ca 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -1025,6 +1025,14 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { } target.relocate(bufLoc, rel, targetVA); break; + case R_AARCH64_PAGE_PC: + if (i + 1 < size && aarch64relaxer.tryRelaxAdrpAdd( + rel, relocations[i + 1], secAddr, buf)) { + ++i; + continue; + } + target.relocate(bufLoc, rel, targetVA); + break; case R_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after // R_PPC64_GOT_PCREL34 in the relocations table at the same offset. diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 15330b2be258bf..e002114f84394a 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -227,6 +227,8 @@ class AArch64Relaxer { public: explicit AArch64Relaxer(ArrayRef relocs); + bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel, + uint64_t secAddr, uint8_t *buf) const; bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel, uint64_t secAddr, uint8_t *buf) const; }; diff --git a/lld/test/ELF/aarch64-adrp-add.s b/lld/test/ELF/aarch64-adrp-add.s new file mode 100644 index 00000000000000..3b3eb18f04cb56 --- /dev/null +++ b/lld/test/ELF/aarch64-adrp-add.s @@ -0,0 +1,107 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t && split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o +# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-low.t -o %t/a-low +# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=OUT-OF-RANGE +# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-high.t -o %t/a-high +# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=OUT-OF-RANGE + +# OUT-OF-RANGE: adrp x30 +# OUT-OF-RANGE-NEXT: add x30, x30 + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o +# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t -o %t/a-low +# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=IN-RANGE-LOW + +# IN-RANGE-LOW: nop +# IN-RANGE-LOW-NEXT: adr x30 +# IN-RANGE-LOW-NEXT: adrp x1 +# IN-RANGE-LOW-NEXT: add x1 +# IN-RANGE-LOW-NEXT: adrp x15 +# IN-RANGE-LOW-NEXT: add x15 + +## ADRP and ADD use different registers, no relaxations should be applied. +# IN-RANGE-LOW-NEXT: adrp x2 +# IN-RANGE-LOW-NEXT: add x3, x2 + +## ADRP and ADD use different registers, no relaxations should be applied. +# IN-RANGE-LOW-NEXT: adrp x2 +# IN-RANGE-LOW-NEXT: add x2, x3 + +# RUN: ld.lld %t/a.o -T %t/within-adr-range-high.t -o %t/a-high +# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=IN-RANGE-HIGH + +# IN-RANGE-HIGH: nop +# IN-RANGE-HIGH-NEXT: adr x30 +# IN-RANGE-HIGH-NEXT: nop +# IN-RANGE-HIGH-NEXT: adr x1 +# IN-RANGE-HIGH-NEXT: nop +# IN-RANGE-HIGH-NEXT: adr x15 + +## ADRP and ADD use different registers, no relaxations should be applied. +# IN-RANGE-HIGH-NEXT: adrp x2 +# IN-RANGE-HIGH-NEXT: add x3, x2 + +## ADRP and ADD use different registers, no relaxations should be applied. +# IN-RANGE-HIGH-NEXT: adrp x2 +# IN-RANGE-HIGH-NEXT: add x2, x3 + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o +# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t --no-relax -o %t/a +## --no-relax disables relaxations. +# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s --check-prefix=OUT-OF-RANGE + +## .rodata and .text are close to each other, +## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference +## is equal to the lowest allowed value. +#--- within-adr-range-low.t +SECTIONS { + .rodata 0x1000: { *(.rodata) } + .text 0x100ffc: { *(.text) } +} + +## .rodata and .text are far apart, +## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference +## is equal to the lowest allowed value minus one. +#--- out-of-adr-range-low.t +SECTIONS { + .rodata 0x1000: { *(.rodata) } + .text 0x100ffd: { *(.text) } +} + +## .rodata and .text are close to each other, +## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference +## is equal to the highest allowed value. +#--- within-adr-range-high.t +SECTIONS { + .text 0x1000: { *(.text) } + .rodata 0x101003: { *(.rodata) } +} + +## .rodata and .text are far apart, +## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference +## is equal to the highest allowed value plus one. +#--- out-of-adr-range-high.t +SECTIONS { + .text 0x1000: { *(.text) } + .rodata 0x101004: { *(.rodata) } +} + +#--- a.s +.rodata +x: +.word 10 +.text +.global _start +_start: + adrp x30, x + add x30, x30, :lo12:x + adrp x1, x + add x1, x1, :lo12:x + adrp x15, x + add x15, x15, :lo12:x + adrp x2, x + add x3, x2, :lo12:x + adrp x2, x + add x2, x3, :lo12:x diff --git a/lld/test/ELF/aarch64-adrp-ldr-got.s b/lld/test/ELF/aarch64-adrp-ldr-got.s index f085f31290db5e..56a90aac3876c8 100644 --- a/lld/test/ELF/aarch64-adrp-ldr-got.s +++ b/lld/test/ELF/aarch64-adrp-ldr-got.s @@ -5,7 +5,7 @@ # RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o # RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o -# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a +# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t -o %t/a # RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s ## Symbol 'x' is nonpreemptible, the relaxation should be applied. @@ -29,8 +29,15 @@ # CHECK-NEXT: adrp x6 # CHECK-NEXT: ldr +# RUN: ld.lld %t/a.o -T %t/within-adr-range.t -o %t/a +# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck --check-prefix=ADR %s + +## Symbol 'x' is nonpreemptible, the relaxation should be applied. +# ADR: nop +# ADR-NEXT: adr x1 + ## Symbol 'x' is nonpreemptible, but --no-relax surpresses relaxations. -# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax +# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t --no-relax -o %t/no-relax # RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \ # RUN: FileCheck --check-prefix=X1-NO-RELAX %s @@ -61,12 +68,20 @@ ## This linker script ensures that .rodata and .text are sufficiently (>1M) ## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop. -#--- linker.t +#--- out-of-adr-range.t SECTIONS { .rodata 0x1000: { *(.rodata) } .text 0x200100: { *(.text) } } +## This linker script ensures that .rodata and .text are sufficiently (<1M) +## close to each other so that the adrp + ldr pair can be relaxed to nop + adr. +#--- within-adr-range.t +SECTIONS { + .rodata 0x1000: { *(.rodata) } + .text 0x2000: { *(.text) } +} + ## This linker script ensures that .rodata and .text are sufficiently (>4GB) ## far apart so that the adrp + ldr pair cannot be relaxed. #--- out-of-range.t diff --git a/lld/test/ELF/aarch64-copy.s b/lld/test/ELF/aarch64-copy.s index 591186f1470f03..0d20f00323aa50 100644 --- a/lld/test/ELF/aarch64-copy.s +++ b/lld/test/ELF/aarch64-copy.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %p/Inputs/relocation-copy.s -o %t2.o // RUN: ld.lld -shared %t2.o -soname fixed-length-string.so -o %t2.so -// RUN: ld.lld %t.o %t2.so -o %t +// RUN: ld.lld --no-relax %t.o %t2.so -o %t // RUN: llvm-readobj -S -r --symbols %t | FileCheck %s // RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CODE %s // RUN: llvm-objdump -s --section=.rodata %t | FileCheck --check-prefix=RODATA %s diff --git a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s index 284d9a8d7edce0..930709badcd114 100644 --- a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s +++ b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s @@ -1,11 +1,11 @@ # REQUIRES: aarch64 # RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o -# RUN: ld.lld %t.o -o %t +# RUN: ld.lld --no-relax %t.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PDE # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PDE-RELOC -# RUN: ld.lld -pie %t.o -o %t +# RUN: ld.lld -pie --no-relax %t.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PIE # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PIE-RELOC