Skip to content

Commit

Permalink
[lld][ELF] Add support for ADRP+ADD optimization for AArch64
Browse files Browse the repository at this point in the history
This diff adds support for ADRP+ADD optimization for AArch64 described in
ARM-software/abi-aa@d2ca58c
i.e. under appropriate constraints

ADRP  x0, symbol
ADD   x0, x0, :lo12: symbol

can be turned into

NOP
ADR   x0, symbol

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D117614
  • Loading branch information
alexander-shaposhnikov committed Feb 2, 2022
1 parent e188aae commit 4450a2a
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 6 deletions.
50 changes: 50 additions & 0 deletions lld/ELF/Arch/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,55 @@ AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
safeToRelaxAdrpLdr = i == size;
}

// Try to shrink an address-materialization pair
//   ADRP xn, sym
//   ADD  xn, xn, :lo12: sym
// into
//   NOP
//   ADR  xn, sym
// which is possible whenever sym lies within ADR range of the ADD slot.
//
// adrpRel / addRel are the relocations attached to the two instructions,
// secAddr is the address added to addRel.offset to obtain the address of the
// ADD slot, and buf is the buffer indexed by the relocation offsets.
//
// Returns true if both instructions in buf were rewritten (the ADR is then
// already relocated); returns false, leaving buf untouched, otherwise.
bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // Relaxation has to be enabled, and the relocations must be exactly the
  // page-high / low-12 pair this rewrite understands.
  if (!(config->relax && adrpRel.type == R_AARCH64_ADR_PREL_PG_HI21 &&
        addRel.type == R_AARCH64_ADD_ABS_LO12_NC))
    return false;

  // Both relocations must patch back-to-back instructions, name the same
  // symbol and carry no addend.
  const bool isAdjacent = addRel.offset == adrpRel.offset + 4;
  const bool hasSameSymbol = addRel.sym == adrpRel.sym;
  const bool hasNoAddend = adrpRel.addend == 0 && addRel.addend == 0;
  if (!(isAdjacent && hasSameSymbol && hasNoAddend))
    return false;

  uint8_t *const adrpLoc = buf + adrpRel.offset;
  uint8_t *const addLoc = buf + addRel.offset;
  const uint32_t firstInsn = read32le(adrpLoc);
  const uint32_t secondInsn = read32le(addLoc);

  // The bytes being patched really have to encode ADRP followed by ADD.
  const bool isAdrp = (firstInsn & 0x9f000000) == 0x90000000;
  const bool isAdd = (secondInsn & 0xffc00000) == 0x91000000;
  if (!(isAdrp && isAdd))
    return false;

  // A single register must be written by ADRP and both read and written by
  // ADD; otherwise the pair cannot be collapsed into one ADR.
  const uint32_t reg = firstInsn & 0x1f;
  const uint32_t addRd = secondInsn & 0x1f;
  const uint32_t addRn = (secondInsn >> 5) & 0x1f;
  if (!(reg == addRd && reg == addRn))
    return false;

  // The ADR ends up in the ADD slot, so the displacement is measured from
  // there and must lie within [-1MiB, 1MiB).
  Symbol &sym = *adrpRel.sym;
  const int64_t delta = sym.getVA() - (secAddr + addRel.offset);
  constexpr int64_t adrRange = 1024 * 1024;
  if (delta < -adrRange || delta >= adrRange)
    return false;

  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // First slot becomes: nop
  write32le(adrpLoc, 0xd503201f);
  // Second slot becomes: adr x_<reg>, with the immediate filled in by the
  // relocation below.
  write32le(addLoc, 0x10000000 | reg);
  target->relocate(addLoc, adrRel, delta);
  return true;
}

bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
const Relocation &ldrRel, uint64_t secAddr,
uint8_t *buf) const {
Expand Down Expand Up @@ -657,6 +706,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
getAArch64Page(secAddr + adrpSymRel.offset),
64));
target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
return true;
}

Expand Down
8 changes: 8 additions & 0 deletions lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,14 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
}
target.relocate(bufLoc, rel, targetVA);
break;
case R_AARCH64_PAGE_PC:
if (i + 1 < size && aarch64relaxer.tryRelaxAdrpAdd(
rel, relocations[i + 1], secAddr, buf)) {
++i;
continue;
}
target.relocate(bufLoc, rel, targetVA);
break;
case R_PPC64_RELAX_GOT_PC: {
// The R_PPC64_PCREL_OPT relocation must appear immediately after
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
Expand Down
2 changes: 2 additions & 0 deletions lld/ELF/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ class AArch64Relaxer {
public:
explicit AArch64Relaxer(ArrayRef<Relocation> relocs);

// Attempts to relax an ADRP + ADD pair (described by adrpRel and addRel) that
// targets the same symbol into NOP + ADR. Returns true if the two
// instructions in buf were rewritten, false if the pair was left as is.
bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
uint64_t secAddr, uint8_t *buf) const;
bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
uint64_t secAddr, uint8_t *buf) const;
};
Expand Down
107 changes: 107 additions & 0 deletions lld/test/ELF/aarch64-adrp-add.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# REQUIRES: aarch64
# RUN: rm -rf %t && split-file %s %t

# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-low.t -o %t/a-low
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=OUT-OF-RANGE
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-high.t -o %t/a-high
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=OUT-OF-RANGE

# OUT-OF-RANGE: adrp x30
# OUT-OF-RANGE-NEXT: add x30, x30

# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t -o %t/a-low
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=IN-RANGE-LOW

# IN-RANGE-LOW: nop
# IN-RANGE-LOW-NEXT: adr x30
# IN-RANGE-LOW-NEXT: adrp x1
# IN-RANGE-LOW-NEXT: add x1
# IN-RANGE-LOW-NEXT: adrp x15
# IN-RANGE-LOW-NEXT: add x15

## ADRP and ADD use different registers, no relaxations should be applied.
# IN-RANGE-LOW-NEXT: adrp x2
# IN-RANGE-LOW-NEXT: add x3, x2

## ADRP and ADD use different registers, no relaxations should be applied.
# IN-RANGE-LOW-NEXT: adrp x2
# IN-RANGE-LOW-NEXT: add x2, x3

# RUN: ld.lld %t/a.o -T %t/within-adr-range-high.t -o %t/a-high
# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=IN-RANGE-HIGH

# IN-RANGE-HIGH: nop
# IN-RANGE-HIGH-NEXT: adr x30
# IN-RANGE-HIGH-NEXT: nop
# IN-RANGE-HIGH-NEXT: adr x1
# IN-RANGE-HIGH-NEXT: nop
# IN-RANGE-HIGH-NEXT: adr x15

## ADRP and ADD use different registers, no relaxations should be applied.
# IN-RANGE-HIGH-NEXT: adrp x2
# IN-RANGE-HIGH-NEXT: add x3, x2

## ADRP and ADD use different registers, no relaxations should be applied.
# IN-RANGE-HIGH-NEXT: adrp x2
# IN-RANGE-HIGH-NEXT: add x2, x3

# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t --no-relax -o %t/a
## --no-relax disables relaxations.
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s --check-prefix=OUT-OF-RANGE

## .rodata and .text are close to each other,
## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
## is equal to the lowest allowed value.
#--- within-adr-range-low.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x100ffc: { *(.text) }
}

## .rodata and .text are far apart,
## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
## is equal to the lowest allowed value minus one.
#--- out-of-adr-range-low.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x100ffd: { *(.text) }
}

## .rodata and .text are close to each other,
## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
## is equal to the highest allowed value.
#--- within-adr-range-high.t
SECTIONS {
.text 0x1000: { *(.text) }
.rodata 0x101003: { *(.rodata) }
}

## .rodata and .text are far apart,
## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
## is equal to the highest allowed value plus one.
#--- out-of-adr-range-high.t
SECTIONS {
.text 0x1000: { *(.text) }
.rodata 0x101004: { *(.rodata) }
}

#--- a.s
.rodata
x:
.word 10
.text
.global _start
_start:
adrp x30, x
add x30, x30, :lo12:x
adrp x1, x
add x1, x1, :lo12:x
adrp x15, x
add x15, x15, :lo12:x
adrp x2, x
add x3, x2, :lo12:x
adrp x2, x
add x2, x3, :lo12:x
21 changes: 18 additions & 3 deletions lld/test/ELF/aarch64-adrp-ldr-got.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o

# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t -o %t/a
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s

## Symbol 'x' is nonpreemptible, the relaxation should be applied.
Expand All @@ -29,8 +29,15 @@
# CHECK-NEXT: adrp x6
# CHECK-NEXT: ldr

# RUN: ld.lld %t/a.o -T %t/within-adr-range.t -o %t/a
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck --check-prefix=ADR %s

## Symbol 'x' is nonpreemptible, the relaxation should be applied.
# ADR: nop
# ADR-NEXT: adr x1

## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations.
# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t --no-relax -o %t/no-relax
# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s

Expand Down Expand Up @@ -61,12 +68,20 @@

## This linker script ensures that .rodata and .text are sufficiently (>1M)
## far apart so that the adrp + ldr pair cannot be relaxed to nop + adr.
#--- linker.t
#--- out-of-adr-range.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x200100: { *(.text) }
}

## This linker script ensures that .rodata and .text are sufficiently (<1M)
## close to each other so that the adrp + ldr pair can be relaxed to nop + adr.
#--- within-adr-range.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x2000: { *(.text) }
}

## This linker script ensures that .rodata and .text are sufficiently (>4GB)
## far apart so that the adrp + ldr pair cannot be relaxed.
#--- out-of-range.t
Expand Down
2 changes: 1 addition & 1 deletion lld/test/ELF/aarch64-copy.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t.o
// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %p/Inputs/relocation-copy.s -o %t2.o
// RUN: ld.lld -shared %t2.o -soname fixed-length-string.so -o %t2.so
// RUN: ld.lld %t.o %t2.so -o %t
// RUN: ld.lld --no-relax %t.o %t2.so -o %t
// RUN: llvm-readobj -S -r --symbols %t | FileCheck %s
// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CODE %s
// RUN: llvm-objdump -s --section=.rodata %t | FileCheck --check-prefix=RODATA %s
Expand Down
4 changes: 2 additions & 2 deletions lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# REQUIRES: aarch64
# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o

# RUN: ld.lld %t.o -o %t
# RUN: ld.lld --no-relax %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PDE
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PDE-RELOC

# RUN: ld.lld -pie %t.o -o %t
# RUN: ld.lld -pie --no-relax %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PIE
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PIE-RELOC

Expand Down

0 comments on commit 4450a2a

Please sign in to comment.