From f0c70f8d3449dd672a33133aba4ef0f781c5044c Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Fri, 27 Oct 2017 08:58:28 +0000 Subject: [PATCH] [ELF] Pre-create ThunkSections at Target specific intervals When an OutputSection is larger than the branch range for a Target we need to place thunks such that they are always in range of their caller, and sufficiently spaced to maximise the number of callers that can use the thunk. We use the simple heuristic of placing the ThunkSection at intervals corresponding to a target specific branch range. If the OutputSection is small we put the thunks at the end of the executable sections. Differential Revision: https://reviews.llvm.org/D34689 llvm-svn: 316751 --- lld/ELF/Arch/ARM.cpp | 31 +++++++++++++ lld/ELF/Relocations.cpp | 64 ++++++++++++++++++++------ lld/ELF/Relocations.h | 6 ++- lld/ELF/SyntheticSections.cpp | 2 + lld/ELF/Target.h | 4 ++ lld/test/ELF/arm-thumb-thunk-symbols.s | 6 +-- 6 files changed, 94 insertions(+), 19 deletions(-) diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 6318c24f27780..e04e3d64aa844 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -63,6 +63,37 @@ ARM::ARM() { // ARM uses Variant 1 TLS TcbSize = 8; NeedsThunks = true; + + // The placing of pre-created ThunkSections is controlled by the + // ThunkSectionSpacing parameter. The aim is to place the + // ThunkSection such that all branches from the InputSections prior to the + // ThunkSection can reach a Thunk placed at the end of the ThunkSection. + // Graphically: + // | up to ThunkSectionSpacing .text input sections | + // | ThunkSection | + // | up to ThunkSectionSpacing .text input sections | + // | ThunkSection | + + // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. 
This is to + // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W + // ARM B, BL, BLX range +/- 32MiB + // Thumb B.W, BL, BLX range +/- 16MiB + // Thumb B<cc>.W range +/- 1MiB + // If a branch cannot reach a pre-created ThunkSection a new one will be + // created so we can handle the rare cases of a Thumb 2 conditional branch. + // We intentionally use a lower size for ThunkSectionSpacing than the maximum + // branch range so the end of the ThunkSection is more likely to be within + // range of the branch instruction that is furthest away. The value we shorten + // ThunkSectionSpacing by is set conservatively to allow us to create 16,384 + // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to + // one of the Thunks going out of range. + + // FIXME: lld assumes that the Thumb BL and BLX encoding permits the J1 and + // J2 bits to be used to extend the branch range. On earlier Architectures + // such as ARMv4, ARMv5 and ARMv6 (except ARMv6T2) the range is +/- 4MiB. If + // support for the earlier encodings is added then when they are used the + // ThunkSectionSpacing will need lowering. + ThunkSectionSpacing = 0x1000000 - 0x30000; } uint32_t ARM::calcEFlags() const { diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 7849860a82fbf..acbf679e81c50 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1055,7 +1055,11 @@ void ThunkCreator::mergeThunks(ArrayRef OutputSections) { if (ISD->ThunkSections.empty()) return; - // Order Thunks in ascending OutSecOff + // Remove any zero sized precreated Thunks. + llvm::erase_if(ISD->ThunkSections, [](const ThunkSection *TS) { + return TS->getSize() == 0; + }); + // Order Thunks in ascending OutSecOff.
std::stable_sort(ISD->ThunkSections.begin(), ISD->ThunkSections.end(), [](const ThunkSection *A, const ThunkSection *B) { return A->OutSecOff < B->OutSecOff; @@ -1084,22 +1088,17 @@ void ThunkCreator::mergeThunks(ArrayRef OutputSections) { }); } -static uint32_t findEndOfFirstNonExec(OutputSection &Cmd) { - for (BaseCommand *Base : Cmd.SectionCommands) - if (auto *ISD = dyn_cast(Base)) - for (auto *IS : ISD->Sections) - if ((IS->Flags & SHF_EXECINSTR) == 0) - return IS->OutSecOff + IS->getSize(); - return 0; -} - -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS, - InputSectionDescription *ISD) { +ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *OS, + InputSectionDescription *ISD) { + // FIXME: When range extension thunks are supported we will need to check + // that the ThunkSection is in range of the caller. if (!ISD->ThunkSections.empty()) return ISD->ThunkSections.front(); - uint32_t Off = findEndOfFirstNonExec(*OS); - return addThunkSection(OS, ISD, Off); + // FIXME: When range extension thunks are supported we must handle the case + // where no pre-created ThunkSections are in range by creating a new one in + // range; for now, it is unreachable. + llvm_unreachable("Must have created at least one ThunkSection per ISR"); } // Add a Thunk that needs to be placed in a ThunkSection that immediately @@ -1126,6 +1125,38 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS) { return TS; } +// Create one or more ThunkSections per OS that can be used to place Thunks. +// We attempt to place the ThunkSections using the following desirable +// properties: +// - Within range of the maximum number of callers +// - Minimise the number of ThunkSections +// +// We follow a simple but conservative heuristic to place ThunkSections at +// offsets that are multiples of a Target specific branch range. +// For an InputSectionRange that is smaller than the range, a single +// ThunkSection at the end of the range will do. 
+void ThunkCreator::createInitialThunkSections( + ArrayRef OutputSections) { + forEachInputSectionDescription( + OutputSections, [&](OutputSection *OS, InputSectionDescription *ISD) { + if (ISD->Sections.empty()) + return; + uint32_t ISLimit; + uint32_t PrevISLimit = ISD->Sections.front()->OutSecOff; + uint32_t ThunkUpperBound = PrevISLimit + Target->ThunkSectionSpacing; + + for (const InputSection *IS : ISD->Sections) { + ISLimit = IS->OutSecOff + IS->getSize(); + if (ISLimit > ThunkUpperBound) { + addThunkSection(OS, ISD, PrevISLimit); + ThunkUpperBound = PrevISLimit + Target->ThunkSectionSpacing; + } + PrevISLimit = ISLimit; + } + addThunkSection(OS, ISD, ISLimit); + }); +} + ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS, InputSectionDescription *ISD, uint64_t Off) { @@ -1175,6 +1206,9 @@ void ThunkCreator::forEachInputSectionDescription( // extension Thunks are not yet supported. bool ThunkCreator::createThunks(ArrayRef OutputSections) { bool AddressesChanged = false; + if (Pass == 0 && Target->ThunkSectionSpacing) + createInitialThunkSections(OutputSections); + // Create all the Thunks and insert them into synthetic ThunkSections. The // ThunkSections are later inserted back into InputSectionDescriptions. 
// We separate the creation of ThunkSections from the insertion of the @@ -1198,7 +1232,7 @@ bool ThunkCreator::createThunks(ArrayRef OutputSections) { if (auto *TIS = T->getTargetInputSection()) TS = getISThunkSec(TIS); else - TS = getOSThunkSec(OS, ISD); + TS = getISDThunkSec(OS, ISD); TS->addThunk(T); Thunks[T->ThunkSym] = T; } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 1e501c80c6d9d..ed665127f0e4a 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -139,16 +139,20 @@ class ThunkCreator { private: void mergeThunks(ArrayRef OutputSections); - ThunkSection *getOSThunkSec(OutputSection *OS, InputSectionDescription *ISD); + ThunkSection *getISDThunkSec(OutputSection *OS, InputSectionDescription *ISD); ThunkSection *getISThunkSec(InputSection *IS); + void createInitialThunkSections(ArrayRef OutputSections); + void forEachInputSectionDescription( ArrayRef OutputSections, std::function Fn); std::pair getThunk(SymbolBody &Body, RelType Type); + ThunkSection *addThunkSection(OutputSection *OS, InputSectionDescription *, uint64_t Off); + // Record all the available Thunks for a Symbol llvm::DenseMap> ThunkedSymbols; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 5870448f8b3b1..261d69076302b 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2391,6 +2391,8 @@ void ThunkSection::writeTo(uint8_t *Buf) { } InputSection *ThunkSection::getTargetInputSection() const { + if (Thunks.empty()) + return nullptr; const Thunk *T = Thunks.front(); return T->getTargetInputSection(); } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index cde437ebf64a1..7970b79e31595 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -74,6 +74,10 @@ class TargetInfo { // end of .got uint64_t GotBaseSymOff = 0; + // On systems with range extensions we place collections of Thunks at + // regular spacings that enable the majority of branches to reach the Thunks.
+ uint32_t ThunkSectionSpacing = 0; + RelType CopyRel; RelType GotRel; RelType PltRel; diff --git a/lld/test/ELF/arm-thumb-thunk-symbols.s b/lld/test/ELF/arm-thumb-thunk-symbols.s index 42046f802f965..faa39fec02189 100644 --- a/lld/test/ELF/arm-thumb-thunk-symbols.s +++ b/lld/test/ELF/arm-thumb-thunk-symbols.s @@ -25,18 +25,18 @@ arm_fn: b thumb_fn // CHECK: Name: __Thumbv7ABSLongThunk_arm_fn -// CHECK-NEXT: Value: 0x11005 +// CHECK-NEXT: Value: 0x12005 // CHECK-NEXT: Size: 10 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK: Name: __ARMv7ABSLongThunk_thumb_fn -// CHECK-NEXT: Value: 0x11010 +// CHECK-NEXT: Value: 0x12010 // CHECK-NEXT: Size: 12 // CHECK-NEXT: Binding: Local (0x0) // CHECK-NEXT: Type: Function (0x2) // CHECK-PI: Name: __ThumbV7PILongThunk_arm_fn -// CHECK-PI-NEXT: Value: 0x1005 +// CHECK-PI-NEXT: Value: 0x2005 // CHECK-PI-NEXT: Size: 12 // CHECK-PI-NEXT: Binding: Local (0x0) // CHECK-PI-NEXT: Type: Function (0x2)