212 changes: 205 additions & 7 deletions lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ class Writer {
std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map);
void createExportTable();
void mergeSections();
void readRelocTargets();
void assignAddresses();
void finalizeAddresses();
void removeEmptySections();
void createSymbolAndStringTable();
void openFile(StringRef OutputPath);
Expand Down Expand Up @@ -299,6 +301,193 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) {
} // namespace coff
} // namespace lld

// Returns true if a relocation of type RelType placed at address P can reach
// the target address S. The distance is measured between S and P + 4, and
// Margin is added on top of it before it is compared against the range of
// the relocation. Relocation types other than the branch types tested below
// are always reported as being in range.
static bool isInRange(uint16_t RelType, uint64_t S, uint64_t P, int Margin) {
  assert(Config->Machine == ARMNT);
  int64_t Distance = AbsoluteDifference(S, P + 4) + Margin;
  if (RelType == IMAGE_REL_ARM_BRANCH20T)
    return isInt<21>(Distance);
  if (RelType == IMAGE_REL_ARM_BRANCH24T || RelType == IMAGE_REL_ARM_BLX23T)
    return isInt<25>(Distance);
  return true;
}

// Looks up the most recently created thunk for Target (keyed by the RVA of
// Target) and hands it back if a relocation of the given Type at address P
// can reach it with the requested Margin. Otherwise a fresh thunk is created
// and remembered as the latest one for this target. The bool of the returned
// pair is true only when a new thunk had to be created.
static std::pair<Defined *, bool>
getThunk(DenseMap<uint64_t, Defined *> &LastThunks, Defined *Target, uint64_t P,
         uint16_t Type, int Margin) {
  Defined *&Latest = LastThunks[Target->getRVA()];
  bool Reusable = Latest && isInRange(Type, Latest->getRVA(), P, Margin);
  if (!Reusable) {
    // No usable thunk yet; create one and record it for later lookups.
    Latest = make<DefinedSynthetic>("", make<RangeExtensionThunk>(Target));
    return {Latest, true};
  }
  return {Latest, false};
}

// Walks over every relocation of every SectionChunk in Chunks. Whenever a
// relocation cannot reach its target, a thunk is placed right after the
// section chunk that holds the relocation and the relocation is redirected
// to it. If the latest thunk for the same target is still reachable, it is
// reused instead of creating another one. Every range check is made with the
// given Margin, so that relocations which are only barely in range also get
// a thunk - other added thunks may otherwise push their targets out of
// range.
//
// The caller verifies afterwards that all relocations are in range (without
// any extra margin); if that verification fails, the thunks are thrown away
// and the whole procedure is retried with a wider margin.
//
// Returns true if at least one thunk chunk was inserted into Chunks.
static bool createThunks(std::vector<Chunk *> &Chunks, int Margin) {
  DenseMap<uint64_t, Defined *> LastThunks;
  size_t ThunksSize = 0;
  bool InsertedAny = false;
  // Chunks grows while we iterate, so index into it and re-read its size on
  // every iteration instead of holding on to iterators.
  for (size_t I = 0; I != Chunks.size(); ++I) {
    auto *SC = dyn_cast_or_null<SectionChunk>(Chunks[I]);
    if (!SC)
      continue;

    // New thunks go directly after this chunk. Their final address is not
    // known yet; estimate it from the current end of the chunk, shifted by
    // the size of all thunks added so far to make the estimate a bit better.
    size_t InsertAt = I + 1;
    size_t InsertRVA = SC->getRVA() + SC->getSize() + ThunksSize;

    size_t NumRelocs = SC->Relocs.size();
    for (size_t J = 0; J < NumRelocs; ++J) {
      Symbol *&RelocTarget = SC->RelocTargets[J];
      Defined *Sym = dyn_cast_or_null<Defined>(RelocTarget);
      if (!Sym)
        continue;

      // The estimated source address P should be fairly accurate. For the
      // target address we cannot tell whether it will be shifted by all of
      // ThunksSize, part of it, or none of it, which leaves some uncertainty
      // as soon as one thunk has been added.
      const coff_relocation &Rel = SC->Relocs[J];
      uint64_t P = SC->getRVA() + Rel.VirtualAddress + ThunksSize;
      if (isInRange(Rel.Type, Sym->getRVA(), P, Margin))
        continue;

      // Out of range: route the relocation through an existing or new thunk.
      std::pair<Defined *, bool> Found =
          getThunk(LastThunks, Sym, P, Rel.Type, Margin);
      if (Found.second) {
        Chunk *NewChunk = Found.first->getChunk();
        NewChunk->setRVA(InsertRVA); // Estimate of where it will be located.
        Chunks.insert(Chunks.begin() + InsertAt, NewChunk);
        ++InsertAt;
        const auto NewSize = NewChunk->getSize();
        ThunksSize += NewSize;
        InsertRVA += NewSize;
        InsertedAny = true;
      }
      RelocTarget = Found.first;
    }
  }
  return InsertedAny;
}

// Verify that all relocations are in range, with no extra margin requirements.
//
// Only SectionChunks are inspected, and within them only relocations whose
// target is a Defined symbol; null entries and other chunk or symbol kinds
// are skipped. Returns false as soon as the first out-of-range relocation is
// found, and true if none is found.
//
// Chunks is taken by const reference: this function only reads the list, and
// it is called for every output section on every pass, so taking the vector
// by value would copy it each time for no benefit.
static bool verifyRanges(const std::vector<Chunk *> &Chunks) {
  for (Chunk *C : Chunks) {
    SectionChunk *SC = dyn_cast_or_null<SectionChunk>(C);
    if (!SC)
      continue;

    for (size_t J = 0, E = SC->Relocs.size(); J < E; ++J) {
      const coff_relocation &Rel = SC->Relocs[J];
      Symbol *RelocTarget = SC->RelocTargets[J];

      Defined *Sym = dyn_cast_or_null<Defined>(RelocTarget);
      if (!Sym)
        continue;

      // P is the address of the relocation itself, S the address it targets.
      uint64_t P = SC->getRVA() + Rel.VirtualAddress;
      uint64_t S = Sym->getRVA();

      if (!isInRange(Rel.Type, S, P, 0))
        return false;
    }
  }
  return true;
}

// Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() {
assignAddresses();
if (Config->Machine != ARMNT)
return;

size_t OrigNumChunks = 0;
for (OutputSection *Sec : OutputSections) {
Sec->OrigChunks = Sec->Chunks;
OrigNumChunks += Sec->Chunks.size();
}

int Pass = 0;
int Margin = 1024 * 100;
while (true) {
// First check whether we need thunks at all, or if the previous pass of
// adding them turned out ok.
bool RangesOk = true;
size_t NumChunks = 0;
for (OutputSection *Sec : OutputSections) {
if (!verifyRanges(Sec->Chunks)) {
RangesOk = false;
break;
}
NumChunks += Sec->Chunks.size();
}
if (RangesOk) {
if (Pass > 0)
log("Added " + Twine(NumChunks - OrigNumChunks) + " thunks with " +
"margin " + Twine(Margin) + " in " + Twine(Pass) + " passes");
return;
}

if (Pass >= 10)
fatal("adding thunks hasn't converged after " + Twine(Pass) + " passes");

if (Pass > 0) {
// If the previous pass didn't work out, reset everything back to the
// original conditions before retrying with a wider margin. This should
// ideally never happen under real circumstances.
for (OutputSection *Sec : OutputSections) {
Sec->Chunks = Sec->OrigChunks;
for (Chunk *C : Sec->Chunks)
C->resetRelocTargets();
}
Margin *= 2;
}

// Try adding thunks everywhere where it is needed, with a margin
// to avoid things going out of range due to the added thunks.
bool AddressesChanged = false;
for (OutputSection *Sec : OutputSections)
AddressesChanged |= createThunks(Sec->Chunks, Margin);
// If the verification above thought we needed thunks, we should have
// added some.
assert(AddressesChanged);

// Recalculate the layout for the whole image (and verify the ranges at
// the start of the next round).
assignAddresses();

Pass++;
}
}

// The main function of the writer.
void Writer::run() {
ScopedTimer T1(CodeLayoutTimer);
Expand All @@ -309,7 +498,8 @@ void Writer::run() {
appendImportThunks();
createExportTable();
mergeSections();
assignAddresses();
readRelocTargets();
finalizeAddresses();
removeEmptySections();
setSectionPermissions();
createSymbolAndStringTable();
Expand Down Expand Up @@ -796,9 +986,9 @@ void Writer::createSymbolAndStringTable() {
}

void Writer::mergeSections() {
if (!PdataSec->getChunks().empty()) {
FirstPdata = PdataSec->getChunks().front();
LastPdata = PdataSec->getChunks().back();
if (!PdataSec->Chunks.empty()) {
FirstPdata = PdataSec->Chunks.front();
LastPdata = PdataSec->Chunks.back();
}

for (auto &P : Config->Merge) {
Expand Down Expand Up @@ -826,6 +1016,13 @@ void Writer::mergeSections() {
}
}

// Visits all sections to initialize their relocation targets.
void Writer::readRelocTargets() {
for (OutputSection *Sec : OutputSections)
for_each(parallel::par, Sec->Chunks.begin(), Sec->Chunks.end(),
[&](Chunk *C) { C->readRelocTargets(); });
}

// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
void Writer::assignAddresses() {
Expand All @@ -843,7 +1040,7 @@ void Writer::assignAddresses() {
addBaserels();
uint64_t RawSize = 0, VirtualSize = 0;
Sec->Header.VirtualAddress = RVA;
for (Chunk *C : Sec->getChunks()) {
for (Chunk *C : Sec->Chunks) {
VirtualSize = alignTo(VirtualSize, C->Alignment);
C->setRVA(RVA + VirtualSize);
C->OutputSectionOff = VirtualSize;
Expand Down Expand Up @@ -1315,7 +1512,7 @@ void Writer::writeSections() {
// ADD instructions).
if (Sec->Header.Characteristics & IMAGE_SCN_CNT_CODE)
memset(SecBuf, 0xCC, Sec->getRawSize());
for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(),
for_each(parallel::par, Sec->Chunks.begin(), Sec->Chunks.end(),
[&](Chunk *C) { C->writeTo(SecBuf); });
}
}
Expand Down Expand Up @@ -1399,12 +1596,13 @@ uint32_t Writer::getSizeOfInitializedData() {
void Writer::addBaserels() {
if (!Config->Relocatable)
return;
RelocSec->Chunks.clear();
std::vector<Baserel> V;
for (OutputSection *Sec : OutputSections) {
if (Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
continue;
// Collect all locations for base relocations.
for (Chunk *C : Sec->getChunks())
for (Chunk *C : Sec->Chunks)
C->getBaserels(&V);
// Add the addresses to .reloc section.
if (!V.empty())
Expand Down
5 changes: 3 additions & 2 deletions lld/COFF/Writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class OutputSection {
void addChunk(Chunk *C);
void insertChunkAtStart(Chunk *C);
void merge(OutputSection *Other);
ArrayRef<Chunk *> getChunks() { return Chunks; }
void addPermissions(uint32_t C);
void setPermissions(uint32_t C);
uint64_t getRVA() { return Header.VirtualAddress; }
Expand All @@ -63,9 +62,11 @@ class OutputSection {
llvm::StringRef Name;
llvm::object::coff_section Header = {};

std::vector<Chunk *> Chunks;
std::vector<Chunk *> OrigChunks;

private:
uint32_t StringTableOff = 0;
std::vector<Chunk *> Chunks;
};

}
Expand Down
2 changes: 0 additions & 2 deletions lld/test/COFF/Inputs/far-arm-thumb-abs.s

This file was deleted.

2 changes: 0 additions & 2 deletions lld/test/COFF/Inputs/far-arm-thumb-abs20.s

This file was deleted.

10 changes: 0 additions & 10 deletions lld/test/COFF/arm-thumb-branch-error.s

This file was deleted.

12 changes: 9 additions & 3 deletions lld/test/COFF/arm-thumb-branch20-error.s
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
// REQUIRES: arm
// RUN: llvm-mc -filetype=obj -triple=thumbv7a-windows-gnu %s -o %t.obj
// RUN: llvm-mc -filetype=obj -triple=thumbv7a-windows-gnu %S/Inputs/far-arm-thumb-abs20.s -o %t.far.obj
// RUN: not lld-link -entry:_start -subsystem:console %t.obj %t.far.obj -out:%t.exe 2>&1 | FileCheck %s
// RUN: not lld-link -entry:_start -subsystem:console %t.obj -out:%t.exe 2>&1 | FileCheck %s
.syntax unified
.globl _start
_start:
bne too_far20
.space 0x100000
.section .text$a, "xr"
too_far20:
bx lr

// CHECK: relocation out of range
// When trying to add a thunk at the end of the section, the thunk itself
// will be too far away, so this won't converge.

// CHECK: adding thunks hasn't converged
70 changes: 70 additions & 0 deletions lld/test/COFF/arm-thumb-thunks-multipass.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// REQUIRES: arm
// RUN: llvm-mc -filetype=obj -triple=thumbv7-windows %s -o %t.obj
// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
// RUN: llvm-objdump -d %t.exe -start-address=0x403000 -stop-address=0x403008 | FileCheck -check-prefix=FUNC01 %s
// RUN: llvm-objdump -d %t.exe -start-address=0x404ffa -stop-address=0x405012 | FileCheck -check-prefix=FUNC01-THUNKS %s

// VERBOSE: Added {{.*}} thunks with margin 204800 in 2 passes

.syntax unified
.globl main
.text
main:
b func01
bx lr

.irp i, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, 16, 17, 18
.section .text$\i\()a, "xr"
.balign 8192
func\i:
bne far_func\i
bne func_within_margin\i
// Originally, the first section is less than 8192 bytes large, and the
// second one follows almost directly. After adding one thunk after
// the first section, the second one will move forward by 8192 bytes
// due to the alignment.
.space 8192 - 8 - 4

.section .text$\i\()b, "xr"
.balign 8192
align\i:
nop
.endr

.section .text$999, "xr"
tail:
.space 0x100000 - 100*1024 - 18*8192*2
// Initially, these symbols are within range from all the sections above,
// even when taking the initial margin into account. After adding thunks
// to all the sections above, some of these are also out of range, forcing
// a second pass.
.irp i, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, 16, 17, 18
func_within_margin\i:
nop
.endr
.space 0x100000

// These are always out of range.
.irp i, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, 16, 17, 18
far_func\i:
nop
.endr
bx lr

// FUNC01: 403000: 41 f0 fc 87 bne.w #8184 <.text+0x3ffc>
// FUNC01: 403004: 41 f0 ff 87 bne.w #8190 <.text+0x4006>

// Check that we only have two thunks here, even if we created the first
// thunk twice (once in the first pass, then thrown away and recreated
// in the second pass).

// FUNC01-THUNKS: 404ffa: 00 00 movs r0, r0
// The instruction above is padding from the .space
// FUNC01-THUNKS: 404ffc: 47 f2 1e 0c movw r12, #28702
// FUNC01-THUNKS: 405000: c0 f2 20 0c movt r12, #32
// FUNC01-THUNKS: 405004: e7 44 add pc, r12
// FUNC01-THUNKS: 405006: 46 f6 f0 7c movw r12, #28656
// FUNC01-THUNKS: 40500a: c0 f2 10 0c movt r12, #16
// FUNC01-THUNKS: 40500e: e7 44 add pc, r12
// The instruction below is padding from the .balign
// FUNC01-THUNKS: 405010: cc cc ldm r4!, {r2, r3, r6, r7}
75 changes: 75 additions & 0 deletions lld/test/COFF/arm-thumb-thunks.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// REQUIRES: arm
// RUN: llvm-mc -filetype=obj -triple=thumbv7-windows %s -o %t.obj
// RUN: lld-link -entry:main -subsystem:console %t.obj -out:%t.exe -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
// RUN: llvm-objdump -d %t.exe -start-address=0x401000 -stop-address=0x401022 | FileCheck -check-prefix=MAIN %s
// RUN: llvm-objdump -d %t.exe -start-address=0x501022 -stop-address=0x501032 | FileCheck -check-prefix=FUNC1 %s
// RUN: llvm-objdump -d %t.exe -start-address=0x601032 | FileCheck -check-prefix=FUNC2 %s

// VERBOSE: Added 3 thunks with margin {{.*}} in 1 passes

.syntax unified
.globl main
.globl func1
.text
main:
bne func1
bne func2
// This should reuse the same thunk as func1 above
bne func1_alias
bx lr
.section .text$a, "xr"
.space 0x100000
.section .text$b, "xr"
func1:
func1_alias:
// This shouldn't reuse the func2 thunk from above, since it is out
// of range.
bne func2
bx lr
.section .text$c, "xr"
.space 0x100000
.section .text$d, "xr"
func2:
// Test using string tail merging. This is irrelevant to the thunking itself,
// but finalizeAddresses() runs assignAddresses() multiple times; check that
// MergeChunk handles this correctly.
movw r0, :lower16:"??_C@string1"
movt r0, :upper16:"??_C@string1"
movw r1, :lower16:"??_C@string2"
movt r1, :upper16:"??_C@string2"
bx lr

.section .rdata,"dr",discard,"??_C@string1"
.globl "??_C@string1"
"??_C@string1":
.asciz "foobar"
.section .rdata,"dr",discard,"??_C@string2"
.globl "??_C@string2"
"??_C@string2":
.asciz "bar"

// MAIN: 401000: 40 f0 05 80 bne.w #10 <.text+0xe>
// MAIN: 401004: 40 f0 08 80 bne.w #16 <.text+0x18>
// MAIN: 401008: 40 f0 01 80 bne.w #2 <.text+0xe>
// MAIN: 40100c: 70 47 bx lr
// func1 thunk
// MAIN: 40100e: 40 f2 08 0c movw r12, #8
// MAIN: 401012: c0 f2 10 0c movt r12, #16
// MAIN: 401016: e7 44 add pc, r12
// func2 thunk
// MAIN: 401018: 40 f2 0e 0c movw r12, #14
// MAIN: 40101c: c0 f2 20 0c movt r12, #32
// MAIN: 401020: e7 44 add pc, r12

// FUNC1: 501022: 40 f0 01 80 bne.w #2 <.text+0x100028>
// FUNC1: 501026: 70 47 bx lr
// func2 thunk
// FUNC1: 501028: 4f f6 fe 7c movw r12, #65534
// FUNC1: 50102c: c0 f2 0f 0c movt r12, #15
// FUNC1: 501030: e7 44 add pc, r12

// FUNC2: 601032: 42 f2 00 00 movw r0, #8192
// FUNC2: 601036: c0 f2 60 00 movt r0, #96
// FUNC2: 60103a: 42 f2 03 01 movw r1, #8195
// FUNC2: 60103e: c0 f2 60 01 movt r1, #96
// FUNC2: 601042: 70 47 bx lr