Skip to content

Commit

Permalink
Parallelize string merging.
Browse files Browse the repository at this point in the history
String merging is one of the most time-consuming functions in lld.
This patch parallelizes it to speed it up. On my 2-socket 20-core
40-thread Xeon E5-2680 @ 2.8 GHz machine, this patch shortens the
clang debug build link time from 7.11s to 5.16s. It's a 27%
improvement and actually pretty noticeable. In this test condition,
lld is now 4x faster than gold.

Differential Revision: https://reviews.llvm.org/D38266

llvm-svn: 314588
  • Loading branch information
rui314 committed Sep 30, 2017
1 parent 4db732a commit c97a70c
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 30 deletions.
79 changes: 64 additions & 15 deletions lld/ELF/SyntheticSections.cpp
Expand Up @@ -37,6 +37,7 @@
#include "llvm/Support/SHA1.h"
#include "llvm/Support/xxhash.h"
#include <cstdlib>
#include <thread>

using namespace llvm;
using namespace llvm::dwarf;
Expand All @@ -48,6 +49,8 @@ using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;

// Out-of-class definition of the static constexpr member declared in the
// header. finalizeContents() passes NumShards to std::min, which takes its
// arguments by reference, so (before C++17) the member needs a definition.
const size_t MergeNoTailSection::NumShards;

uint64_t SyntheticSection::getVA() const {
if (OutputSection *Sec = getParent())
return Sec->Addr + OutSecOff;
Expand Down Expand Up @@ -2181,19 +2184,19 @@ template <class ELFT> bool VersionNeedSection<ELFT>::empty() const {
return getNeedNum() == 0;
}

MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type,
uint64_t Flags, uint32_t Alignment)
: SyntheticSection(Flags, Type, Alignment, Name),
Builder(StringTableBuilder::RAW, Alignment) {}

// Adopts MS into this synthetic section: the input is appended to Sections
// and its Parent pointer is set to this section.
void MergeSyntheticSection::addSection(MergeInputSection *MS) {
  Sections.push_back(MS);
  MS->Parent = this;
}

size_t MergeSyntheticSection::getSize() const { return Builder.getSize(); }
// Forwards the section attributes to the MergeSyntheticSection base and
// creates Builder as a RAW string table using the same alignment.
MergeTailSection::MergeTailSection(StringRef Name, uint32_t Type,
uint64_t Flags, uint32_t Alignment)
: MergeSyntheticSection(Name, Type, Flags, Alignment),
Builder(StringTableBuilder::RAW, Alignment) {}

// The section size is whatever the string table builder reports.
size_t MergeTailSection::getSize() const { return Builder.getSize(); }

void MergeSyntheticSection::writeTo(uint8_t *Buf) { Builder.write(Buf); }
// Delegates to Builder.write to emit the section contents into Buf.
void MergeTailSection::writeTo(uint8_t *Buf) { Builder.write(Buf); }

void MergeTailSection::finalizeContents() {
// Add all string pieces to the string table builder to create section
Expand All @@ -2215,17 +2218,63 @@ void MergeTailSection::finalizeContents() {
Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I));
}

void MergeNoTailSection::writeTo(uint8_t *Buf) {
for (size_t I = 0; I < NumShards; ++I)
Shards[I].write(Buf + ShardOffsets[I]);
}

// This function is very hot (i.e. it can take several seconds to finish)
// because the number of input pieces is sometimes on the order of
// millions. So, we use multi-threading.
//
// For any strings S and T, we know S is not mergeable with T if S's hash
// value is different from T's. If that's the case, we can safely put S and
// T into different string builders without worrying about merge misses.
// We do it in parallel.
void MergeNoTailSection::finalizeContents() {
  // Initializes string table builders, exactly one per shard.
  Shards.reserve(NumShards);
  for (size_t I = 0; I < NumShards; ++I)
    Shards.emplace_back(StringTableBuilder::RAW, Alignment);

  // Concurrency level. Must be a power of 2 because ownership of a shard
  // is decided below by masking the shard ID with (Concurrency - 1).
  // std::min is pinned to size_t so the call also deduces on targets where
  // PowerOf2Floor's return type and size_t are different types.
  size_t Concurrency = 1;
  if (Config->Threads)
    if (int N = std::thread::hardware_concurrency())
      Concurrency = std::min<size_t>(PowerOf2Floor(N), NumShards);

  // Add section pieces to the builders. Every worker scans all pieces but
  // only inserts those whose shard it owns, so no two workers ever add to
  // the same builder.
  parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
    for (MergeInputSection *Sec : Sections) {
      for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
        if (!Sec->Pieces[I].Live)
          continue;
        CachedHashStringRef Str = Sec->getData(I);
        size_t ShardId = getShardId(Str.hash());
        if ((ShardId & (Concurrency - 1)) == ThreadId)
          Sec->Pieces[I].OutputOff = Shards[ShardId].add(Str);
      }
    }
  });

  // Compute an in-section offset for each shard. Only non-empty shards
  // bump the running offset up to the section alignment.
  size_t Off = 0;
  for (size_t I = 0; I < NumShards; ++I) {
    Shards[I].finalizeInOrder();
    if (Shards[I].getSize() > 0)
      Off = alignTo(Off, Alignment);
    ShardOffsets[I] = Off;
    Off += Shards[I].getSize();
  }
  Size = Off;

  // So far, section pieces have offsets from beginning of shards, but
  // we want offsets from beginning of the whole section. Fix them.
  parallelForEach(Sections, [&](MergeInputSection *Sec) {
    for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
      if (Sec->Pieces[I].Live)
        Sec->Pieces[I].OutputOff +=
            ShardOffsets[getShardId(Sec->getData(I).hash())];
  });
}

static MergeSyntheticSection *createMergeSynthetic(StringRef Name,
Expand Down
34 changes: 28 additions & 6 deletions lld/ELF/SyntheticSections.h
Expand Up @@ -668,24 +668,26 @@ template <class ELFT> class VersionNeedSection final : public SyntheticSection {
// Base class shared by MergeTailSection and MergeNoTailSection; it keeps the
// list of MergeInputSections handed over through addSection().
class MergeSyntheticSection : public SyntheticSection {
public:
// Records MS in Sections and makes this section its parent.
void addSection(MergeInputSection *MS);
// NOTE(review): the next two declarations look like the pre-change side of
// the diff (this view carries no +/- markers) — confirm before relying on
// them.
size_t getSize() const override;
void writeTo(uint8_t *Buf) override;

protected:
// NOTE(review): the constructor shows up twice below — a declaration ending
// in ';' followed by an inline definition; presumably only the inline form
// is current. TODO confirm against the commit.
MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags,
uint32_t Alignment);
uint32_t Alignment)
: SyntheticSection(Flags, Type, Alignment, Name) {}

// Input sections registered through addSection().
std::vector<MergeInputSection *> Sections;
// NOTE(review): presumably the pre-change member; MergeTailSection declares
// its own Builder — confirm.
llvm::StringTableBuilder Builder;
};

// Merge section backed by a single llvm::StringTableBuilder (the Builder
// member below).
class MergeTailSection final : public MergeSyntheticSection {
public:
// NOTE(review): the constructor shows up twice below — an inline definition
// followed by the tail of an out-of-line declaration; presumably only the
// declaration is current, since SyntheticSections.cpp defines the
// constructor. TODO confirm against the commit.
MergeTailSection(StringRef Name, uint32_t Type, uint64_t Flags,
uint32_t Alignment)
: MergeSyntheticSection(Name, Type, Flags, Alignment) {}
uint32_t Alignment);

// Size and contents are both taken from Builder (see the definitions in
// SyntheticSections.cpp).
size_t getSize() const override;
void writeTo(uint8_t *Buf) override;
void finalizeContents() override;

private:
// String table holding this section's contents.
llvm::StringTableBuilder Builder;
};

class MergeNoTailSection final : public MergeSyntheticSection {
Expand All @@ -694,7 +696,27 @@ class MergeNoTailSection final : public MergeSyntheticSection {
uint32_t Alignment)
: MergeSyntheticSection(Name, Type, Flags, Alignment) {}

size_t getSize() const override { return Size; }
void writeTo(uint8_t *Buf) override;
void finalizeContents() override;

private:
// We use the most significant bits of a hash as a shard ID.
// The reason why we don't want to use the least significant bits is
// because DenseMap also uses lower bits to determine a bucket ID.
// If we use lower bits, it significantly increases the probability of
// hash collisions.
size_t getShardId(uint32_t Hash) {
  // NumShards is a power of two, so its trailing-zero count is the number
  // of bits needed to index a shard; keep only that many top bits of Hash.
  const auto ShardBits = llvm::countTrailingZeros(NumShards);
  return Hash >> (32 - ShardBits);
}

// Section size
size_t Size;

// String table contents
constexpr static size_t NumShards = 32;
std::vector<llvm::StringTableBuilder> Shards;
size_t ShardOffsets[NumShards];
};

// .MIPS.abiflags section.
Expand Down
3 changes: 1 addition & 2 deletions lld/test/ELF/comment-gc.s
Expand Up @@ -5,8 +5,7 @@
# RUN: llvm-objdump -s %t1 | FileCheck %s

# CHECK: Contents of section .comment:
# CHECK-NEXT: 0000 00666f6f 00626172 004c4c44 20312e30 .foo.bar.LLD 1.0
# CHECK-NEXT: 0010 00 .
# CHECK-NEXT: foo.LLD 1.0..bar

.ident "foo"

Expand Down
8 changes: 4 additions & 4 deletions lld/test/ELF/compressed-debug-input.s
Expand Up @@ -61,11 +61,11 @@
# DATA-NEXT: AddressAlignment: 1
# DATA-NEXT: EntrySize: 0
# DATA-NEXT: SectionData (
# DATA-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i|
# DATA-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.|
# DATA-NEXT: 0000: 756E7369 676E6564 20696E74 00636861 |unsigned int.cha|
# DATA-NEXT: 0010: 7200756E 7369676E 65642063 68617200 |r.unsigned char.|
# DATA-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in|
# DATA-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned |
# DATA-NEXT: 0040: 63686172 00 |char.|
# DATA-NEXT: 0030: 74007368 6F727420 756E7369 676E6564 |t.short unsigned|
# DATA-NEXT: 0040: 20696E74 00 | int.|
# DATA-NEXT: )
# DATA-NEXT: }

Expand Down
2 changes: 1 addition & 1 deletion lld/test/ELF/merge-string.s
Expand Up @@ -54,7 +54,7 @@ zed:
// NOTAIL-NEXT: AddressAlignment: 1
// NOTAIL-NEXT: EntrySize: 0
// NOTAIL-NEXT: SectionData (
// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.|
// NOTAIL-NEXT: 0000: 62630061 626300 |bc.abc.|
// NOTAIL-NEXT: )

// NOMERGE: Name: .rodata1
Expand Down
4 changes: 2 additions & 2 deletions lld/test/ELF/string-gc.s
Expand Up @@ -14,7 +14,7 @@
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: s3
// CHECK-NEXT: Value: 0x200125
// CHECK-NEXT: Value: 0x200120
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: Object (0x1)
Expand All @@ -23,7 +23,7 @@
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: s1
// CHECK-NEXT: Value: 0x200120
// CHECK-NEXT: Value: 0x200125
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: Object (0x1)
Expand Down

0 comments on commit c97a70c

Please sign in to comment.