Skip to content

Commit

Permalink
Optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Apr 25, 2024
1 parent 002d619 commit d714301
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 15 deletions.
30 changes: 22 additions & 8 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,12 +535,22 @@ inline bool remove_prefix(std::string_view &s, std::string_view prefix) {
template <typename T>
class ConcurrentMap {
public:
ConcurrentMap() {}
ConcurrentMap() = default;

// Constructs a map and immediately allocates its bucket array by
// forwarding the requested bucket count to resize().
ConcurrentMap(i64 nbuckets) {
  this->resize(nbuckets);
}

// Releases the bucket array if one was allocated. The buffer is handed
// to _aligned_free() on Windows and to munmap() on other platforms.
~ConcurrentMap() {
  // Nothing to release if no bucket array was ever allocated.
  if (!entries)
    return;

#ifdef _WIN32
  _aligned_free(entries);
#else
  munmap(entries, sizeof(Entry) * nbuckets);
#endif
}

// In order to avoid unnecessary cache-line false sharing, we want
// this object to be aligned to a reasonably large power-of-two
// address.
Expand All @@ -551,14 +561,19 @@ class ConcurrentMap {
};

// Allocates the bucket array for this map. It must only be called while
// no array is allocated (enforced by the assert below). The requested
// count is passed through bit_ceil() and then clamped to be at least
// MIN_NBUCKETS before the buffer size is computed.
//
// NOTE(review): this listing appears to contain both an entries_buf-based
// allocation and an mmap()/_aligned_malloc()-based allocation; the latter
// overwrites `entries` set by the former. Confirm which path is intended
// to be live.
void resize(i64 nbuckets) {
assert(!entries);
this->nbuckets = std::max<i64>(MIN_NBUCKETS, bit_ceil(nbuckets));
i64 bufsize = sizeof(Entry) * this->nbuckets;

// Even though std::aligned_alloc is defined in C++17, MSVC doesn't
// seem to provide that function. C11's aligned_alloc may not always be
// available. Therefore, we'll align the buffer ourselves.
entries_buf.clear();
entries_buf.resize(sizeof(Entry) * this->nbuckets + alignof(Entry) - 1);
entries = (Entry *)align_to((uintptr_t)&entries_buf[0], alignof(Entry));
// Allocate a zero-initialized buffer. We use mmap() if available
// because it's faster than malloc() and memset().
#ifdef _WIN32
entries = (Entry *)_aligned_malloc(bufsize, alignof(Entry));
// NOTE(review): the _aligned_malloc() result is passed to memset()
// without a null check -- confirm whether allocation failure needs
// to be handled here.
memset((void *)entries, 0, bufsize);
#else
// NOTE(review): the mmap() result is stored without being compared
// against MAP_FAILED -- confirm whether mapping failure needs to be
// handled here.
entries = (Entry *)mmap(nullptr, bufsize, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
}

std::pair<T *, bool> insert(std::string_view key, u64 hash, const T &val) {
Expand Down Expand Up @@ -669,7 +684,6 @@ class ConcurrentMap {
static constexpr i64 NUM_SHARDS = 16;
static constexpr i64 MAX_RETRY = 128;

std::vector<u8> entries_buf;
Entry *entries = nullptr;
i64 nbuckets = 0;

Expand Down
3 changes: 1 addition & 2 deletions elf/mold.h
Original file line number Diff line number Diff line change
Expand Up @@ -798,14 +798,13 @@ class MergedSection : public Chunk<E> {
void write_to(Context<E> &ctx, u8 *buf) override;
void print_stats(Context<E> &ctx);

ConcurrentMap<SectionFragment<E>> map;
HyperLogLog estimator;

private:
MergedSection(std::string_view name, i64 flags, i64 type, i64 entsize);

ConcurrentMap<SectionFragment<E>> map;
std::vector<i64> shard_offsets;
std::once_flag once_flag;
};

template <typename E>
Expand Down
5 changes: 0 additions & 5 deletions elf/output-chunks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1933,11 +1933,6 @@ template <typename E>
SectionFragment<E> *
MergedSection<E>::insert(Context<E> &ctx, std::string_view data, u64 hash,
i64 p2align) {
std::call_once(once_flag, [&] {
// We aim for a 2/3 occupancy ratio
map.resize(estimator.get_cardinality() * 3 / 2);
});

// Even if GC is enabled, we garbage-collect only memory-mapped strings.
// Non-memory-allocated strings are typically identifiers used by debug info.
// To remove such strings, use the `strip` command.
Expand Down
7 changes: 7 additions & 0 deletions elf/passes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,10 @@ template <typename E>
void resolve_section_pieces(Context<E> &ctx) {
Timer t(ctx, "resolve_section_pieces");

// We aim for a 2/3 occupancy ratio
for (std::unique_ptr<MergedSection<E>> &sec : ctx.merged_sections)
sec->map.resize(sec->estimator.get_cardinality() * 3 / 2);

tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
file->resolve_section_pieces(ctx);
});
Expand Down Expand Up @@ -441,6 +445,9 @@ void add_comment_string(Context<E> &ctx, std::string str) {
MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS,
SHF_MERGE | SHF_STRINGS, 1, 1);

if (sec->map.nbuckets == 0)
sec->map.resize(4096);

std::string_view buf = save_string(ctx, str);
std::string_view data(buf.data(), buf.size() + 1);
sec->insert(ctx, data, hash_string(data), 0);
Expand Down

0 comments on commit d714301

Please sign in to comment.