-
-
Notifications
You must be signed in to change notification settings - Fork 457
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Linking clang-13 with debug info takes ~3.6 seconds on a simulated 10-core/20-thread machine. mold spends most of its time (~2.3 seconds) merging string literals in .debug_str. Input .debug_str sections contain 70 million string literals in total, which is reduced to 2 million after de-duplication. The input object files contain a lot of duplicates. clang-13 with debug info is enormous -- it is ~3.1 GiB after linking. It looks like TBB's concurrent hashmap doesn't scale well with the input. In this patch, I implemented our own concurrent hashmap. The hashmap is extremely lightweight and supports only the key-value insertion operation. It doesn't even support rehashing. It aborts once the hash table becomes full. In order to know the correct size for the hashmap before inserting strings into it, I also implemented the HyperLogLog algorithm in this patch. HyperLogLog is an algorithm that gives a fairly accurate estimate of the number of unique elements. With this patch, mold can link clang-13 in ~2.5 seconds, which is ~30% faster than before. #73
- Loading branch information
Showing
8 changed files
with
288 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#include "mold.h" | ||
|
||
// Sentinel key value (the integer -1 converted to a pointer) that
// ConcurrentMap::insert stores in a key slot while it is still filling
// in that bucket. It is only ever compared against, never dereferenced.
// The pointer itself is const so the sentinel cannot be reassigned.
static const char *const locked = reinterpret_cast<const char *>(-1);
|
||
// Lower bound on the bucket count: resize() never allocates a table
// smaller than this. insert() also uses it as the maximum number of
// occupied buckets it probes before reporting the map as full.
static constexpr i64 MIN_NBUCKETS{256};
|
||
template <typename T> | ||
ConcurrentMap<T>::ConcurrentMap() {} | ||
|
||
template <typename T> | ||
ConcurrentMap<T>::ConcurrentMap(i64 nbuckets) { | ||
resize(nbuckets); | ||
} | ||
|
||
template <typename T> | ||
void ConcurrentMap<T>::resize(i64 nbuckets) { | ||
this->~ConcurrentMap(); | ||
|
||
nbuckets = std::max<i64>(MIN_NBUCKETS, next_power_of_two(nbuckets)); | ||
|
||
this->nbuckets = nbuckets; | ||
keys = (std::atomic<const char *> *)calloc(nbuckets, sizeof(keys[0])); | ||
sizes = (u32 *)calloc(nbuckets, sizeof(sizes[0])); | ||
values = (T *)calloc(nbuckets, sizeof(values[0])); | ||
} | ||
|
||
template <typename T> | ||
ConcurrentMap<T>::~ConcurrentMap() { | ||
if (keys) { | ||
free((void *)keys); | ||
free((void *)sizes); | ||
free((void *)values); | ||
} | ||
} | ||
|
||
template <typename T> | ||
std::pair<T *, bool> | ||
ConcurrentMap<T>::insert(std::string_view key, u64 hash, const T &val) { | ||
if (!keys) | ||
return {nullptr, false}; | ||
|
||
ASSERT(__builtin_popcount(nbuckets) == 1); | ||
i64 idx = hash & (nbuckets - 1); | ||
i64 nretry = 0; | ||
|
||
while (nretry < MIN_NBUCKETS) { | ||
const char *ptr = keys[idx]; | ||
if (ptr == locked) { | ||
#ifdef __x86_64__ | ||
asm volatile("pause" ::: "memory"); | ||
#endif | ||
continue; | ||
} | ||
|
||
if (ptr == nullptr) { | ||
if (!keys[idx].compare_exchange_strong(ptr, locked)) | ||
continue; | ||
new (values + idx) T(val); | ||
sizes[idx] = key.size(); | ||
keys[idx] = key.data(); | ||
return {values + idx, true}; | ||
} | ||
|
||
if (key.size() == sizes[idx] && memcmp(ptr, key.data(), sizes[idx]) == 0) | ||
return {values + idx, false}; | ||
|
||
idx = (idx + 1) & (nbuckets - 1); | ||
nretry++; | ||
} | ||
|
||
ASSERT(false && "ConcurrentMap is full"); | ||
return {nullptr, false}; | ||
} | ||
|
||
#define INSTANTIATE(E) \ | ||
template class ConcurrentMap<SectionFragment<E>>; | ||
|
||
INSTANTIATE(X86_64); | ||
INSTANTIATE(I386); | ||
INSTANTIATE(AARCH64); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// This file implements HyperLogLog algorithm, which estimates | ||
// the number of unique items in a given multiset. | ||
// | ||
// For more info, read | ||
// https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog | ||
|
||
#include "mold.h" | ||
|
||
#include <cmath> | ||
|
||
// Returns the estimated number of unique items.
//
// Sums 2^-b over every bucket value b, then returns
// ALPHA * NBUCKETS * NBUCKETS divided by that sum, converted to i64.
i64 HyperLogLog::get_cardinality() const {
  double inverse_sum = 0;
  for (i64 rank : buckets)
    inverse_sum += std::pow(2.0, static_cast<double>(-rank));
  return ALPHA * NBUCKETS * NBUCKETS / inverse_sum;
}
|
||
void HyperLogLog::merge(const HyperLogLog &other) { | ||
for (i64 i = 0; i < NBUCKETS; i++) | ||
merge_one(i, other.buckets[i]); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.