diff --git a/Makefile b/Makefile
index c219ca3f05..1b369438c0 100644
--- a/Makefile
+++ b/Makefile
@@ -8,11 +8,12 @@ CPPFLAGS = -g -Imimalloc/include -pthread -std=c++20 \
   -DGIT_HASH=\"$(GIT_HASH)\" \
   $(EXTRA_CPPFLAGS)
 LDFLAGS += $(EXTRA_LDFLAGS) -rdynamic
-LIBS = -Wl,-as-needed -lcrypto -pthread -lz -lxxhash -ldl
+LIBS = -Wl,-as-needed -lcrypto -pthread -lz -lxxhash -ldl -lm
 OBJS = main.o object_file.o input_sections.o output_chunks.o \
   mapfile.o perf.o linker_script.o archive_file.o output_file.o \
   subprocess.o gc_sections.o icf.o symbols.o cmdline.o filepath.o \
   passes.o tar.o compress.o memory_mapped_file.o relocatable.o \
+  concurrent_map.o hyperloglog.o \
   arch_x86_64.o arch_i386.o arch_aarch64.o
 
 PREFIX ?= /usr
diff --git a/concurrent_map.cc b/concurrent_map.cc
new file mode 100644
index 0000000000..ee59270026
--- /dev/null
+++ b/concurrent_map.cc
@@ -0,0 +1,80 @@
+#include "mold.h"
+
+static const char *locked = (char *)-1;
+
+static constexpr i64 MIN_NBUCKETS = 256;
+
+template <typename T>
+ConcurrentMap<T>::ConcurrentMap() {}
+
+template <typename T>
+ConcurrentMap<T>::ConcurrentMap(i64 nbuckets) {
+  resize(nbuckets);
+}
+
+template <typename T>
+void ConcurrentMap<T>::resize(i64 nbuckets) {
+  this->~ConcurrentMap();
+
+  nbuckets = std::max(MIN_NBUCKETS, next_power_of_two(nbuckets));
+
+  this->nbuckets = nbuckets;
+  keys = (std::atomic<const char *> *)calloc(nbuckets, sizeof(keys[0]));
+  sizes = (u32 *)calloc(nbuckets, sizeof(sizes[0]));
+  values = (T *)calloc(nbuckets, sizeof(values[0]));
+}
+
+template <typename T>
+ConcurrentMap<T>::~ConcurrentMap() {
+  if (keys) {
+    free((void *)keys);
+    free((void *)sizes);
+    free((void *)values);
+  }
+}
+
+template <typename T>
+std::pair<T *, bool>
+ConcurrentMap<T>::insert(std::string_view key, u64 hash, const T &val) {
+  if (!keys)
+    return {nullptr, false};
+
+  ASSERT(__builtin_popcount(nbuckets) == 1);
+  i64 idx = hash & (nbuckets - 1);
+  i64 nretry = 0;
+
+  while (nretry < MIN_NBUCKETS) {
+    const char *ptr = keys[idx];
+    if (ptr == locked) {
+#ifdef __x86_64__
+      asm volatile("pause" ::: "memory");
+#endif
+      continue;
+    }
+
+    if (ptr == nullptr) {
+      if (!keys[idx].compare_exchange_strong(ptr, locked))
+        continue;
+      new (values + idx) T(val);
+      sizes[idx] = key.size();
+      keys[idx] = key.data();
+      return {values + idx, true};
+    }
+
+    if (key.size() == sizes[idx] && memcmp(ptr, key.data(), sizes[idx]) == 0)
+      return {values + idx, false};
+
+    idx = (idx + 1) & (nbuckets - 1);
+    nretry++;
+  }
+
+  ASSERT(false && "ConcurrentMap is full");
+  return {nullptr, false};
+}
+
+#define INSTANTIATE(E)                                          \
+  template class ConcurrentMap<SectionFragment<E>>;
+
+INSTANTIATE(X86_64);
+INSTANTIATE(I386);
+INSTANTIATE(AARCH64);
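A note on the insertion protocol above, with a minimal standalone sketch (illustrative names, not mold's; it assumes a power-of-two bucket count and key bytes that outlive the map, which holds in mold because keys point into mmap'ed input files): a bucket's key pointer doubles as its lock. `nullptr` means empty, the sentinel `(char *)-1` means another thread is mid-initialization, and storing the real key pointer publishes the slot.

```cpp
#include <atomic>
#include <cstdint>
#include <cstring>
#include <string_view>
#include <utility>

struct StrIntMap {
  static constexpr int64_t NBUCKETS = 256; // must be a power of two
  std::atomic<const char *> keys[NBUCKETS] = {};
  uint32_t sizes[NBUCKETS] = {};
  int values[NBUCKETS] = {};

  // Returns {pointer to the value slot, whether this call inserted it}.
  std::pair<int *, bool> insert(std::string_view key, uint64_t hash, int val) {
    const char *locked = (const char *)-1;
    int64_t idx = hash & (NBUCKETS - 1);
    for (;;) {
      const char *ptr = keys[idx].load();
      if (ptr == locked)
        continue; // another thread is filling this slot; spin until published

      if (ptr == nullptr) {
        // Try to take the slot. On CAS failure, ptr is reloaded and we
        // fall through to re-examine the bucket.
        if (!keys[idx].compare_exchange_strong(ptr, locked))
          continue;
        values[idx] = val;
        sizes[idx] = key.size();
        keys[idx] = key.data(); // publish; readers may now match the key
        return {values + idx, true};
      }

      if (key.size() == sizes[idx] && memcmp(ptr, key.data(), key.size()) == 0)
        return {values + idx, false}; // someone else inserted the same key

      idx = (idx + 1) & (NBUCKETS - 1); // linear probing
    }
  }
};
```

Unlike the patch, this sketch probes forever instead of giving up after MIN_NBUCKETS retries; the real map asserts the table never fills, which the HyperLogLog-based presizing introduced below is responsible for ensuring.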
diff --git a/hyperloglog.cc b/hyperloglog.cc
new file mode 100644
index 0000000000..6cff1d7074
--- /dev/null
+++ b/hyperloglog.cc
@@ -0,0 +1,21 @@
+// This file implements the HyperLogLog algorithm, which estimates
+// the number of unique items in a given multiset.
+//
+// For more info, read
+// https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog
+
+#include "mold.h"
+
+#include <cmath>
+
+i64 HyperLogLog::get_cardinality() const {
+  double z = 0;
+  for (i64 val : buckets)
+    z += pow(2, -val);
+  return ALPHA * NBUCKETS * NBUCKETS / z;
+}
+
+void HyperLogLog::merge(const HyperLogLog &other) {
+  for (i64 i = 0; i < NBUCKETS; i++)
+    merge_one(i, other.buckets[i]);
+}
diff --git a/main.cc b/main.cc
index 1ffa10be8b..60e54809a5 100644
--- a/main.cc
+++ b/main.cc
@@ -415,6 +415,13 @@ int do_main(int argc, char **argv) {
   if (ctx.objs.empty())
     Fatal(ctx) << "no input files";
 
+  {
+    Timer t(ctx, "register_section_pieces");
+    tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
+      file->register_section_pieces(ctx);
+    });
+  }
+
   // Uniquify shared object files by soname
   {
     std::unordered_set<std::string_view> seen;
diff --git a/mold.h b/mold.h
index b577e78c8a..550545cadf 100644
--- a/mold.h
+++ b/mold.h
@@ -63,6 +63,8 @@ template <typename E> class ROutputShdr;
 template <typename E> class RStrtabSection;
 template <typename E> class RSymtabSection;
 
+template <typename T> class ConcurrentMap;
+
 class ZlibCompressor;
 class GzipCompressor;
 class TarFile;
@@ -302,6 +304,59 @@ class InputSection {
   void report_undef(Context<E> &ctx, Symbol<E> &sym);
 };
 
+//
+// hyperloglog.cc
+//
+
+class HyperLogLog {
+public:
+  HyperLogLog() : buckets(NBUCKETS) {}
+
+  void insert(u32 hash) {
+    merge_one(hash & (NBUCKETS - 1), __builtin_clz(hash) + 1);
+  }
+
+  void merge_one(i64 idx, u8 newval) {
+    u8 cur = buckets[idx];
+    while (cur < newval)
+      if (buckets[idx].compare_exchange_strong(cur, newval))
+        break;
+  }
+
+  i64 get_cardinality() const;
+  void merge(const HyperLogLog &other);
+
+private:
+  static constexpr i64 NBUCKETS = 2048;
+  static constexpr double ALPHA = 0.79402;
+
+  std::vector<std::atomic<u8>> buckets;
+};
+
+//
+// concurrent_map.cc
+//
+
+template <typename T>
+class ConcurrentMap {
+public:
+  ConcurrentMap();
+  ConcurrentMap(i64 nbuckets);
+  ~ConcurrentMap();
+
+  void resize(i64 nbuckets);
+  std::pair<T *, bool> insert(std::string_view key, u64 hash, const T &val);
+
+  bool has_key(i64 idx) {
+    return keys[idx];
+  }
+
+  i64 nbuckets = 0;
+  std::atomic<const char *> *keys = nullptr;
+  u32 *sizes = nullptr;
+  T *values = nullptr;
+};
+
 //
 // output_chunks.cc
 //
@@ -645,27 +700,22 @@ class MergedSection : public OutputChunk<E> {
   static MergedSection<E> *
   get_instance(Context<E> &ctx, std::string_view name, u64 type, u64 flags);
 
-  SectionFragment<E> *insert(std::string_view data, i64 alignment);
-  void assign_offsets();
+  SectionFragment<E> *insert(std::string_view data, u64 hash, i64 alignment);
+  void assign_offsets(Context<E> &ctx);
 
   void copy_buf(Context<E> &ctx) override;
   void write_to(Context<E> &ctx, u8 *buf) override;
 
-private:
-  using MapTy =
-    tbb::concurrent_unordered_map<std::string_view, SectionFragment<E>>;
+  HyperLogLog estimator;
 
+private:
   static constexpr i64 NUM_SHARDS = 64;
 
-  MergedSection(std::string_view name, u64 flags, u32 type)
-    : OutputChunk<E>(this->SYNTHETIC) {
-    this->name = name;
-    this->shdr.sh_flags = flags;
-    this->shdr.sh_type = type;
-  }
+  MergedSection(std::string_view name, u64 flags, u32 type);
 
-  MapTy maps[NUM_SHARDS];
+  ConcurrentMap<SectionFragment<E>> map;
   i64 shard_offsets[NUM_SHARDS + 1] = {};
   tbb::enumerable_thread_specific<i64> max_alignments;
+  std::once_flag once_flag;
 };
 
 template <typename E>
@@ -869,6 +919,16 @@ struct ComdatGroup {
   std::atomic_uint32_t owner = -1;
 };
 
+template <typename E>
+struct MergeableSection {
+  MergedSection<E> *parent;
+  ElfShdr<E> shdr;
+  std::vector<std::string_view> strings;
+  std::vector<u64> hashes;
+  std::vector<u32> frag_offsets;
+  std::vector<SectionFragment<E> *> fragments;
+};
+
 // InputFile is the base class of ObjectFile and SharedFile.
 template <typename E>
 class InputFile {
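To see why `get_cardinality` works: each distinct hash updates one of 2048 registers with the largest leading-zero count it has observed, plus one. A register holding rank r suggests roughly 2^r distinct values hashed into that bucket, and the harmonic-style sum `z` in hyperloglog.cc smooths outliers across buckets. A standalone sketch (illustrative names; the constants are copied from the patch, and the multiplicative hash is only for generating test input):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Estimator {
  static constexpr int64_t NBUCKETS = 2048; // 2^11
  static constexpr double ALPHA = 0.79402;  // bias-correction constant from the patch
  std::vector<uint8_t> buckets = std::vector<uint8_t>(NBUCKETS);

  void insert(uint32_t hash) {
    if (!hash)
      return; // keep __builtin_clz well-defined (it is UB for 0)
    int64_t idx = hash & (NBUCKETS - 1);    // low 11 bits pick a bucket
    uint8_t rank = __builtin_clz(hash) + 1; // leading-zero count + 1
    if (buckets[idx] < rank)
      buckets[idx] = rank;                  // keep the per-bucket maximum
  }

  int64_t get_cardinality() const {
    double z = 0;
    for (uint8_t val : buckets)
      z += pow(2, -val);
    return ALPHA * NBUCKETS * NBUCKETS / z;
  }
};

int main() {
  Estimator e;
  // Feed in 1,000,000 distinct values, spread out by a 64-bit
  // multiplicative hash so the 32-bit inputs are well-distributed.
  for (uint64_t i = 1; i <= 1000000; i++)
    e.insert((uint32_t)((i * 0x9e3779b97f4a7c15ull) >> 32));
  printf("estimated cardinality: %lld\n", (long long)e.get_cardinality());
}
```

With 2048 buckets, HyperLogLog's theoretical standard error is about 1.04/√2048 ≈ 2.3%. The patch's ALPHA appears to sit a little above the textbook constant for this bucket count, which errs toward a slightly larger estimate; since the number only sizes a hash table, a bias in that direction is harmless.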
 
@@ -911,6 +971,7 @@ class ObjectFile : public InputFile<E> {
   static ObjectFile<E> *create_internal_file(Context<E> &ctx);
 
   void parse(Context<E> &ctx);
+  void register_section_pieces(Context<E> &ctx);
   void resolve_lazy_symbols(Context<E> &ctx);
   void resolve_regular_symbols(Context<E> &ctx);
   void mark_live_objects(Context<E> &ctx,
@@ -981,6 +1042,7 @@ class ObjectFile : public InputFile<E> {
   std::string_view symbol_strtab;
   const ElfShdr<E> *symtab_sec;
   std::span<u32> symtab_shndx_sec;
+  std::vector<std::unique_ptr<MergeableSection<E>>> mergeable_sections;
 };
 
 // SharedFile represents an input .so file.
diff --git a/object_file.cc b/object_file.cc
index 9fdca0cab2..b353a154c3 100644
--- a/object_file.cc
+++ b/object_file.cc
@@ -510,12 +510,6 @@ void ObjectFile<E>::initialize_symbols(Context<E> &ctx) {
   }
 }
 
-template <typename E>
-struct MergeableSection {
-  std::vector<SectionFragment<E> *> fragments;
-  std::vector<u32> frag_offsets;
-};
-
 static size_t find_null(std::string_view data, u64 entsize) {
   if (entsize == 1)
     return data.find('\0');
@@ -545,17 +539,17 @@
 //
 // We do not support mergeable sections that have relocations.
 template <typename E>
-static MergeableSection<E>
+static std::unique_ptr<MergeableSection<E>>
 split_section(Context<E> &ctx, InputSection<E> &sec) {
-  MergeableSection<E> rec;
-
-  MergedSection<E> *parent =
-    MergedSection<E>::get_instance(ctx, sec.name(), sec.shdr.sh_type,
-                                   sec.shdr.sh_flags);
+  std::unique_ptr<MergeableSection<E>> rec(new MergeableSection<E>);
+  rec->parent = MergedSection<E>::get_instance(ctx, sec.name(), sec.shdr.sh_type,
+                                               sec.shdr.sh_flags);
+  rec->shdr = sec.shdr;
 
   std::string_view data = sec.contents;
   const char *begin = data.data();
   u64 entsize = sec.shdr.sh_entsize;
+  HyperLogLog estimator;
 
   static_assert(sizeof(SectionFragment<E>::alignment) == 2);
   if (sec.shdr.sh_addralign >= UINT16_MAX)
@@ -570,9 +564,12 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
       std::string_view substr = data.substr(0, end + entsize);
       data = data.substr(end + entsize);
 
-      SectionFragment<E> *frag = parent->insert(substr, sec.shdr.sh_addralign);
-      rec.fragments.push_back(frag);
-      rec.frag_offsets.push_back(substr.data() - begin);
+      rec->strings.push_back(substr);
+      rec->frag_offsets.push_back(substr.data() - begin);
+
+      u64 hash = hash_string(substr);
+      rec->hashes.push_back(hash);
+      estimator.insert(hash);
     }
   } else {
     if (data.size() % entsize)
@@ -582,15 +579,19 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
       std::string_view substr = data.substr(0, entsize);
       data = data.substr(entsize);
 
-      SectionFragment<E> *frag = parent->insert(substr, sec.shdr.sh_addralign);
-      rec.fragments.push_back(frag);
-      rec.frag_offsets.push_back(substr.data() - begin);
+      rec->strings.push_back(substr);
+      rec->frag_offsets.push_back(substr.data() - begin);
+
+      u64 hash = hash_string(substr);
+      rec->hashes.push_back(hash);
+      estimator.insert(hash);
     }
   }
 
-  static Counter counter("string_fragments");
-  counter += rec.fragments.size();
+  rec->parent->estimator.merge(estimator);
 
+  static Counter counter("string_fragments");
+  counter += rec->strings.size();
   return rec;
 }
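For intuition on what split_section now records, here is a standalone sketch of the SHF_STRINGS path (illustrative code, not mold's: it hard-codes entsize = 1, skips the hashing and HyperLogLog steps, and assumes well-formed input, where the real code reports a Fatal error): each piece keeps its NUL terminator, and frag_offsets remembers where each piece started so relocations and symbol values can be translated later.

```cpp
#include <cstdint>
#include <cstdio>
#include <string_view>
#include <vector>

int main() {
  // A .rodata.str1.1-style section body: three NUL-terminated strings.
  std::string_view data("foo\0barbaz\0x\0", 13);
  const char *begin = data.data();

  std::vector<std::string_view> strings;
  std::vector<uint32_t> frag_offsets;

  while (!data.empty()) {
    size_t end = data.find('\0');                      // entsize == 1 case of find_null
    std::string_view substr = data.substr(0, end + 1); // include the NUL
    strings.push_back(substr);
    frag_offsets.push_back(substr.data() - begin);     // start offset of this piece
    data = data.substr(end + 1);
  }

  for (size_t i = 0; i < strings.size(); i++)
    printf("piece at offset %2u: \"%s\"\n", frag_offsets[i], strings[i].data());
  // piece at offset  0: "foo"
  // piece at offset  4: "barbaz"
  // piece at offset 11: "x"
}
```

The crucial change in the patch is that pieces are only recorded at this point; nothing touches the shared MergedSection map until every file has contributed its HyperLogLog counts and register_section_pieces runs as a separate pass.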
@@ -638,7 +639,7 @@ split_section(Context<E> &ctx, InputSection<E> &sec) {
 // is attached to the symbol.
 template <typename E>
 void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
-  std::vector<MergeableSection<E>> mergeable_sections(sections.size());
+  mergeable_sections.resize(sections.size());
 
   for (i64 i = 0; i < sections.size(); i++) {
     std::unique_ptr<InputSection<E>> &isec = sections[i];
@@ -648,6 +649,15 @@ void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
       isec->is_alive = false;
     }
   }
+}
+
+template <typename E>
+void ObjectFile<E>::register_section_pieces(Context<E> &ctx) {
+  for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections)
+    if (m)
+      for (i64 i = 0; i < m->strings.size(); i++)
+        m->fragments.push_back(m->parent->insert(m->strings[i], m->hashes[i],
+                                                 m->shdr.sh_addralign));
 
   // Initialize rel_fragments
   for (std::unique_ptr<InputSection<E>> &isec : sections) {
@@ -663,13 +673,10 @@ void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
     for (i64 i = 0; i < rels.size(); i++) {
       const ElfRel<E> &rel = rels[i];
       const ElfSym<E> &esym = elf_syms[rel.r_sym];
-
-      if (esym.st_type == STT_SECTION) {
-        MergeableSection<E> &m = mergeable_sections[get_shndx(esym)];
-        if (!m.fragments.empty())
-          len++;
-      }
+      if (esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)])
+        len++;
     }
+
     if (len == 0)
       continue;
 
@@ -683,19 +690,20 @@ void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
       if (esym.st_type != STT_SECTION)
         continue;
 
-      MergeableSection<E> &m = mergeable_sections[get_shndx(esym)];
-      if (m.fragments.empty())
+      std::unique_ptr<MergeableSection<E>> &m =
+        mergeable_sections[get_shndx(esym)];
+      if (!m)
         continue;
 
       i64 offset = esym.st_value + isec->get_addend(rel);
-      std::span<u32> offsets = m.frag_offsets;
+      std::span<u32> offsets = m->frag_offsets;
 
       auto it = std::upper_bound(offsets.begin(), offsets.end(), offset);
       if (it == offsets.begin())
         Fatal(ctx) << *this << ": bad relocation at " << rel.r_sym;
       i64 idx = it - 1 - offsets.begin();
 
-      isec->rel_fragments[frag_idx++] = {m.fragments[idx], (i32)i,
+      isec->rel_fragments[frag_idx++] = {m->fragments[idx], (i32)i,
                                          (i32)(offset - offsets[idx])};
     }
 
@@ -708,11 +716,12 @@ void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
     if (esym.is_abs() || esym.is_common())
       continue;
 
-    MergeableSection<E> &m = mergeable_sections[get_shndx(esym)];
-    if (m.fragments.empty())
+    std::unique_ptr<MergeableSection<E>> &m =
+      mergeable_sections[get_shndx(esym)];
+    if (!m)
       continue;
 
-    std::span<u32> offsets = m.frag_offsets;
+    std::span<u32> offsets = m->frag_offsets;
 
     auto it = std::upper_bound(offsets.begin(), offsets.end(), esym.st_value);
     if (it == offsets.begin())
@@ -722,12 +731,13 @@ void ObjectFile<E>::initialize_mergeable_sections(Context<E> &ctx) {
     if (i < first_global)
       this->symbols[i]->value = esym.st_value - offsets[idx];
 
-    sym_fragments[i].frag = m.fragments[idx];
+    sym_fragments[i].frag = m->fragments[idx];
     sym_fragments[i].addend = esym.st_value - offsets[idx];
   }
 
-  for (MergeableSection<E> &m : mergeable_sections)
-    fragments.insert(fragments.end(), m.fragments.begin(), m.fragments.end());
+  for (std::unique_ptr<MergeableSection<E>> &m : mergeable_sections)
+    if (m)
+      fragments.insert(fragments.end(), m->fragments.begin(),
+                       m->fragments.end());
 }
 
 template <typename E>
diff --git a/output_chunks.cc b/output_chunks.cc
index b9825b99e5..11cc5f6607 100644
--- a/output_chunks.cc
+++ b/output_chunks.cc
@@ -1115,6 +1115,14 @@ void GnuHashSection<E>::copy_buf(Context<E> &ctx) {
   }
 }
 
+template <typename E>
+MergedSection<E>::MergedSection(std::string_view name, u64 flags, u32 type)
+  : OutputChunk<E>(this->SYNTHETIC) {
+  this->name = name;
+  this->shdr.sh_flags = flags;
+  this->shdr.sh_type = type;
+}
+
 template <typename E>
 MergedSection<E> *
 MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
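Both the relocation and symbol paths above use the same lookup: frag_offsets is sorted by construction, so std::upper_bound finds the piece containing a given byte offset in O(log n), and the remainder becomes the addend. A small illustrative sketch (names are not mold's):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <span>
#include <utility>
#include <vector>

// Returns {index of the piece containing `offset`, offset within that piece}.
static std::pair<int64_t, int64_t>
find_piece(std::span<const uint32_t> offsets, int64_t offset) {
  auto it = std::upper_bound(offsets.begin(), offsets.end(), offset);
  // it == offsets.begin() would mean offset < offsets[0], i.e. the
  // offset belongs to no piece; the real code reports it with Fatal().
  int64_t idx = it - 1 - offsets.begin();
  return {idx, offset - offsets[idx]};
}

int main() {
  std::vector<uint32_t> offsets = {0, 4, 11, 13}; // piece start offsets
  auto [idx, addend] = find_piece(offsets, 12);
  printf("offset 12 -> piece %ld + %ld\n", (long)idx, (long)addend); // piece 2 + 1
}
```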
@@ -1150,78 +1158,63 @@ MergedSection<E>::get_instance(Context<E> &ctx, std::string_view name,
 
 template <typename E>
 SectionFragment<E> *
-MergedSection<E>::insert(std::string_view data, i64 alignment) {
+MergedSection<E>::insert(std::string_view data, u64 hash, i64 alignment) {
   ASSERT(alignment < UINT16_MAX);
 
-  std::string_view suffix = data;
-  if (suffix.size() > 32)
-    suffix = suffix.substr(suffix.size() - 32);
-  i64 shard = hash_string(suffix) % NUM_SHARDS;
+  std::call_once(once_flag, [&]() {
+    // We aim at a 2/3 occupancy ratio
+    map.resize(estimator.get_cardinality() * 3 / 2);
+  });
 
   SectionFragment<E> *frag;
-  {
-    auto [it, inserted] =
-      maps[shard].insert(std::pair(data, SectionFragment(this, data)));
-    frag = &it->second;
-  }
+  bool inserted;
+  std::tie(frag, inserted) = map.insert(data, hash, SectionFragment(this, data));
+  ASSERT(frag);
 
   for (u16 cur = frag->alignment; cur < alignment;)
     if (frag->alignment.compare_exchange_strong(cur, alignment))
       break;
-
-  max_alignments.local() = std::max(max_alignments.local(), alignment);
   return frag;
 }
 
 template <typename E>
-void MergedSection<E>::assign_offsets() {
-  std::vector<SectionFragment<E> *> fragments[NUM_SHARDS];
-  i64 sizes[NUM_SHARDS] = {};
-
-  tbb::parallel_for((i64)0, NUM_SHARDS, [&](i64 i) {
-    for (auto it = maps[i].begin(); it != maps[i].end(); it++)
-      if (SectionFragment<E> &frag = it->second; frag.is_alive)
-        fragments[i].push_back(&frag);
+void MergedSection<E>::assign_offsets(Context<E> &ctx) {
+  std::vector<SectionFragment<E> *> fragments(map.nbuckets);
+  for (i64 i = 0; i < map.nbuckets; i++)
+    fragments[i] = map.values + i;
 
-    // Sort section fragments to make an output deterministic.
-    std::sort(fragments[i].begin(), fragments[i].end(),
-              [&](SectionFragment<E> *a, SectionFragment<E> *b) {
-                if (a->alignment != b->alignment)
-                  return a->alignment > b->alignment;
-                if (a->data.size() != b->data.size())
-                  return a->data.size() < b->data.size();
-                return a->data < b->data;
-              });
-
-    i64 offset = 0;
-    for (SectionFragment<E> *frag : fragments[i]) {
-      offset = align_to(offset, frag->alignment);
-      frag->offset = offset;
-      offset += frag->data.size();
-    }
-
-    sizes[i] = offset;
-  });
+  // Sort fragments to make the output deterministic.
+  tbb::parallel_sort(fragments.begin(), fragments.end(),
+                     [](SectionFragment<E> *a, SectionFragment<E> *b) {
+                       if (!a->is_alive || !b->is_alive)
+                         return a->is_alive && !b->is_alive;
+                       if (a->alignment != b->alignment)
+                         return a->alignment < b->alignment;
+                       if (a->data.size() != b->data.size())
+                         return a->data.size() < b->data.size();
+                       return a->data < b->data;
+                     });
 
-  i64 alignment = 1;
-  for (i64 x : max_alignments)
-    alignment = std::max(alignment, x);
-
-  for (i64 i = 1; i < NUM_SHARDS + 1; i++)
-    shard_offsets[i] =
-      align_to(shard_offsets[i - 1] + sizes[i - 1], alignment);
-
-  tbb::parallel_for((i64)1, NUM_SHARDS, [&](i64 i) {
-    for (SectionFragment<E> *frag : fragments[i])
-      frag->offset += shard_offsets[i];
-  });
+  // Remove dead fragments.
+  auto mid = std::partition_point(fragments.begin(), fragments.end(),
+                                  [](SectionFragment<E> *frag) -> bool {
+                                    return frag->is_alive;
+                                  });
+  fragments.resize(mid - fragments.begin());
 
+  // Assign offsets.
+  i64 offset = 0;
+  for (SectionFragment<E> *frag : fragments) {
+    offset = align_to(offset, frag->alignment);
+    frag->offset = offset;
+    offset += frag->data.size();
+    this->shdr.sh_addralign =
+      std::max<i64>(this->shdr.sh_addralign, frag->alignment);
+  }
+  this->shdr.sh_size = offset;
 
-  this->shdr.sh_size = shard_offsets[NUM_SHARDS];
-  this->shdr.sh_addralign = alignment;
 
   static Counter merged_strings("merged_strings");
-  for (std::span<SectionFragment<E> *> span : fragments)
-    merged_strings += span.size();
+  merged_strings += fragments.size();
 }
 
 template <typename E>
@@ -1231,11 +1224,12 @@ void MergedSection<E>::copy_buf(Context<E> &ctx) {
 
 template <typename E>
 void MergedSection<E>::write_to(Context<E> &ctx, u8 *buf) {
-  tbb::parallel_for((i64)0, NUM_SHARDS, [&](i64 i) {
-    memset(buf + shard_offsets[i], 0, shard_offsets[i + 1] - shard_offsets[i]);
-    for (auto it = maps[i].begin(); it != maps[i].end(); it++)
-      if (SectionFragment<E> &frag = it->second; frag.is_alive)
-        memcpy(buf + frag.offset, frag.data.data(), frag.data.size());
+  memset(buf, 0, this->shdr.sh_size);
+
+  tbb::parallel_for_each(map.values, map.values + map.nbuckets,
+                         [&](SectionFragment<E> &frag) {
+    if (frag.is_alive)
+      memcpy(buf + frag.offset, frag.data.data(), frag.data.size());
   });
 }
diff --git a/passes.cc b/passes.cc
index 168e1a3f29..287ebf854c 100644
--- a/passes.cc
+++ b/passes.cc
@@ -198,7 +198,8 @@ void add_comment_string(Context<E> &ctx, std::string str) {
   std::string_view buf = save_string(ctx, str);
   MergedSection<E> *sec =
     MergedSection<E>::get_instance(ctx, ".comment", SHT_PROGBITS, 0);
-  SectionFragment<E> *frag = sec->insert({buf.data(), buf.size() + 1}, 1);
+  std::string_view data(buf.data(), buf.size() + 1);
+  SectionFragment<E> *frag = sec->insert(data, hash_string(data), 1);
   frag->is_alive = true;
 }
 
@@ -221,9 +222,10 @@ void compute_merged_section_sizes(Context<E> &ctx) {
   if (char *env = getenv("MOLD_DEBUG"); env && env[0])
     add_comment_string(ctx, "mold command line: " + get_cmdline_args(ctx));
 
+  Timer t2(ctx, "MergedSection assign_offsets");
   tbb::parallel_for_each(ctx.merged_sections,
-                         [](std::unique_ptr<MergedSection<E>> &sec) {
-    sec->assign_offsets();
+                         [&](std::unique_ptr<MergedSection<E>> &sec) {
+    sec->assign_offsets(ctx);
   });
 }
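Finally, the arithmetic behind the std::call_once block in MergedSection::insert: sizing the table to 3/2 of the estimated cardinality targets at most 2/3 occupancy, and rounding up to a power of two (assuming next_power_of_two rounds up, as its use in resize suggests) only lowers the load factor further. A worked example:

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t cardinality = 1000000;      // HyperLogLog estimate of unique strings
  int64_t want = cardinality * 3 / 2; // aim for <= 2/3 occupancy
  // next_power_of_two equivalent: round up to a power of two.
  int64_t nbuckets = (int64_t)std::bit_ceil((uint64_t)want);
  printf("%lld buckets, load factor %.2f\n",
         (long long)nbuckets, (double)cardinality / nbuckets);
  // -> 2097152 buckets, load factor 0.48
}
```

Keeping the table at or below roughly 2/3 full is what makes the bounded linear probing in ConcurrentMap::insert safe in practice: probe sequences stay short, so the MIN_NBUCKETS retry limit is effectively never reached.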