Skip to content

Commit

Permalink
[lld-macho] Implement ICF
Browse files Browse the repository at this point in the history
ICF = Identical C(ode|OMDAT) Folding

This is the LLD ELF/COFF algorithm, adapted for MachO. So far, only `-icf all` is supported. In order to support `-icf safe`, we will need to port address-significance tables (`.addrsig` directives) to MachO, which will come in later diffs.

`check-{llvm,clang,lld}` have 0 regressions for `lld -icf all` vs. baseline ld64.

We only run ICF on `__TEXT,__text` for reasons explained in the block comment in `ConcatOutputSection.cpp`.

Here is the perf impact for linking `chromium_framework` on a Mac Pro (16-core Xeon W) for the non-ICF case vs. pre-ICF:
```
    N           Min           Max        Median           Avg        Stddev
x  20          4.27          4.44          4.34         4.349   0.043029977
+  20          4.37          4.46         4.405        4.4115   0.025188761
Difference at 95.0% confidence
        0.0625 +/- 0.0225658
        1.43711% +/- 0.518873%
        (Student's t, pooled s = 0.0352566)
```

Reviewed By: #lld-macho, int3

Differential Revision: https://reviews.llvm.org/D103292
  • Loading branch information
gkmhub committed Jun 17, 2021
1 parent 734d688 commit f27e454
Show file tree
Hide file tree
Showing 20 changed files with 849 additions and 18 deletions.
1 change: 1 addition & 0 deletions lld/MachO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ add_lld_library(lldMachO2
DriverUtils.cpp
Dwarf.cpp
ExportTrie.cpp
ICF.cpp
InputFiles.cpp
InputSection.cpp
LTO.cpp
Expand Down
12 changes: 10 additions & 2 deletions lld/MachO/ConcatOutputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
#include "lld/Common/Memory.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/ScopedPrinter.h"

#include <algorithm>
#include "llvm/Support/TimeProfiler.h"

using namespace llvm;
using namespace llvm::MachO;
Expand Down Expand Up @@ -357,3 +356,12 @@ void ConcatOutputSection::mergeFlags(InputSection *input) {
flags |= input->flags;
flags &= pureMask;
}

void ConcatOutputSection::eraseOmittedInputSections() {
// Remove the duplicates from inputs
inputs.erase(std::remove_if(inputs.begin(), inputs.end(),
[](const ConcatInputSection *isec) -> bool {
return isec->shouldOmitFromOutput();
}),
inputs.end());
}
1 change: 1 addition & 0 deletions lld/MachO/ConcatOutputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class ConcatOutputSection final : public OutputSection {
void finalize() override;
bool needsThunks() const;
uint64_t estimateStubsInRangeVA(size_t callIdx) const;
void eraseOmittedInputSections();

void writeTo(uint8_t *buf) const override;

Expand Down
8 changes: 8 additions & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ enum class UndefinedSymbolTreatment {
dynamic_lookup,
};

// Identical-code-folding mode requested through the `-icf` option. The value
// is parsed by getICFLevel() in Driver.cpp and stored in
// Configuration::icfLevel, which defaults to `none`.
enum class ICFLevel {
// Unrecognized `-icf` value; getICFLevel() warns and falls back to `none`.
unknown,
// Folding disabled.
none,
// Accepted on the command line, but getICFLevel() currently warns that it is
// not yet implemented and reverts to `none`.
safe,
// Folding enabled.
all,
};

struct SectionAlign {
llvm::StringRef segName;
llvm::StringRef sectName;
Expand Down Expand Up @@ -126,6 +133,7 @@ struct Configuration {
NamespaceKind namespaceKind = NamespaceKind::twolevel;
UndefinedSymbolTreatment undefinedSymbolTreatment =
UndefinedSymbolTreatment::error;
ICFLevel icfLevel = ICFLevel::none;
llvm::MachO::HeaderFileType outputType;
std::vector<llvm::StringRef> systemLibraryRoots;
std::vector<llvm::StringRef> librarySearchPaths;
Expand Down
25 changes: 25 additions & 0 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,29 @@ getUndefinedSymbolTreatment(const ArgList &args) {
return treatment;
}

// Translates the last `-icf` option in ARGS into an ICFLevel.
//
// A missing or empty value means `none`. Every rejected request emits a
// warning and yields ICFLevel::none:
//   * an unrecognized value,
//   * any level other than `none` combined with -no_deduplicate,
//   * `safe`, which is not implemented yet.
// Only `all` (without -no_deduplicate) is returned as an enabled level.
static ICFLevel getICFLevel(const ArgList &args) {
  // Query both options up front, in the same order as before.
  const bool dedupDisabled = args.hasArg(OPT_no_deduplicate);
  const StringRef requested = args.getLastArgValue(OPT_icf);

  const ICFLevel parsed = StringSwitch<ICFLevel>(requested)
                              .Cases("none", "", ICFLevel::none)
                              .Case("safe", ICFLevel::safe)
                              .Case("all", ICFLevel::all)
                              .Default(ICFLevel::unknown);

  if (parsed == ICFLevel::unknown) {
    warn(Twine("unknown -icf OPTION `") + requested +
         "', defaulting to `none'");
    return ICFLevel::none;
  }
  // The -no_deduplicate conflict takes precedence over the `safe' notice.
  if (parsed != ICFLevel::none && dedupDisabled) {
    warn(Twine("`-icf " + requested +
               "' conflicts with -no_deduplicate, setting to `none'"));
    return ICFLevel::none;
  }
  if (parsed == ICFLevel::safe) {
    warn(Twine("`-icf safe' is not yet implemented, reverting to `none'"));
    return ICFLevel::none;
  }
  return parsed;
}

static void warnIfDeprecatedOption(const Option &opt) {
if (!opt.getGroup().isValid())
return;
Expand Down Expand Up @@ -1096,6 +1119,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,

config->undefinedSymbolTreatment = getUndefinedSymbolTreatment(args);

config->icfLevel = getICFLevel(args);

if (config->outputType == MH_EXECUTE)
config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"),
/*file=*/nullptr,
Expand Down
257 changes: 257 additions & 0 deletions lld/MachO/ICF.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
//===- ICF.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ICF.h"
#include "ConcatOutputSection.h"
#include "InputSection.h"
#include "Symbols.h"
#include "llvm/Support/Parallel.h"

#include <atomic>

using namespace llvm;
using namespace lld;
using namespace lld::macho;

// Copies the section pointers from INPUTS into icfInputs. INPUTS itself is
// not modified here.
ICF::ICF(std::vector<ConcatInputSection *> &inputs)
    : icfInputs(inputs.begin(), inputs.end()) {}

// ICF = Identical Code Folding
//
// We only fold __TEXT,__text, so this is really "code" folding, and not
// "COMDAT" folding. String and scalar constant literals are deduplicated
// elsewhere.
//
// Summary of segments & sections:
//
// Since folding never occurs across output-section boundaries,
// ConcatOutputSection is the natural input for ICF.
//
// The __TEXT segment is readonly at the MMU. Some sections are already
// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
// synthetic and inherently free of duplicates (__TEXT,__stubs &
// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
// could work on __TEXT,__const, but doing so induces many test failures.
//
// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
// thus ineligible for ICF.
//
// The __DATA_CONST segment is read/write at the MMU, but is logically const to
// the application after dyld applies fixups to pointer data. Some sections are
// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
// (__DATA_CONST,__got). There are no ICF opportunities here.
//
// The __DATA segment is read/write at the MMU, and as application-writeable
// data, none of its sections are eligible for ICF.
//
// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
// of the segregation algorithm.
//
// FIXME(gkm): implement keep-unique attributes
// FIXME(gkm): implement address-significance tables for MachO object files

// Pass counter. ICF::run() uses it as the loop variable while seeding hashes,
// and ICF::forEachClass() increments it once per sweep. Its parity selects
// which icfEqClass slot is read (icfPass % 2); the other slot,
// (icfPass + 1) % 2, receives the values written during the sweep.
static unsigned icfPass = 0;
// Set by ICF::segregate() whenever it splits a class, telling ICF::run() that
// another equalsVariable sweep is needed. Atomic because segregate() can be
// reached from the parallel loop in ICF::forEachClass().
static std::atomic<bool> icfRepeat{false};

// Returns true when IA and IB agree on everything that cannot change between
// ICF passes: raw data, flags, and for each relocation pair its type, pcrel,
// length, offset, addend, and whether the referent is a Symbol or an
// InputSection. The identity of the referents is deliberately not compared.
static bool equalsConstant(const ConcatInputSection *ia,
                           const ConcatInputSection *ib) {
  if (ia->data.size() != ib->data.size() || ia->data != ib->data ||
      ia->flags != ib->flags || ia->relocs.size() != ib->relocs.size())
    return false;

  const auto sameFixedFields = [](const Reloc &ra, const Reloc &rb) {
    return ra.type == rb.type && ra.pcrel == rb.pcrel &&
           ra.length == rb.length && ra.offset == rb.offset &&
           ra.addend == rb.addend &&
           ra.referent.is<Symbol *>() == rb.referent.is<Symbol *>();
  };
  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
                    sameFixedFields);
}

// Compare only the relocation referents.
//
// Expects IA and IB to have the same number of relocations (asserted) and
// each relocation pair to agree on whether its referent is a Symbol or an
// InputSection; equalsConstant() checks both.
//
// Returns true when, for every relocation pair, the referents are the very
// same object or are currently in the same equivalence class, as recorded in
// icfEqClass[icfPass % 2].
static bool equalsVariable(const ConcatInputSection *ia,
                           const ConcatInputSection *ib) {
  assert(ia->relocs.size() == ib->relocs.size());
  auto f = [&](const Reloc &ra, const Reloc &rb) {
    if (ra.referent == rb.referent)
      return true;
    if (ra.referent.is<Symbol *>()) {
      const auto *sa = ra.referent.get<Symbol *>();
      const auto *sb = rb.referent.get<Symbol *>();
      if (sa->kind() != sb->kind())
        return false;
      if (isa<Defined>(sa)) {
        // Both symbols have the same kind, so SB is a Defined as well.
        const auto *da = cast<Defined>(sa);
        const auto *db = cast<Defined>(sb);
        if (da->value != db->value)
          return false;
        // Compare the two symbols with each other. The previous code tested
        // DA against itself, which could never fail.
        if (da->isAbsolute() != db->isAbsolute())
          return false;
        // A symbol with a section never matches one without. This also keeps
        // the dereference of db->isec below safe.
        if (!da->isec != !db->isec)
          return false;
        if (da->isec)
          if (da->isec->icfEqClass[icfPass % 2] !=
              db->isec->icfEqClass[icfPass % 2])
            return false;
      } else if (isa<DylibSymbol>(sa)) {
        // There is one DylibSymbol per gotIndex and we already checked for
        // symbol equality, thus we know that these must be different.
        return false;
      } else {
        llvm_unreachable("equalsVariable symbol kind");
      }
    } else {
      const auto *sa = ra.referent.get<InputSection *>();
      const auto *sb = rb.referent.get<InputSection *>();
      if (sa->icfEqClass[icfPass % 2] != sb->icfEqClass[icfPass % 2])
        return false;
    }
    return true;
  };
  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
                    f);
}

// Returns the index of the first element in (BEGIN, END) whose current
// equivalence class differs from that of icfInputs[BEGIN], or END when every
// element in that range shares its class.
size_t ICF::findBoundary(size_t begin, size_t end) {
  const unsigned slot = icfPass % 2;
  const uint64_t leaderClass = icfInputs[begin]->icfEqClass[slot];
  size_t idx = begin + 1;
  while (idx < end && icfInputs[idx]->icfEqClass[slot] == leaderClass)
    ++idx;
  return idx < end ? idx : end;
}

// Walks [BEGIN, END) one equivalence class at a time and calls FUNC(lo, hi)
// for each maximal run [lo, hi) of elements sharing the same class.
void ICF::forEachClassRange(size_t begin, size_t end,
                            std::function<void(size_t, size_t)> func) {
  for (size_t lo = begin; lo < end;) {
    const size_t hi = findBoundary(lo, end);
    func(lo, hi);
    lo = hi;
  }
}

// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
// with matching equivalence class
void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
// Only use threads when the benefits outweigh the overhead.
const size_t threadingThreshold = 1024;
if (icfInputs.size() < threadingThreshold) {
forEachClassRange(0, icfInputs.size(), func);
++icfPass;
return;
}

// Shard into non-overlapping intervals, and call FUNC in parallel. The
// sharding must be completed before any calls to FUNC are made so that FUNC
// can modify the InputSection in its shard without causing data races.
const size_t shards = 256;
size_t step = icfInputs.size() / shards;
size_t boundaries[shards + 1];
boundaries[0] = 0;
boundaries[shards] = icfInputs.size();
parallelForEachN(1, shards, [&](size_t i) {
boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
});
parallelForEachN(1, shards + 1, [&](size_t i) {
if (boundaries[i - 1] < boundaries[i]) {
forEachClassRange(boundaries[i - 1], boundaries[i], func);
}
});
++icfPass;
}

void ICF::run() {
// Into each origin-section hash, combine all reloc referent section hashes.
for (icfPass = 0; icfPass < 2; ++icfPass) {
parallelForEach(icfInputs, [&](InputSection *isec) {
uint64_t hash = isec->icfEqClass[icfPass % 2];
for (const Reloc &r : isec->relocs) {
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
if (auto *dylibSym = dyn_cast<DylibSymbol>(sym))
hash += dylibSym->stubsHelperIndex;
else if (auto *defined = dyn_cast<Defined>(sym))
hash +=
defined->value +
(defined->isec ? defined->isec->icfEqClass[icfPass % 2] : 0);
else
llvm_unreachable("foldIdenticalSections symbol kind");
}
}
// Set MSB to 1 to avoid collisions with non-hashed classes.
isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63);
});
}

llvm::stable_sort(icfInputs,
[](const InputSection *a, const InputSection *b) {
return a->icfEqClass[0] < b->icfEqClass[0];
});
forEachClass(
[&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); });

// Split equivalence groups by comparing relocations until convergence
do {
icfRepeat = false;
forEachClass([&](size_t begin, size_t end) {
segregate(begin, end, equalsVariable);
});
} while (icfRepeat);
log("ICF needed " + Twine(icfPass) + " iterations");

// Fold sections within equivalence classes
forEachClass([&](size_t begin, size_t end) {
if (end - begin < 2)
return;
ConcatInputSection *beginIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i)
beginIsec->foldIdentical(icfInputs[i]);
});
}

// Split an equivalence class into smaller classes.
void ICF::segregate(
size_t begin, size_t end,
std::function<bool(const ConcatInputSection *, const ConcatInputSection *)>
equals) {
while (begin < end) {
// Divide [begin, end) into two. Let mid be the start index of the
// second group.
auto bound = std::stable_partition(icfInputs.begin() + begin + 1,
icfInputs.begin() + end,
[&](ConcatInputSection *isec) {
return equals(icfInputs[begin], isec);
});
size_t mid = bound - icfInputs.begin();

// Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
// equivalence class ID because every group ends with a unique index.
for (size_t i = begin; i < mid; ++i)
icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;

// If we created a group, we need to iterate the main loop again.
if (mid != end)
icfRepeat = true;

begin = mid;
}
}
Loading

0 comments on commit f27e454

Please sign in to comment.