diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7257ebd0fac99..c66317f5e8479 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2393,12 +2393,6 @@ static void readSymbolPartitionSection(InputSectionBase *s) { sym->partition = newPart.getNumber(); } -static Symbol *addUnusedUndefined(StringRef name, - uint8_t binding = STB_GLOBAL) { - return symtab.addSymbol( - Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0}); -} - static void markBuffersAsDontNeed(bool skipLinkedOutput) { // With --thinlto-index-only, all buffers are nearly unused from now on // (except symbol/section names used by infrequent passes). Mark input file @@ -2485,15 +2479,15 @@ static std::vector addWrappedSymbols(opt::InputArgList &args) { continue; Symbol *wrap = - addUnusedUndefined(saver().save("__wrap_" + name), sym->binding); + symtab.addUnusedUndefined(saver().save("__wrap_" + name), sym->binding); // If __real_ is referenced, pull in the symbol if it is lazy. Do this after // processing __wrap_ as that may have referenced __real_. StringRef realName = saver().save("__real_" + name); if (symtab.find(realName)) - addUnusedUndefined(name, sym->binding); + symtab.addUnusedUndefined(name, sym->binding); - Symbol *real = addUnusedUndefined(realName); + Symbol *real = symtab.addUnusedUndefined(realName); v.push_back({sym, real, wrap}); // We want to tell LTO not to inline symbols to be overwritten @@ -2723,7 +2717,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { // Handle -u/--undefined before input files. If both a.a and b.so define foo, // -u foo a.a b.so will extract a.a. for (StringRef name : config->undefined) - addUnusedUndefined(name)->referenced = true; + symtab.addUnusedUndefined(name)->referenced = true; // Add all files to the symbol table. This will add almost all // symbols that we need to the symbol table. This process might @@ -2748,13 +2742,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { config->hasDynSymTab = !ctx.sharedFiles.empty() || config->isPic || config->exportDynamic; - // Some symbols (such as __ehdr_start) are defined lazily only when there - // are undefined symbols for them, so we add these to trigger that logic. - for (StringRef name : script->referencedSymbols) { - Symbol *sym = addUnusedUndefined(name); - sym->isUsedInRegularObj = true; - sym->referenced = true; - } + script->addScriptReferencedSymbolsToSymTable(); // Prevent LTO from removing any definition referenced by -u. for (StringRef name : config->undefined) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 3af09a32b6511..f815b3ac6feed 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -198,15 +198,7 @@ static bool shouldDefineSym(SymbolAssignment *cmd) { if (cmd->name == ".") return false; - if (!cmd->provide) - return true; - - // If a symbol was in PROVIDE(), we need to define it only - // when it is a referenced undefined symbol. - Symbol *b = symtab.find(cmd->name); - if (b && !b->isDefined() && !b->isCommon()) - return true; - return false; + return !cmd->provide || LinkerScript::shouldAddProvideSym(cmd->name); } // Called by processSymbolAssignments() to assign definitions to @@ -1517,3 +1509,41 @@ void LinkerScript::checkFinalScriptConditions() const { checkMemoryRegion(lmaRegion, sec, sec->getLMA()); } } + +void LinkerScript::addScriptReferencedSymbolsToSymTable() { + // Some symbols (such as __ehdr_start) are defined lazily only when there + // are undefined symbols for them, so we add these to trigger that logic. + auto reference = [](StringRef name) { + Symbol *sym = symtab.addUnusedUndefined(name); + sym->isUsedInRegularObj = true; + sym->referenced = true; + }; + for (StringRef name : referencedSymbols) + reference(name); + + // Keeps track of references from which PROVIDE symbols have been added to the + // symbol table. + DenseSet added; + SmallVector *, 0> symRefsVec; + for (const auto &[name, symRefs] : provideMap) + if (LinkerScript::shouldAddProvideSym(name) && added.insert(name).second) + symRefsVec.push_back(&symRefs); + while (symRefsVec.size()) { + for (StringRef name : *symRefsVec.pop_back_val()) { + reference(name); + // Prevent the symbol from being discarded by --gc-sections. + script->referencedSymbols.push_back(name); + auto it = script->provideMap.find(name); + if (it != script->provideMap.end() && + LinkerScript::shouldAddProvideSym(name) && + added.insert(name).second) { + symRefsVec.push_back(&it->second); + } + } + } +} + +bool LinkerScript::shouldAddProvideSym(StringRef symName) { + Symbol *sym = symtab.find(symName); + return sym && !sym->isDefined() && !sym->isCommon(); +} diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 18eaf58b785e3..fa7c6eb9c0d8f 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include @@ -348,6 +349,18 @@ class LinkerScript final { // Check backward location counter assignment and memory region/LMA overflows. void checkFinalScriptConditions() const; + // Add symbols that are referenced in the linker script to the symbol table. + // Symbols referenced in a PROVIDE command are only added to the symbol table + // if the PROVIDE command actually provides the symbol. + // It also adds the symbols referenced by the used PROVIDE symbols to the + // linker script referenced symbols list. + void addScriptReferencedSymbolsToSymTable(); + + // Returns true if the PROVIDE symbol should be added to the link. + // A PROVIDE symbol is added to the link only if it satisfies an + // undefined reference. + static bool shouldAddProvideSym(StringRef symName); + // SECTIONS command list. SmallVector sectionCommands; @@ -379,6 +392,14 @@ class LinkerScript final { // Sections that will be warned/errored by --orphan-handling. SmallVector orphanSections; + + // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE + // expression. For example, if the PROVIDE command is: + // + // PROVIDE(v = a + b + c); + // + // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c'] + llvm::MapVector> provideMap; }; LLVM_LIBRARY_VISIBILITY extern std::unique_ptr script; diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 3bb1de99480f3..f90ce6fa74075 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/TimeProfiler.h" #include #include +#include #include using namespace llvm; @@ -138,6 +139,10 @@ class ScriptParser final : ScriptLexer { // A set to detect an INCLUDE() cycle. StringSet<> seen; + + // If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command, + // then this member is set to the PROVIDE symbol name. + std::optional activeProvideSym; }; } // namespace @@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) { ; return nullptr; } + llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym); + if (provide) + activeProvideSym = name; SymbolAssignment *cmd = readSymbolAssignment(name); cmd->provide = provide; cmd->hidden = hidden; @@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() { tok = unquote(tok); else if (!isValidSymbolName(tok)) setError("malformed number: " + tok); - script->referencedSymbols.push_back(tok); + if (activeProvideSym) + script->provideMap[*activeProvideSym].push_back(tok); + else + script->referencedSymbols.push_back(tok); return [=] { return script->getSymbolValue(tok, location); }; } diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index b3d97e4de7790..258a78ab40bb5 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -333,3 +333,7 @@ void SymbolTable::scanVersionScript() { // --dynamic-list. handleDynamicList(); } + +Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) { + return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0}); +} diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 37e31d2372963..269f7f284bc73 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -57,6 +57,9 @@ class SymbolTable { void handleDynamicList(); + Symbol *addUnusedUndefined(StringRef name, + uint8_t binding = llvm::ELF::STB_GLOBAL); + // Set of .so files to not link the same shared object file more than once. llvm::DenseMap soNames; diff --git a/lld/test/ELF/gc-sections-with-provide.s b/lld/test/ELF/gc-sections-with-provide.s new file mode 100644 index 0000000000000..3e5b1b1efe6ca --- /dev/null +++ b/lld/test/ELF/gc-sections-with-provide.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 + +# This test verifies that garbage-collection is correctly garbage collecting +# unused sections when the symbol of the unused section is only referred by +# an unused PROVIDE symbol. + +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o +# RUN: ld.lld -o a_nogc a.o -T script.t +# RUN: llvm-nm a_nogc | FileCheck -check-prefix=NOGC %s +# RUN: ld.lld -o a_gc a.o --gc-sections --print-gc-sections -T script.t | FileCheck --check-prefix=GC_LINK %s +# RUN: llvm-nm a_gc | FileCheck -check-prefix=GC %s + +NOGC-NOT: another_unused +NOGC: another_used +NOGC: bar +NOGC: baz +NOGC: baz_ref +NOGC: foo +NOGC-NOT: unused +NOGC: used + +GC_LINK: removing unused section a.o:(.text.bar) + +GC-NOT: another_unused +GC: another_used +GC-NOT: bar +GC: baz +GC: baz_ref +GC: foo +GC-NOT: unused +GC: used + +#--- a.s +.global _start +_start: + call foo + call used + +.section .text.foo,"ax",@progbits +foo: + nop + +.section .text.bar,"ax",@progbits +.global bar +bar: + nop + +.section .text.baz,"ax",@progbits +.global baz +baz: + nop + + +#--- script.t +PROVIDE(unused = bar + used); +PROVIDE(used = another_used); +PROVIDE(baz_ref = baz); +PROVIDE(another_used = baz_ref); +PROVIDE(another_unused = unused + bar + 0x1); diff --git a/lld/test/ELF/linkerscript/symbolreferenced.s b/lld/test/ELF/linkerscript/symbolreferenced.s index ba7a7721ea969..6f583d20e2764 100644 --- a/lld/test/ELF/linkerscript/symbolreferenced.s +++ b/lld/test/ELF/linkerscript/symbolreferenced.s @@ -21,11 +21,31 @@ # RUN: ld.lld -o chain -T chain.t a.o # RUN: llvm-nm chain | FileCheck %s -# CHECK: 0000000000001000 a f1 -# CHECK-NEXT: 0000000000001000 A f2 -# CHECK-NEXT: 0000000000001000 a g1 -# CHECK-NEXT: 0000000000001000 A g2 -# CHECK-NEXT: 0000000000001000 A newsym +# CHECK-NOT: another_unused +# CHECK: 0000000000007000 a f1 +# CHECK-NEXT: 0000000000007000 A f2 +# CHECK-NEXT: 0000000000007000 A f3 +# CHECK-NEXT: 0000000000007000 A f4 +# CHECK-NEXT: 0000000000006000 A f5 +# CHECK-NEXT: 0000000000003000 A f6 +# CHECK-NEXT: 0000000000001000 A f7 +# CHECK-NOT: g1 +# CHECK-NOT: g2 +# CHECK-NEXT: 0000000000007500 A newsym +# CHECK: 0000000000002000 A u +# CHECK-NOT: unused +# CHECK-NEXT: 0000000000002000 A v +# CHECK-NEXT: 0000000000002000 A w + + +# RUN: ld.lld -o chain_with_cycle -T chain_with_cycle.t a.o +# RUN: llvm-nm chain_with_cycle | FileCheck %s --check-prefix=CHAIN_WITH_CYCLE + +# CHAIN_WITH_CYCLE: 000 A f1 +# CHAIN_WITH_CYCLE: 000 A f2 +# CHAIN_WITH_CYCLE: 000 A f3 +# CHAIN_WITH_CYCLE: 000 A f4 +# CHAIN_WITH_CYCLE: 000 A newsym # RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: # ERR-COUNT-3: error: chain2.t:1: symbol not found: undef @@ -40,13 +60,30 @@ patatino: movl newsym, %eax #--- chain.t -PROVIDE(f2 = 0x1000); +PROVIDE(f7 = 0x1000); +PROVIDE(f5 = f6 + 0x3000); +PROVIDE(f6 = f7 + 0x2000); +PROVIDE(f4 = f5 + 0x1000); +PROVIDE(f3 = f4); +PROVIDE(f2 = f3); PROVIDE_HIDDEN(f1 = f2); -PROVIDE(newsym = f1); +PROVIDE(newsym = f1 + 0x500); + +u = v; +PROVIDE(w = 0x2000); +PROVIDE(v = w); PROVIDE(g2 = 0x1000); PROVIDE_HIDDEN(g1 = g2); PROVIDE(unused = g1); +PROVIDE_HIDDEN(another_unused = g1); + +#--- chain_with_cycle.t +PROVIDE(f1 = f2 + f3); +PROVIDE(f2 = f3 + f4); +PROVIDE(f3 = f4); +PROVIDE(f4 = f1); +PROVIDE(newsym = f1); #--- chain2.t PROVIDE(f2 = undef);