99 changes: 92 additions & 7 deletions lld/wasm/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,12 @@ void LinkerDriver::addFile(StringRef path) {
case file_magic::wasm_object:
files.push_back(createObjectFile(mbref));
break;
case file_magic::unknown:
if (mbref.getBuffer().starts_with("#STUB\n")) {
files.push_back(make<StubFile>(mbref));
break;
}
[[fallthrough]];
default:
error("unknown file type: " + mbref.getBufferIdentifier());
}
Expand Down Expand Up @@ -450,6 +456,7 @@ static void readConfigs(opt::InputArgList &args) {
parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
"--thinlto-cache-policy: invalid cache policy");
config->unresolvedSymbols = getUnresolvedSymbolPolicy(args);
config->whyExtract = args.getLastArgValue(OPT_why_extract);
errorHandler().verbose = args.hasArg(OPT_verbose);
LLVM_DEBUG(errorHandler().verbose = true);

Expand Down Expand Up @@ -631,7 +638,7 @@ static const char *getReproduceOption(opt::InputArgList &args) {
}

// Force Sym to be entered in the output. Used for -u or equivalent.
static Symbol *handleUndefined(StringRef name) {
static Symbol *handleUndefined(StringRef name, const char *option) {
Symbol *sym = symtab->find(name);
if (!sym)
return nullptr;
Expand All @@ -640,8 +647,11 @@ static Symbol *handleUndefined(StringRef name) {
// eliminate it. Mark the symbol as "used" to prevent it.
sym->isUsedInRegularObj = true;

if (auto *lazySym = dyn_cast<LazySymbol>(sym))
if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
lazySym->fetch();
if (!config->whyExtract.empty())
config->whyExtractRecords.emplace_back(option, sym->getFile(), *sym);
}

return sym;
}
Expand All @@ -653,8 +663,31 @@ static void handleLibcall(StringRef name) {

if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
MemoryBufferRef mb = lazySym->getMemberBuffer();
if (isBitcode(mb))
if (isBitcode(mb)) {
if (!config->whyExtract.empty())
config->whyExtractRecords.emplace_back("<libcall>", sym->getFile(),
*sym);
lazySym->fetch();
}
}
}

static void writeWhyExtract() {
if (config->whyExtract.empty())
return;

std::error_code ec;
raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None);
if (ec) {
error("cannot open --why-extract= file " + config->whyExtract + ": " +
ec.message());
return;
}

os << "reference\textracted\tsymbol\n";
for (auto &entry : config->whyExtractRecords) {
os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t'
<< toString(std::get<2>(entry)) << '\n';
}
}

Expand Down Expand Up @@ -807,6 +840,53 @@ static void createOptionalSymbols() {
WasmSym::tlsBase = createOptionalGlobal("__tls_base", false);
}

// Resolves still-undefined symbols against the parsed stub libraries.
//
// A symbol listed in a stub file is handled only if it exists in the symbol
// table, is undefined, is used by a regular object and has not already been
// claimed by an earlier stub file.  Such a symbol is marked forceImport, and
// every dependency listed for it must then resolve to a symbol that is not
// undefined: that symbol is force-exported, marked as used and, when it is
// lazy, fetched.  A dependency that is missing or undefined is reported as an
// error.
static void processStubLibraries() {
  log("-- processStubLibraries");
  for (auto &stub_file : symtab->stubFiles) {
    LLVM_DEBUG(llvm::dbgs()
               << "processing stub file: " << stub_file->getName() << "\n");
    // Bind by const reference: iterating by value would deep-copy the
    // dependency vector of every entry on each iteration.
    for (const auto &[name, deps] : stub_file->symbolDependencies) {
      auto *sym = symtab->find(name);
      if (!sym || !sym->isUndefined() || !sym->isUsedInRegularObj ||
          sym->forceImport) {
        LLVM_DEBUG(llvm::dbgs() << "stub not in needed: " << name << "\n");
        continue;
      }
      // The first stub library to define a given symbol sets this and
      // definitions in later stub libraries are ignored.
      sym->forceImport = true;
      if (sym->traced)
        message(toString(stub_file) + ": importing " + name);
      else
        LLVM_DEBUG(llvm::dbgs()
                   << toString(stub_file) << ": importing " << name << "\n");
      for (const auto dep : deps) {
        auto *needed = symtab->find(dep);
        if (!needed) {
          error(toString(stub_file) + ": undefined symbol: " + dep +
                ". Required by " + toString(*sym));
        } else if (needed->isUndefined()) {
          error(toString(stub_file) +
                ": undefined symbol: " + toString(*needed) +
                ". Required by " + toString(*sym));
        } else {
          LLVM_DEBUG(llvm::dbgs()
                     << "force export: " << toString(*needed) << "\n");
          needed->forceExport = true;
          needed->isUsedInRegularObj = true;
          if (auto *lazy = dyn_cast<LazySymbol>(needed)) {
            lazy->fetch();
            // NOTE(review): the record names the importing symbol (`sym`),
            // not the fetched dependency (`needed`) - confirm this is the
            // intended "extracted"/"symbol" pair for the report.
            // NOTE(review): linkerMain appears to call writeWhyExtract()
            // before this function, so records added here may never be
            // written - confirm.
            if (!config->whyExtract.empty())
              config->whyExtractRecords.emplace_back(stub_file->getName(),
                                                     sym->getFile(), *sym);
          }
        }
      }
    }
  }
  log("-- done processStubLibraries");
}

// Reconstructs command line arguments so that you can re-run
// the same command with the same inputs. This is for --reproduce.
static std::string createResponseFile(const opt::InputArgList &args) {
Expand Down Expand Up @@ -1035,16 +1115,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {

// Handle the `--undefined <sym>` options.
for (auto *arg : args.filtered(OPT_undefined))
handleUndefined(arg->getValue());
handleUndefined(arg->getValue(), "<internal>");

// Handle the `--export <sym>` options
// This works like --undefined but also exports the symbol if it's found
for (auto &iter : config->exportedSymbols)
handleUndefined(iter.first());
handleUndefined(iter.first(), "--export");

Symbol *entrySym = nullptr;
if (!config->relocatable && !config->entry.empty()) {
entrySym = handleUndefined(config->entry);
entrySym = handleUndefined(config->entry, "--entry");
if (entrySym && entrySym->isDefined())
entrySym->forceExport = true;
else
Expand All @@ -1061,7 +1141,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
!WasmSym::callCtors->isUsedInRegularObj &&
WasmSym::callCtors->getName() != config->entry &&
!config->exportedSymbols.count(WasmSym::callCtors->getName())) {
if (Symbol *callDtors = handleUndefined("__wasm_call_dtors")) {
if (Symbol *callDtors =
handleUndefined("__wasm_call_dtors", "<internal>")) {
if (auto *callDtorsFunc = dyn_cast<DefinedFunction>(callDtors)) {
if (callDtorsFunc->signature &&
(!callDtorsFunc->signature->Params.empty() ||
Expand Down Expand Up @@ -1096,12 +1177,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (errorCount())
return;

writeWhyExtract();

// Do link-time optimization if given files are LLVM bitcode files.
// This compiles bitcode files into real object files.
symtab->compileBitcodeFiles();
if (errorCount())
return;

processStubLibraries();

createOptionalSymbols();

// Resolve any variant symbols that were created due to signature
Expand Down
43 changes: 43 additions & 0 deletions lld/wasm/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "InputElement.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "lld/Common/Args.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/Reproduce.h"
#include "llvm/Object/Binary.h"
Expand Down Expand Up @@ -678,6 +679,48 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
llvm_unreachable("unknown symbol kind");
}


// Returns `s` with every leading and trailing space character (' ') removed.
// Only the space character is trimmed; tabs and other whitespace are kept.
StringRef strip(StringRef s) { return s.trim(' '); }

// Parses the text of a stub library into symbolDependencies.
//
// Every non-comment line has the form
//   symbol: dep1, dep2, ...
// where the ':' and the dependency list may be omitted.  Spaces around the
// symbol name and around each dependency are stripped.
void StubFile::parse() {
  // The "#STUB" header is not re-checked here.  The previous check was guarded
  // by a flag initialised to false, so it never ran; LinkerDriver::addFile
  // only creates a StubFile for buffers that start with "#STUB\n".
  for (StringRef line : args::getLines(mb)) {
    // Lines starting with # are considered comments
    // NOTE(review): args::getLines may already drop such lines - confirm.
    if (line.starts_with("#"))
      continue;

    StringRef sym;
    StringRef rest;
    std::tie(sym, rest) = line.split(':');
    sym = strip(sym);
    rest = strip(rest);

    // Look the entry up once.  A symbol that is listed more than once keeps
    // only the dependencies from its last line.
    auto &deps = symbolDependencies[sym];
    deps.clear();

    while (!rest.empty()) {
      StringRef dep;
      std::tie(dep, rest) = rest.split(',');
      deps.push_back(strip(dep));
    }
  }
}

void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
Expand Down
13 changes: 13 additions & 0 deletions lld/wasm/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class InputFile {
SharedKind,
ArchiveKind,
BitcodeKind,
StubKind,
};

virtual ~InputFile() {}
Expand Down Expand Up @@ -183,6 +184,18 @@ class BitcodeFile : public InputFile {
static bool doneLTO;
};

// Stub libray (See docs/WebAssembly.rst)
class StubFile : public InputFile {
public:
explicit StubFile(MemoryBufferRef m) : InputFile(StubKind, m) {}

static bool classof(const InputFile *f) { return f->kind() == StubKind; }

void parse();

llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
};

inline bool isBitcode(MemoryBufferRef mb) {
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
}
Expand Down
2 changes: 2 additions & 0 deletions lld/wasm/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ defm whole_archive: B<"whole-archive",
"Force load of all members in a static library",
"Do not force load of all members in a static library (default)">;

// --why-extract=<file>: report why archive members were extracted.
def why_extract: JJ<"why-extract=">,
  HelpText<"Print to a file about why archive members are extracted">;

defm check_features: BB<"check-features",
"Check feature compatibility of linked objects (default)",
"Ignore feature compatibility of linked objects">;
Expand Down
4 changes: 2 additions & 2 deletions lld/wasm/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ static bool requiresGOTAccess(const Symbol *sym) {
}

static bool allowUndefined(const Symbol* sym) {
// Symbols with explicit import names are always allowed to be undefined at
// Symbols that are explicitly imported are always allowed to be undefined at
// link time.
if (sym->importName)
if (sym->isImported())
return true;
if (isa<UndefinedFunction>(sym) && config->importUndefined)
return true;
Expand Down
13 changes: 13 additions & 0 deletions lld/wasm/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ void SymbolTable::addFile(InputFile *file) {
return;
}

// stub file
if (auto *f = dyn_cast<StubFile>(file)) {
f->parse();
stubFiles.push_back(f);
return;
}

if (config->trace)
message(toString(file));

Expand Down Expand Up @@ -524,6 +531,9 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef name,
lazy->signature = sig;
} else {
lazy->fetch();
if (!config->whyExtract.empty())
config->whyExtractRecords.emplace_back(toString(file), s->getFile(),
*s);
}
} else {
auto existingFunction = dyn_cast<FunctionSymbol>(s);
Expand Down Expand Up @@ -748,7 +758,10 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
}

LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
const InputFile *oldFile = s->getFile();
file->addMember(sym);
if (!config->whyExtract.empty())
config->whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
}

bool SymbolTable::addComdat(StringRef name) {
Expand Down
1 change: 1 addition & 0 deletions lld/wasm/SymbolTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ class SymbolTable {
DefinedFunction *createUndefinedStub(const WasmSignature &sig);

std::vector<ObjFile *> objectFiles;
std::vector<StubFile *> stubFiles;
std::vector<SharedFile *> sharedFiles;
std::vector<BitcodeFile *> bitcodeFiles;
std::vector<InputFunction *> syntheticFunctions;
Expand Down
4 changes: 4 additions & 0 deletions lld/wasm/Symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ void Symbol::setHidden(bool isHidden) {
flags |= WASM_SYMBOL_VISIBILITY_DEFAULT;
}

// A symbol counts as imported when it is undefined and either carries an
// explicit import name or has been marked forceImport.
bool Symbol::isImported() const {
  if (!isUndefined())
    return false;
  return forceImport || importName.has_value();
}

bool Symbol::isExported() const {
if (!isDefined() || isLocal())
return false;
Expand Down
7 changes: 6 additions & 1 deletion lld/wasm/Symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class Symbol {
void setOutputSymbolIndex(uint32_t index);

WasmSymbolType getWasmType() const;
bool isImported() const;
bool isExported() const;
bool isExportedExplicit() const;

Expand All @@ -135,7 +136,8 @@ class Symbol {
Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
: name(name), file(f), symbolKind(k), referenced(!config->gcSections),
requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
canInline(false), traced(false), isStub(false), flags(flags) {}
forceImport(false), canInline(false), traced(false), isStub(false),
flags(flags) {}

StringRef name;
InputFile *file;
Expand All @@ -160,6 +162,8 @@ class Symbol {
// -e/--export command line flag)
bool forceExport : 1;

bool forceImport : 1;

// False if LTO shouldn't inline whatever this symbol points to. If a symbol
// is overwritten after LTO, LTO shouldn't inline the symbol because it
// doesn't know the final contents of the symbol.
Expand Down Expand Up @@ -656,6 +660,7 @@ T *replaceSymbol(Symbol *s, ArgT &&... arg) {
T *s2 = new (s) T(std::forward<ArgT>(arg)...);
s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
s2->forceExport = symCopy.forceExport;
s2->forceImport = symCopy.forceImport;
s2->canInline = symCopy.canInline;
s2->traced = symCopy.traced;
s2->referenced = symCopy.referenced;
Expand Down
4 changes: 2 additions & 2 deletions lld/wasm/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ static bool shouldImport(Symbol *sym) {
if (config->allowUndefinedSymbols.count(sym->getName()) != 0)
return true;

return sym->importName.has_value();
return sym->isImported();
}

void Writer::calculateImports() {
Expand Down Expand Up @@ -1570,7 +1570,7 @@ void Writer::run() {
sym->forceExport = true;
}

// Delay reporting error about explicit exports until after
// Delay reporting errors about explicit exports until after
// addStartStopSymbols which can create optional symbols.
for (auto &name : config->requiredExports) {
Symbol *sym = symtab->find(name);
Expand Down