Skip to content

Commit

Permalink
[WebAssembly] Deduplicate imports of the same module name, field name…
Browse files Browse the repository at this point in the history
…, and type

When two symbols import the same thing, only one import should be emitted in the Wasm file.

Fixes https://bugs.llvm.org/show_bug.cgi?id=50938

Reviewed By: sbc100

Differential Revision: https://reviews.llvm.org/D105519
  • Loading branch information
fitzgen authored and sbc100 committed Jul 19, 2021
1 parent 6ef37b6 commit 4ae575b
Show file tree
Hide file tree
Showing 7 changed files with 366 additions and 8 deletions.
57 changes: 57 additions & 0 deletions lld/test/wasm/duplicate-function-imports.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
# RUN: wasm-ld -o %t1.wasm %t.o
# RUN: obj2yaml %t1.wasm | FileCheck %s

.globl f1
.import_module f1, env
.import_name f1, f
.functype f1 () -> (i32)

# Same import module/name/type as `f1`, should be de-duped.
.globl f2
.import_module f2, env
.import_name f2, f
.functype f2 () -> (i32)

# Same import module/name, but different type. Should not be de-duped.
.globl f3
.import_module f3, env
.import_name f3, f
.functype f3 () -> (f32)

.globl _start
_start:
.functype _start () -> ()
call f1
drop
call f2
drop
call f3
drop
end_function


# CHECK: - Type: TYPE
# CHECK-NEXT: Signatures:
# CHECK-NEXT: - Index: 0
# CHECK-NEXT: ParamTypes: []
# CHECK-NEXT: ReturnTypes:
# CHECK-NEXT: - I32
# CHECK-NEXT: - Index: 1
# CHECK-NEXT: ParamTypes: []
# CHECK-NEXT: ReturnTypes:
# CHECK-NEXT: - F32
# CHECK-NEXT: - Index: 2
# CHECK-NEXT: ParamTypes: []
# CHECK-NEXT: ReturnTypes: []
# CHECK-NEXT: - Type: IMPORT
# CHECK-NEXT: Imports:
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: f
# CHECK-NEXT: Kind: FUNCTION
# CHECK-NEXT: SigIndex: 0
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: f
# CHECK-NEXT: Kind: FUNCTION
# CHECK-NEXT: SigIndex: 1
# CHECK-NEXT: - Type:
69 changes: 69 additions & 0 deletions lld/test/wasm/duplicate-global-imports.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
# RUN: wasm-ld --no-check-features -o %t1.wasm %t.o
# RUN: obj2yaml %t1.wasm | FileCheck %s

.global g1
.import_module g1, env
.import_name g1, g
.globaltype g1, i64, immutable

# Same import module/name/type as `g1`, should be de-duped.
.global g2
.import_module g2, env
.import_name g2, g
.globaltype g2, i64, immutable

# Imported as an i32 instead of i64, so should not be de-duped.
.global g3
.import_module g3, env
.import_name g3, g
.globaltype g3, i32, immutable

# Imported as mutable instead of immutable, so should not be de-duped.
.global g4
.import_module g4, env
.import_name g4, g
.globaltype g4, i64

.globl _start
_start:
.functype _start () -> ()
global.get g1
drop
global.get g2
drop
global.get g3
drop
global.get g4
drop
end_function


# CHECK: - Type: IMPORT
# CHECK-NEXT: Imports:
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: g
# CHECK-NEXT: Kind: GLOBAL
# CHECK-NEXT: GlobalType: I64
# CHECK-NEXT: GlobalMutable: false
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: g
# CHECK-NEXT: Kind: GLOBAL
# CHECK-NEXT: GlobalType: I32
# CHECK-NEXT: GlobalMutable: false
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: g
# CHECK-NEXT: Kind: GLOBAL
# CHECK-NEXT: GlobalType: I64
# CHECK-NEXT: GlobalMutable: true
# CHECK-NEXT: - Type:

# CHECK: GlobalNames:
# CHECK-NEXT: - Index: 0
# CHECK-NEXT: Name: g1
# CHECK-NEXT: - Index: 1
# CHECK-NEXT: Name: g3
# CHECK-NEXT: - Index: 2
# CHECK-NEXT: Name: g4
# CHECK-NEXT: - Index: 3
# CHECK-NEXT: Name: __stack_pointer
75 changes: 75 additions & 0 deletions lld/test/wasm/duplicate-table-imports.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# RUN: llvm-mc -mattr=+reference-types -triple=wasm32-unknown-unknown -filetype=obj -o %t.o %s
# RUN: wasm-ld --allow-undefined -o %t1.wasm %t.o
# RUN: obj2yaml %t1.wasm | FileCheck %s

.tabletype t1, funcref
.import_module t1, env
.import_name t1, t
.globl t1

# Same import module/name/type as `t1`, should be de-duped.
.tabletype t2, funcref
.import_module t2, env
.import_name t2, t
.globl t2

# Imported as an externref instead of funcref, so should not be de-duped.
.tabletype t3, externref
.import_module t3, env
.import_name t3, t
.globl t3

.globl _start
_start:
.functype _start () -> ()

# Read from `t1`
i32.const 0
table.get t1
drop

# Read from `t2`
i32.const 0
table.get t2
drop

# Read from `t3`
i32.const 0
table.get t3
drop

end_function

# XXX: the second imported table has index 1, not 0. I've verified by hand (with
# `wasm2wat`) that the resulting Wasm file is correct: `t3` does end up at index
# 1 and our `table.get` instructions are using the proper table index
# immediates. This is also asserted (less legibly) in the hexdump of the code
# body below. It looks like there's a bug in how `obj2yaml` disassembles
# multiple table imports.

# CHECK: - Type: IMPORT
# CHECK-NEXT: Imports:
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: t
# CHECK-NEXT: Kind: TABLE
# CHECK-NEXT: Table:
# CHECK-NEXT: Index: 0
# CHECK-NEXT: ElemType: FUNCREF
# CHECK-NEXT: Limits:
# CHECK-NEXT: Minimum: 0x0
# CHECK-NEXT: - Module: env
# CHECK-NEXT: Field: t
# CHECK-NEXT: Kind: TABLE
# CHECK-NEXT: Table:
# CHECK-NEXT: Index: 0
# CHECK-NEXT: ElemType: EXTERNREF
# CHECK-NEXT: Limits:
# CHECK-NEXT: Minimum: 0x0
# CHECK-NEXT: - Type:

# CHECK: - Type: CODE
# CHECK-NEXT: Functions:
# CHECK-NEXT: - Index: 0
# CHECK-NEXT: Locals: []
# CHECK-NEXT: Body: 41002580808080001A41002580808080001A41002581808080001A0B
# CHECK-NEXT: - Type:
58 changes: 50 additions & 8 deletions lld/wasm/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,59 @@ void ImportSection::addGOTEntry(Symbol *sym) {
gotSymbols.push_back(sym);
}

template <typename UndefinedT>
static void getImportModuleAndName(Symbol *sym, StringRef *outModule,
StringRef *outName) {
if (auto *undef = dyn_cast<UndefinedT>(sym)) {
*outModule = undef->importModule ? *undef->importModule : defaultModule;
*outName = undef->importName ? *undef->importName : sym->getName();
} else {
*outModule = defaultModule;
*outName = sym->getName();
}
}

void ImportSection::addImport(Symbol *sym) {
assert(!isSealed);
importedSymbols.emplace_back(sym);
if (auto *f = dyn_cast<FunctionSymbol>(sym))
f->setFunctionIndex(numImportedFunctions++);
else if (auto *g = dyn_cast<GlobalSymbol>(sym))
g->setGlobalIndex(numImportedGlobals++);
else if (auto *t = dyn_cast<TagSymbol>(sym))
StringRef module;
StringRef name;
if (auto *f = dyn_cast<FunctionSymbol>(sym)) {
getImportModuleAndName<UndefinedFunction>(sym, &module, &name);
ImportKey<WasmSignature> key(*(f->getSignature()), module, name);
auto entry = importedFunctions.try_emplace(key, numImportedFunctions);
if (entry.second) {
importedSymbols.emplace_back(sym);
f->setFunctionIndex(numImportedFunctions++);
} else {
f->setFunctionIndex(entry.first->second);
}
} else if (auto *g = dyn_cast<GlobalSymbol>(sym)) {
getImportModuleAndName<UndefinedGlobal>(sym, &module, &name);
ImportKey<WasmGlobalType> key(*(g->getGlobalType()), module, name);
auto entry = importedGlobals.try_emplace(key, numImportedGlobals);
if (entry.second) {
importedSymbols.emplace_back(sym);
g->setGlobalIndex(numImportedGlobals++);
} else {
g->setGlobalIndex(entry.first->second);
}
} else if (auto *t = dyn_cast<TagSymbol>(sym)) {
// NB: There's currently only one possible kind of tag, and no
// `UndefinedTag`, so we don't bother de-duplicating tag imports.
importedSymbols.emplace_back(sym);
t->setTagIndex(numImportedTags++);
else
cast<TableSymbol>(sym)->setTableNumber(numImportedTables++);
} else {
auto *table = cast<TableSymbol>(sym);
getImportModuleAndName<UndefinedTable>(sym, &module, &name);
ImportKey<WasmTableType> key(*(table->getTableType()), module, name);
auto entry = importedTables.try_emplace(key, numImportedTables);
if (entry.second) {
importedSymbols.emplace_back(sym);
table->setTableNumber(numImportedTables++);
} else {
table->setTableNumber(entry.first->second);
}
}
}

void ImportSection::writeBody() {
Expand Down
65 changes: 65 additions & 0 deletions lld/wasm/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,68 @@ class TypeSection : public SyntheticSection {
llvm::DenseMap<WasmSignature, int32_t> typeIndices;
};

/**
* A key for some kind of imported entity of type `T`.
*
* Used when de-duplicating imports.
*/
template <typename T> struct ImportKey {
public:
enum class State { Plain, Empty, Tombstone };

public:
T type;
llvm::Optional<StringRef> importModule;
llvm::Optional<StringRef> importName;
State state;

public:
ImportKey(T type) : type(type), state(State::Plain) {}
ImportKey(T type, State state) : type(type), state(state) {}
ImportKey(T type, llvm::Optional<StringRef> importModule,
llvm::Optional<StringRef> importName)
: type(type), importModule(importModule), importName(importName),
state(State::Plain) {}
};

template <typename T>
inline bool operator==(const ImportKey<T> &lhs, const ImportKey<T> &rhs) {
return lhs.state == rhs.state && lhs.importModule == rhs.importModule &&
lhs.importName == rhs.importName && lhs.type == rhs.type;
}

} // namespace wasm
} // namespace lld

// `ImportKey<T>` can be used as a key in a `DenseMap` if `T` can be used as a
// key in a `DenseMap`.
template <typename T> struct llvm::DenseMapInfo<lld::wasm::ImportKey<T>> {
static lld::wasm::ImportKey<T> getEmptyKey() {
typename lld::wasm::ImportKey<T> key(llvm::DenseMapInfo<T>::getEmptyKey());
key.state = lld::wasm::ImportKey<T>::State::Empty;
return key;
}
static lld::wasm::ImportKey<T> getTombstoneKey() {
typename lld::wasm::ImportKey<T> key(llvm::DenseMapInfo<T>::getEmptyKey());
key.state = lld::wasm::ImportKey<T>::State::Tombstone;
return key;
}
static unsigned getHashValue(const lld::wasm::ImportKey<T> &key) {
uintptr_t hash = hash_value(key.importModule);
hash = hash_combine(hash, key.importName);
hash = hash_combine(hash, llvm::DenseMapInfo<T>::getHashValue(key.type));
hash = hash_combine(hash, key.state);
return hash;
}
static bool isEqual(const lld::wasm::ImportKey<T> &lhs,
const lld::wasm::ImportKey<T> &rhs) {
return lhs == rhs;
}
};

namespace lld {
namespace wasm {

class ImportSection : public SyntheticSection {
public:
ImportSection() : SyntheticSection(llvm::wasm::WASM_SEC_IMPORT) {}
Expand Down Expand Up @@ -131,6 +193,9 @@ class ImportSection : public SyntheticSection {
unsigned numImportedFunctions = 0;
unsigned numImportedTags = 0;
unsigned numImportedTables = 0;
llvm::DenseMap<ImportKey<WasmGlobalType>, uint32_t> importedGlobals;
llvm::DenseMap<ImportKey<WasmSignature>, uint32_t> importedFunctions;
llvm::DenseMap<ImportKey<WasmTableType>, uint32_t> importedTables;
};

class FunctionSection : public SyntheticSection {
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/BinaryFormat/Wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,20 @@ struct WasmLimits {
uint64_t Maximum;
};

inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) {
return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum &&
LHS.Maximum == RHS.Maximum;
}

struct WasmTableType {
uint8_t ElemType;
WasmLimits Limits;
};

inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits;
}

struct WasmTable {
uint32_t Index;
WasmTableType Type;
Expand Down

0 comments on commit 4ae575b

Please sign in to comment.