Skip to content

Commit

Permalink
[Object][Wasm] Allow parsing of GC types in type and table sections (#…
Browse files Browse the repository at this point in the history
…79235)

This change allows a WasmObjectFile to be created from a wasm file even 
if it uses typed funcrefs and GC types. It does not significantly change how 
lib/Object models its various internal types (e.g. WasmSignature,
WasmElemSegment), so LLVM does not really "support" or understand such
files, but it is sufficient to parse the type, global and element sections, discarding
types that are not understood. This is useful for low-level binary tools such as
nm and objcopy, which use only limited aspects of the binary (such as function
definitions) or deal with sections as opaque blobs.

This is done by allowing `WasmValType` to have a value of `OTHERREF`
(representing any unmodeled reference type), and adding a field to
`WasmSignature` indicating it's a placeholder for an unmodeled reference 
type (since there is a 1:1 correspondence between WasmSignature objects
and types in the type section).
Then the object file parsers for the type and element sections are expanded
to parse encoded reference types and discard any unmodeled fields.
  • Loading branch information
dschuff committed Jan 25, 2024
1 parent 7fdb932 commit 7f409cd
Show file tree
Hide file tree
Showing 12 changed files with 333 additions and 36 deletions.
5 changes: 4 additions & 1 deletion lld/wasm/InputChunks.h
Expand Up @@ -259,10 +259,13 @@ class InputFunction : public InputChunk {
file->codeSection->Content.slice(inputSectionOffset, function->Size);
debugName = function->DebugName;
comdat = function->Comdat;
assert(s.Kind != WasmSignature::Placeholder);
}

InputFunction(StringRef name, const WasmSignature &s)
: InputChunk(nullptr, InputChunk::Function, name), signature(s) {}
: InputChunk(nullptr, InputChunk::Function, name), signature(s) {
assert(s.Kind == WasmSignature::Function);
}

static bool classof(const InputChunk *c) {
return c->kind() == InputChunk::Function ||
Expand Down
4 changes: 3 additions & 1 deletion lld/wasm/InputElement.h
Expand Up @@ -76,7 +76,9 @@ class InputGlobal : public InputElement {
class InputTag : public InputElement {
public:
InputTag(const WasmSignature &s, const WasmTag &t, ObjFile *f)
: InputElement(t.SymbolName, f), signature(s) {}
: InputElement(t.SymbolName, f), signature(s) {
assert(s.Kind == WasmSignature::Tag);
}

const WasmSignature &signature;
};
Expand Down
3 changes: 3 additions & 0 deletions lld/wasm/InputFiles.cpp
Expand Up @@ -81,6 +81,9 @@ InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
std::unique_ptr<Binary> bin =
CHECK(createBinary(mb), mb.getBufferIdentifier());
auto *obj = cast<WasmObjectFile>(bin.get());
if (obj->hasUnmodeledTypes())
fatal(toString(mb.getBufferIdentifier()) +
"file has unmodeled reference or GC types");
if (obj->isSharedObject())
return make<SharedFile>(mb);
return make<ObjFile>(mb, archiveName, lazy);
Expand Down
2 changes: 2 additions & 0 deletions lld/wasm/WriterUtils.cpp
Expand Up @@ -35,6 +35,8 @@ std::string toString(ValType type) {
return "funcref";
case ValType::EXTERNREF:
return "externref";
case ValType::OTHERREF:
return "otherref";
}
llvm_unreachable("Invalid wasm::ValType");
}
Expand Down
47 changes: 46 additions & 1 deletion llvm/include/llvm/BinaryFormat/Wasm.h
Expand Up @@ -56,9 +56,25 @@ enum : unsigned {
WASM_TYPE_F32 = 0x7D,
WASM_TYPE_F64 = 0x7C,
WASM_TYPE_V128 = 0x7B,
WASM_TYPE_NULLFUNCREF = 0x73,
WASM_TYPE_NULLEXTERNREF = 0x72,
WASM_TYPE_NULLREF = 0x71,
WASM_TYPE_FUNCREF = 0x70,
WASM_TYPE_EXTERNREF = 0x6F,
WASM_TYPE_ANYREF = 0x6E,
WASM_TYPE_EQREF = 0x6D,
WASM_TYPE_I31REF = 0x6C,
WASM_TYPE_STRUCTREF = 0x6B,
WASM_TYPE_ARRAYREF = 0x6A,
WASM_TYPE_EXNREF = 0x69,
WASM_TYPE_NONNULLABLE = 0x64,
WASM_TYPE_NULLABLE = 0x63,
WASM_TYPE_FUNC = 0x60,
WASM_TYPE_ARRAY = 0x5E,
WASM_TYPE_STRUCT = 0x5F,
WASM_TYPE_SUB = 0x50,
WASM_TYPE_SUB_FINAL = 0x4F,
WASM_TYPE_REC = 0x4E,
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
};

Expand Down Expand Up @@ -93,6 +109,20 @@ enum : unsigned {
WASM_OPCODE_I64_SUB = 0x7d,
WASM_OPCODE_I64_MUL = 0x7e,
WASM_OPCODE_REF_NULL = 0xd0,
WASM_OPCODE_REF_FUNC = 0xd2,
WASM_OPCODE_GC_PREFIX = 0xfb,
};

// Opcodes in the GC-prefixed space (0xfb)
//
// These are the secondary opcode values that belong to the space introduced by
// WASM_OPCODE_GC_PREFIX (0xfb); they are only meaningful in combination with
// that prefix and are kept in their own anonymous enum, separate from the
// single-byte opcodes.
// NOTE(review): the enumerator names appear to mirror the GC proposal's
// instruction mnemonics (struct.new, array.new_fixed, ref.i31, ...) -- confirm
// values against the spec when adding entries.
enum : unsigned {
WASM_OPCODE_STRUCT_NEW = 0x00,
WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
WASM_OPCODE_ARRAY_NEW = 0x06,
WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
WASM_OPCODE_REF_I31 = 0x1c,
// any.convert_extern and extern.convert_any don't seem to be supported by
// Binaryen.
// (So no enumerators are defined for those two instructions here.)
};

// Opcodes used in synthetic functions.
Expand Down Expand Up @@ -127,7 +157,8 @@ enum : unsigned {

enum : unsigned {
WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02,
WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02, // if passive == 1
WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3;
Expand Down Expand Up @@ -229,6 +260,9 @@ enum class ValType {
V128 = WASM_TYPE_V128,
FUNCREF = WASM_TYPE_FUNCREF,
EXTERNREF = WASM_TYPE_EXTERNREF,
// Unmodeled value types include ref types with heap types other than
// func or extern, and type-specialized funcrefs
OTHERREF = 0xff,
};

struct WasmDylinkImportInfo {
Expand Down Expand Up @@ -297,6 +331,8 @@ struct WasmInitExprMVP {
} Value;
};

// Extended-const init exprs and exprs with GC types are not explicitly
// modeled, but the raw body of the expr is attached.
struct WasmInitExpr {
uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
// one instruction)
Expand Down Expand Up @@ -367,6 +403,11 @@ struct WasmDataSegment {
uint32_t Comdat; // from the "comdat info" section
};

// Represents a Wasm element segment, with some limitations compared to the
// spec:
// 1) Does not model passive or declarative segments (Segment will end up with
// an Offset field of i32.const 0)
// 2) Does not model init exprs (Segment will get an empty Functions list)
// 3) Does not model types other than basic funcref/externref (see ValType)
struct WasmElemSegment {
uint32_t Flags;
uint32_t TableNumber;
Expand Down Expand Up @@ -436,6 +477,10 @@ struct WasmLinkingData {
struct WasmSignature {
SmallVector<ValType, 1> Returns;
SmallVector<ValType, 4> Params;
// LLVM can parse types other than functions encoded in the type section,
// but does not actually model them. Instead a placeholder signature is
// created in the Object's signature list.
enum { Function, Tag, Placeholder } Kind = Function;
// Support empty and tombstone instances, needed by DenseMap.
enum { Plain, Empty, Tombstone } State = Plain;

Expand Down
6 changes: 5 additions & 1 deletion llvm/include/llvm/Object/Wasm.h
Expand Up @@ -39,7 +39,9 @@ class WasmSymbol {
const wasm::WasmTableType *TableType,
const wasm::WasmSignature *Signature)
: Info(Info), GlobalType(GlobalType), TableType(TableType),
Signature(Signature) {}
Signature(Signature) {
assert(!Signature || Signature->Kind != wasm::WasmSignature::Placeholder);
}

const wasm::WasmSymbolInfo &Info;
const wasm::WasmGlobalType *GlobalType;
Expand Down Expand Up @@ -209,6 +211,7 @@ class WasmObjectFile : public ObjectFile {
Expected<SubtargetFeatures> getFeatures() const override;
bool isRelocatableObject() const override;
bool isSharedObject() const;
bool hasUnmodeledTypes() const { return HasUnmodeledTypes; }

struct ReadContext {
const uint8_t *Start;
Expand Down Expand Up @@ -291,6 +294,7 @@ class WasmObjectFile : public ObjectFile {
bool HasLinkingSection = false;
bool HasDylinkSection = false;
bool HasMemory64 = false;
bool HasUnmodeledTypes = false;
wasm::WasmLinkingData LinkingData;
uint32_t NumImportedGlobals = 0;
uint32_t NumImportedTables = 0;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/MC/WasmObjectWriter.cpp
Expand Up @@ -972,6 +972,8 @@ void WasmObjectWriter::writeTableSection(ArrayRef<wasm::WasmTable> Tables) {

encodeULEB128(Tables.size(), W->OS);
for (const wasm::WasmTable &Table : Tables) {
assert(Table.Type.ElemType != wasm::ValType::OTHERREF &&
"Cannot encode general ref-typed tables");
encodeULEB128((uint32_t)Table.Type.ElemType, W->OS);
encodeULEB128(Table.Type.Limits.Flags, W->OS);
encodeULEB128(Table.Type.Limits.Minimum, W->OS);
Expand Down

0 comments on commit 7f409cd

Please sign in to comment.