Skip to content

Commit

Permalink
[WebAssembly] Initial Disassembler.
Browse files Browse the repository at this point in the history
This implements a new table-gen emitter to create tables for
a wasm disassembler, and a disassembler to use them.

Comes with 2 tests that exercise a few instructions manually. It is also able to
disassemble large .wasm files with objdump reasonably well.

Not working so well, to be addressed in followups:
- objdump appears to be passing an incorrect starting point.
- since the disassembler works an instruction at a time, and it is
  disassembling stack instructions, it has no idea of pseudo register assignments.
  These registers are required for the instruction printing code that follows.
  For now, all such registers appear in the output as $0.

Patch by Wouter van Oortmerssen

Differential Revision: https://reviews.llvm.org/D45848

llvm-svn: 332052
  • Loading branch information
sbc100 committed May 10, 2018
1 parent 65e9f1f commit 16c1682
Show file tree
Hide file tree
Showing 12 changed files with 375 additions and 15 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/WebAssembly/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS WebAssembly.td)
tablegen(LLVM WebAssemblyGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM WebAssemblyGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM WebAssemblyGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM WebAssemblyGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel)
tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter)
Expand Down
126 changes: 121 additions & 5 deletions llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,23 @@
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

#define DEBUG_TYPE "wasm-disassembler"

using DecodeStatus = MCDisassembler::DecodeStatus;

#include "WebAssemblyGenDisassemblerTables.inc"

namespace {
class WebAssemblyDisassembler final : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MCII;
Expand Down Expand Up @@ -60,11 +67,120 @@ extern "C" void LLVMInitializeWebAssemblyDisassembler() {
createWebAssemblyDisassembler);
}

MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
raw_ostream &OS, raw_ostream &CS) const {
/// Fetch the byte at offset \p Size of \p Bytes and step \p Size past it.
/// Yields -1, leaving \p Size untouched, when the offset is at or beyond the
/// end of the buffer.
static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
  if (Size < Bytes.size())
    return Bytes[Size++];
  return -1;
}

// TODO: Implement disassembly.
/// Decode one LEB128 value located at offset \p Size of \p Bytes and append it
/// to \p MI as an immediate operand.
///
/// \param Signed selects SLEB128 decoding; otherwise the value is decoded as
///        ULEB128 and converted to int64_t.
/// \returns false, without touching \p Size or \p MI, when the decoder reports
///          an error; true after advancing \p Size by the bytes consumed.
static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
                              ArrayRef<uint8_t> Bytes, bool Signed) {
  const uint8_t *const Begin = Bytes.data() + Size;
  const uint8_t *const End = Bytes.data() + Bytes.size();
  unsigned Consumed = 0;
  const char *Err = nullptr;

  int64_t Imm;
  if (Signed)
    Imm = decodeSLEB128(Begin, &Consumed, End, &Err);
  else
    Imm = static_cast<int64_t>(decodeULEB128(Begin, &Consumed, End, &Err));

  if (Err)
    return false;

  Size += Consumed;
  MI.addOperand(MCOperand::createImm(Imm));
  return true;
}

/// Read a little-endian floating-point value of type \p T located at offset
/// \p Size of \p Bytes and append it to \p MI as an FP immediate operand
/// (widened to double).
///
/// \returns false, leaving \p Size and \p MI untouched, when fewer than
///          sizeof(T) bytes remain; true after advancing \p Size by sizeof(T).
template <typename T>
bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
  if (Size + sizeof(T) > Bytes.size())
    return false;
  // endian::read copies the bytes out (no alignment requirement) and returns
  // the value converted from little-endian to host order. The previous code
  // called byte_swap and dropped its return value, so the conversion never
  // took effect on big-endian hosts.
  const T Val =
      support::endian::read<T, support::endianness::little,
                            support::unaligned>(Bytes.data() + Size);
  Size += sizeof(T);
  MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
  return true;
}

return MCDisassembler::Fail;
/// Decode the single instruction found at the start of \p Bytes into \p MI.
///
/// \p Size is reset to zero and then advanced over every byte consumed. The
/// first byte indexes InstructionTable0; when that entry is a prefix,
/// PrefixTable is searched for the matching secondary table, which the
/// following byte then indexes. Operands are decoded in the order the selected
/// table entry lists them.
///
/// \returns Success once the opcode and all operands have been decoded; Fail
/// when the bytes run out, an operand cannot be parsed, the prefix has no
/// table, or the selected table slot is unused.
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
    MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
    raw_ostream & /*OS*/, raw_ostream &CS) const {
  CommentStream = &CS;
  Size = 0;

  const int FirstByte = nextByte(Bytes, Size);
  if (FirstByte < 0)
    return MCDisassembler::Fail;

  const auto *Entry = &InstructionTable0[FirstByte];
  if (Entry->ET == ET_Prefix) {
    // Prefix byte: walk the prefix list (terminated by a null Table) until the
    // matching prefix is found. A linear scan is fine for so few entries.
    auto Candidate = PrefixTable;
    while (Candidate->Table && Candidate->Prefix != FirstByte)
      ++Candidate;
    if (!Candidate->Table)
      return MCDisassembler::Fail;
    Entry = Candidate->Table;

    // The byte after the prefix selects the slot inside the secondary table.
    const int SecondByte = nextByte(Bytes, Size);
    if (SecondByte < 0)
      return MCDisassembler::Fail;
    Entry += SecondByte;
  }

  if (Entry->ET == ET_Unused)
    return MCDisassembler::Fail;
  // Anything that is neither a prefix nor unused has to be an instruction.
  assert(Entry->ET == ET_Instruction);
  MI.setOpcode(Entry->Opcode);

  // Decode the operands one by one, bailing out on the first failure.
  for (uint8_t Idx = 0; Idx < Entry->NumOperands; Idx++) {
    bool Parsed = true;
    switch (Entry->Operands[Idx]) {
    // Operands encoded as unsigned LEB128.
    case WebAssembly::OPERAND_BASIC_BLOCK:
    case WebAssembly::OPERAND_LOCAL:
    case WebAssembly::OPERAND_GLOBAL:
    case WebAssembly::OPERAND_FUNCTION32:
    case WebAssembly::OPERAND_OFFSET32:
    case WebAssembly::OPERAND_P2ALIGN:
    case WebAssembly::OPERAND_TYPEINDEX:
    case MCOI::OPERAND_IMMEDIATE:
      Parsed = parseLEBImmediate(MI, Size, Bytes, false);
      break;
    // Operands encoded as signed LEB128.
    case WebAssembly::OPERAND_I32IMM:
    case WebAssembly::OPERAND_I64IMM:
    case WebAssembly::OPERAND_SIGNATURE:
      Parsed = parseLEBImmediate(MI, Size, Bytes, true);
      break;
    // Floating-point immediates.
    case WebAssembly::OPERAND_F32IMM:
      Parsed = parseFPImmediate<float>(MI, Size, Bytes);
      break;
    case WebAssembly::OPERAND_F64IMM:
      Parsed = parseFPImmediate<double>(MI, Size, Bytes);
      break;
    case MCOI::OPERAND_REGISTER:
      // Registers are NOT encoded in the instruction stream, yet MC expects an
      // operand for each of them, so a placeholder register 0 is added.
      // FIXME: can MC re-generate register assignments or do we have to
      // do this? Since this function decodes a single instruction, we don't
      // have the proper context for tracking an operand stack here.
      MI.addOperand(MCOperand::createReg(0));
      break;
    default:
      llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
    }
    if (!Parsed)
      return MCDisassembler::Fail;
  }
  return MCDisassembler::Success;
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,

void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot,
const MCSubtargetInfo & /*STI*/) {
const MCSubtargetInfo &STI) {
// Print the instruction (this uses the AsmStrings from the .td files).
printInstruction(MI, OS);

Expand Down Expand Up @@ -194,20 +194,16 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}

void
WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
unsigned OpNo,
raw_ostream &O) {
/// Print operand \p OpNo of \p MI as a ":p2align=N" suffix on \p O. Nothing is
/// printed when the immediate equals the default p2align for the opcode.
void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(
    const MCInst *MI, unsigned OpNo, raw_ostream &O) {
  const int64_t Align = MI->getOperand(OpNo).getImm();
  const bool IsDefault =
      Align == WebAssembly::GetDefaultP2Align(MI->getOpcode());
  if (!IsDefault)
    O << ":p2align=" << Align;
}

void
WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
unsigned OpNo,
raw_ostream &O) {
void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(
const MCInst *MI, unsigned OpNo, raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
switch (WebAssembly::ExprType(Imm)) {
case WebAssembly::ExprType::Void: break;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssembly.td
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,15 @@ def WebAssemblyAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
}

// Assembly-writer record for this target; it is listed in the Target record's
// AssemblyWriters below.
// NOTE(review): "InstPrinter" is presumably the suffix of the generated
// printer class (i.e. WebAssemblyInstPrinter), and PassSubtarget = 0
// presumably means the generated print routine takes no subtarget argument —
// confirm against the AsmWriter class in Target.td.
def WebAssemblyAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
int PassSubtarget = 0;
int Variant = 0;
bit isMCAsmWriter = 1;
}

// Top-level target record: ties together the instruction set, the assembly
// parser and the assembly writer defined for WebAssembly.
def WebAssembly : Target {
let InstructionSet = WebAssemblyInstrInfo;
let AssemblyParsers = [WebAssemblyAsmParser];
let AssemblyWriters = [WebAssemblyAsmWriter];
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
}
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1

// This is technically a control-flow instruction, since all it affects is the
// instruction pointer (IP). It has no outputs, no inputs and no selection
// pattern.
// NOTE(review): the trailing "nop" and 0x01 are presumably the assembly string
// and the binary opcode — confirm against the definition of the I class.
def NOP : I<(outs), (ins), [], "nop", 0x01>;

// Placemarkers to indicate the start or end of a block or loop scope.
// These use/clobber VALUE_STACK to prevent them from being moved into the
// middle of an expression tree.
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/MC/Disassembler/WebAssembly/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Mark the tests in this directory as unsupported unless 'WebAssembly' is among
# the configured targets.
if 'WebAssembly' not in config.root.targets:
    config.unsupported = True

33 changes: 33 additions & 0 deletions llvm/test/MC/Disassembler/WebAssembly/wasm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# RUN: llvm-mc --disassemble %s -triple=wasm32-unknown-unknown | FileCheck %s

# CHECK: .text

# CHECK: nop
0x01

# CHECK: i32.add $0=, $0, $0
# NOTE: registers are meaningless, as there is no context for what they are.
0x6a

# CHECK: i64.const $0=, -1
0x42 0x7F

# CHECK: i64.load32_u $0=, 16($0):p2align=1
0x35 0x01 0x10

# CHECK: block
# 3
# FIXME: WebAssemblyInstPrinter does not currently print block number.
0x02 0x03

# CHECK: call_indirect
# $0=, 128, 0
# FIXME: WebAssemblyInstPrinter does not print immediates.
0x11 0x80 0x01 0x00

# CHECK: get_local $0=, 128
0x20 0x80 0x01

# Prefix byte example:
# CHECK: i64.trunc_u:sat/f64 $0=, $0
0xFC 0x07
45 changes: 44 additions & 1 deletion llvm/unittests/MC/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static const char *symbolLookupCallback(void *DisInfo, uint64_t ReferenceValue,
return nullptr;
}

TEST(Disassembler, Test1) {
TEST(Disassembler, X86Test) {
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllDisassemblers();
Expand Down Expand Up @@ -62,3 +62,46 @@ TEST(Disassembler, Test1) {

LLVMDisasmDispose(DCR);
}

// Disassembles three back-to-back WebAssembly instructions through the C
// disassembler API and checks the encoded length and printed text of each.
TEST(Disassembler, WebAssemblyTest) {
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllDisassemblers();

  // Encodings of: i32.add | i64.const -1 | i64.load32_u 16, p2align=1.
  uint8_t Bytes[] = {0x6a, 0x42, 0x7F, 0x35, 0x01, 0x10};
  LLVMDisasmContextRef DCR = LLVMCreateDisasm(
      "wasm32-unknown-unknown-elf", nullptr, 0, nullptr, symbolLookupCallback);
  // No disassembler could be created for this triple; nothing to check.
  // NOTE(review): presumably this happens when the WebAssembly target is not
  // part of the build — confirm.
  if (!DCR)
    return;

  // Sizes and offsets use the types of the corresponding
  // LLVMDisasmInstruction parameters rather than char/unsigned.
  const size_t OutStringSize = 100;
  char OutString[OutStringSize];
  uint8_t *BytesP = Bytes;
  uint64_t NumBytes = sizeof(Bytes);
  uint64_t PC = 0;

  // Decodes the instruction at the cursor, checks its size and text, then
  // advances the cursor past it.
  auto ExpectNext = [&](size_t ExpectedSize, StringRef ExpectedText) {
    const size_t InstSize = LLVMDisasmInstruction(DCR, BytesP, NumBytes, PC,
                                                  OutString, OutStringSize);
    EXPECT_EQ(InstSize, ExpectedSize);
    EXPECT_EQ(StringRef(OutString), ExpectedText);
    PC += InstSize;
    BytesP += InstSize;
    NumBytes -= InstSize;
  };

  ExpectNext(1, "\ti32.add \t$0=, $0, $0");
  ExpectNext(2, "\ti64.const\t$0=, -1");
  ExpectNext(3, "\ti64.load32_u\t$0=, 16($0):p2align=1");

  LLVMDisasmDispose(DCR);
}
1 change: 1 addition & 0 deletions llvm/utils/TableGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ add_tablegen(llvm-tblgen LLVM
X86FoldTablesEmitter.cpp
X86ModRMFilters.cpp
X86RecognizableInstr.cpp
WebAssemblyDisassemblerEmitter.cpp
CTagsEmitter.cpp
)
set_target_properties(llvm-tblgen PROPERTIES FOLDER "Tablegenning")
9 changes: 9 additions & 0 deletions llvm/utils/TableGen/DisassemblerEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//

#include "CodeGenTarget.h"
#include "WebAssemblyDisassemblerEmitter.h"
#include "X86DisassemblerTables.h"
#include "X86RecognizableInstr.h"
#include "llvm/TableGen/Error.h"
Expand Down Expand Up @@ -125,6 +126,14 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
return;
}

// WebAssembly has variable length opcodes, so can't use EmitFixedLenDecoder
// below (which depends on a Size table-gen Record), and also uses a custom
// disassembler.
if (Target.getName() == "WebAssembly") {
emitWebAssemblyDisassemblerTables(OS, Target.getInstructionsByEnumValue());
return;
}

// ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
if (Target.getName() == "ARM" || Target.getName() == "Thumb" ||
Target.getName() == "AArch64" || Target.getName() == "ARM64") {
Expand Down
Loading

0 comments on commit 16c1682

Please sign in to comment.