From 29c6ce5879c0c00fca3442d5211b6e28a0b336b5 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Wed, 26 Dec 2018 22:46:18 +0000 Subject: [PATCH] [WebAssembly] Make assembler check for proper nesting of control flow. Summary: It does so using a simple nesting stack, and gives clear errors upon violation. This is unique to wasm, since most CPUs do not have any nested constructs. Had to add an end of file check to the general assembler for this. Note: if/else/end instructions are not currently supported in our tablegen defs, so these tests will be enabled in a follow-up. They already pass the nesting check. Reviewers: dschuff, aheejin Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits Differential Revision: https://reviews.llvm.org/D55797 llvm-svn: 350078 --- .../llvm/MC/MCParser/MCTargetAsmParser.h | 3 + llvm/lib/MC/MCParser/AsmParser.cpp | 3 + .../AsmParser/WebAssemblyAsmParser.cpp | 112 +++++++++++++++++- .../MC/WebAssembly/basic-assembly-errors.s | 25 ++++ llvm/test/MC/WebAssembly/basic-assembly.s | 10 ++ llvm/test/MC/WebAssembly/simd-encodings.s | 3 + 6 files changed, 151 insertions(+), 5 deletions(-) create mode 100644 llvm/test/MC/WebAssembly/basic-assembly-errors.s diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index bb979422f0a5b..ccf13a6a4fb48 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -490,6 +490,9 @@ class MCTargetAsmParser : public MCAsmParserExtension { MCContext &Ctx) { return nullptr; } + + // For any checks or cleanups at the end of parsing. + virtual void onEndOfFile() {} }; } // end namespace llvm diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 545da359ea593..cf42a6f7075b9 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -899,6 +899,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { eatToEndOfStatement(); } + getTargetParser().onEndOfFile(); + printPendingErrors(); + // All errors should have been emitted. assert(!hasPendingError() && "unexpected error from parseStatement"); diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index f463aeaf82750..62229d390b3c7 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -172,6 +172,18 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { Instructions, } CurrentState = FileStart; + // For ensuring blocks are properly nested. + enum NestingType { + Function, + Block, + Loop, + Try, + If, + Else, + Undefined, + }; + std::vector NestingStack; + // We track this to see if a .functype following a label is the same, // as this is how we recognize the start of a function. MCSymbol *LastLabel = nullptr; @@ -184,10 +196,6 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } - void addSignature(std::unique_ptr &&Sig) { - Signatures.push_back(std::move(Sig)); - } - #define GET_ASSEMBLER_HEADER #include "WebAssemblyGenAsmMatcher.inc" @@ -197,10 +205,60 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { llvm_unreachable("ParseRegister is not implemented."); } - bool error(const StringRef &Msg, const AsmToken &Tok) { + bool error(const Twine &Msg, const AsmToken &Tok) { return Parser.Error(Tok.getLoc(), Msg + Tok.getString()); } + bool error(const Twine &Msg) { + return Parser.Error(Lexer.getTok().getLoc(), Msg); + } + + void addSignature(std::unique_ptr &&Sig) { + Signatures.push_back(std::move(Sig)); + } + + std::pair nestingString(NestingType NT) { + switch (NT) { + case Function: + return {"function", "end_function"}; + case Block: + return {"block", "end_block"}; + case Loop: + return {"loop", "end_loop"}; + case Try: + return {"try", "end_try"}; + case If: + return {"if", "end_if"}; + case Else: + return {"else", "end_if"}; + default: + llvm_unreachable("unknown NestingType"); + } + } + + void push(NestingType NT) { NestingStack.push_back(NT); } + + bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) { + if (NestingStack.empty()) + return error(Twine("End of block construct with no start: ") + Ins); + auto Top = NestingStack.back(); + if (Top != NT1 && Top != NT2) + return error(Twine("Block construct type mismatch, expected: ") + + nestingString(Top).second + ", instead got: " + Ins); + NestingStack.pop_back(); + return false; + } + + bool ensureEmptyNestingStack() { + auto err = !NestingStack.empty(); + while (!NestingStack.empty()) { + error(Twine("Unmatched block construct(s) at function end: ") + + nestingString(NestingStack.back()).first); + NestingStack.pop_back(); + } + return err; + } + bool isNext(AsmToken::TokenKind Kind) { auto Ok = Lexer.is(Kind); if (Ok) @@ -327,6 +385,45 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { // If no '.', there is no type prefix. auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second; + // If this instruction is part of a control flow structure, ensure + // proper nesting. + if (BaseName == "block") { + push(Block); + } else if (BaseName == "loop") { + push(Loop); + } else if (BaseName == "try") { + push(Try); + } else if (BaseName == "if") { + push(If); + } else if (BaseName == "else") { + if (pop(BaseName, If)) + return true; + push(Else); + } else if (BaseName == "catch") { + if (pop(BaseName, Try)) + return true; + push(Try); + } else if (BaseName == "catch_all") { + if (pop(BaseName, Try)) + return true; + push(Try); + } else if (BaseName == "end_if") { + if (pop(BaseName, If, Else)) + return true; + } else if (BaseName == "end_try") { + if (pop(BaseName, Try)) + return true; + } else if (BaseName == "end_loop") { + if (pop(BaseName, Loop)) + return true; + } else if (BaseName == "end_block") { + if (pop(BaseName, Block)) + return true; + } else if (BaseName == "end_function") { + if (pop(BaseName, Function) || ensureEmptyNestingStack()) + return true; + } + while (Lexer.isNot(AsmToken::EndOfStatement)) { auto &Tok = Lexer.getTok(); switch (Tok.getKind()) { @@ -476,7 +573,10 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); if (CurrentState == Label && WasmSym == LastLabel) { // This .functype indicates a start of a function. + if (ensureEmptyNestingStack()) + return true; CurrentState = FunctionStart; + push(Function); } auto Signature = make_unique(); if (parseSignature(Signature.get())) @@ -565,6 +665,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { } llvm_unreachable("Implement any new match types added!"); } + + void onEndOfFile() override { ensureEmptyNestingStack(); } }; } // end anonymous namespace diff --git a/llvm/test/MC/WebAssembly/basic-assembly-errors.s b/llvm/test/MC/WebAssembly/basic-assembly-errors.s new file mode 100644 index 0000000000000..06fc4f85e7cbc --- /dev/null +++ b/llvm/test/MC/WebAssembly/basic-assembly-errors.s @@ -0,0 +1,25 @@ +# RUN: not llvm-mc -triple=wasm32-unknown-unknown -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s 2>&1 | FileCheck %s + + .text + .section .text.main,"",@ + .type test0,@function +# CHECK: End of block construct with no start: end_try + end_try +test0: + .functype test0 () -> () +# CHECK: Block construct type mismatch, expected: end_function, instead got: end_loop + end_loop + block +# CHECK: Block construct type mismatch, expected: end_block, instead got: end_if + end_if + try + loop +# CHECK: Block construct type mismatch, expected: end_loop, instead got: end_function +# CHECK: error: Unmatched block construct(s) at function end: loop +# CHECK: error: Unmatched block construct(s) at function end: try +# CHECK: error: Unmatched block construct(s) at function end: block +# CHECK: error: Unmatched block construct(s) at function end: function + end_function +.Lfunc_end0: + .size test0, .Lfunc_end0-test0 + diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s index 6d95db4829172..e29e47d80aa9c 100644 --- a/llvm/test/MC/WebAssembly/basic-assembly.s +++ b/llvm/test/MC/WebAssembly/basic-assembly.s @@ -59,6 +59,11 @@ test0: end_block # default jumps here. i32.const 3 end_block # "switch" exit. + #if # These are not in tablegen defs yet.. + #if + #end_if + #else + #end_if f32x4.add # Test correct parsing of instructions with / and : in them: # TODO: enable once instruction has been added. @@ -129,6 +134,11 @@ test0: # CHECK-NEXT: end_block # label3: # CHECK-NEXT: i32.const 3 # CHECK-NEXT: end_block # label2: +# DONT-CHECK-NEXT: if +# DONT-CHECK-NEXT: if +# DONT-CHECK-NEXT: end_if +# DONT-CHECK-NEXT: else +# DONT-CHECK-NEXT: end_if # CHECK-NEXT: f32x4.add # CHECK-NEXT: i32.trunc_s/f32 # CHECK-NEXT: try diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index abb033ca21506..1709d0edbde6f 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -1,5 +1,8 @@ # RUN: llvm-mc -show-encoding -triple=wasm32-unkown-unknown -mattr=+sign-ext,+simd128 < %s | FileCheck %s +main: + .functype main () -> () + # CHECK: v128.load 48:p2align=0 # encoding: [0xfd,0x00,0x00,0x30] v128.load 48