Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PostfixExpression: move parser out of NativePDB internals
Summary: The postfix expressions in PDB and breakpad symbol files are similar enough that they can be parsed by the same parser. This patch generalizes the parser in the NativePDB plugin and moves it into the PostfixExpression file created in the previous commit (r358976). The generalization consists of treating any unrecognised token as a "symbol" node (previously these would only be created for tokens starting with "$", and other tokens would abort the parse). This is needed because breakpad symbols can also contain ".cfa" tokens, which refer to the frame's CFA. The cosmetic changes include: - using a factory function instead of a class for creating nodes (this is more generic as it allows the same BumpPtrAllocator to be used for other things too) - using a dedicated function for parsing operator tokens instead of a DenseMap (more efficient as we don't need to create the DenseMap every time). Reviewers: amccarth, clayborg, JDevlieghere, aleksandr.urakov Subscribers: jasonmolenda, lldb-commits, markmentovai, mgorny Differential Revision: https://reviews.llvm.org/D61003 llvm-svn: 359073
- Loading branch information
Showing
6 changed files
with
205 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
//===-- PostfixExpression.cpp -----------------------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This file implements support for postfix expressions found in several symbol | ||
// file formats, and their conversion to DWARF. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "lldb/Symbol/PostfixExpression.h" | ||
#include "llvm/ADT/StringExtras.h" | ||
|
||
using namespace lldb_private; | ||
using namespace lldb_private::postfix; | ||
|
||
/// Map a token to the binary operator it spells, if any.
///
/// Only the exact single-character tokens "@", "-" and "+" are recognised;
/// every other token (including the empty one) yields llvm::None.
static llvm::Optional<BinaryOpNode::OpType>
GetBinaryOpType(llvm::StringRef token) {
  if (token == "@")
    return BinaryOpNode::Align;
  if (token == "-")
    return BinaryOpNode::Minus;
  if (token == "+")
    return BinaryOpNode::Plus;
  return llvm::None;
}
|
||
/// Map a token to the unary operator it spells, if any.
///
/// The only recognised spelling is the exact token "^" (dereference);
/// anything else yields llvm::None.
static llvm::Optional<UnaryOpNode::OpType>
GetUnaryOpType(llvm::StringRef token) {
  const bool is_deref = token.size() == 1 && token.front() == '^';
  if (!is_deref)
    return llvm::None;
  return UnaryOpNode::Deref;
}
|
||
/// Parse a postfix (reverse Polish) expression into a tree of nodes.
///
/// The input is split into tokens with getToken(). Each token is classified,
/// in this order, as a binary operator, a unary operator, a base-10 integer
/// literal, or (for anything else) a symbol. Operators pop their operands
/// from a stack of already-built nodes and push the combined node back.
///
/// \param expr  The expression text to parse.
/// \param alloc Allocator handed to MakeNode for every node that is created.
/// \return The root node, or nullptr if an operator lacks operands or if the
///         expression does not reduce to exactly one node (this includes the
///         empty expression).
Node *postfix::Parse(llvm::StringRef expr, llvm::BumpPtrAllocator &alloc) {
  llvm::SmallVector<Node *, 4> operands;

  for (;;) {
    llvm::StringRef word;
    std::tie(word, expr) = getToken(expr);
    if (word.empty())
      break; // No tokens left.

    if (auto binary_op = GetBinaryOpType(word)) {
      // Binary operator: needs two operands; the right one was pushed last.
      if (operands.size() < 2)
        return nullptr;

      Node *rhs = operands.pop_back_val();
      Node *lhs = operands.pop_back_val();
      operands.push_back(
          MakeNode<BinaryOpNode>(alloc, *binary_op, *lhs, *rhs));
    } else if (auto unary_op = GetUnaryOpType(word)) {
      // Unary operator: needs one operand.
      if (operands.empty())
        return nullptr;

      Node *inner = operands.pop_back_val();
      operands.push_back(MakeNode<UnaryOpNode>(alloc, *unary_op, *inner));
    } else {
      uint32_t literal;
      if (to_integer(word, literal, 10)) {
        // Integer literal.
        operands.push_back(MakeNode<IntegerNode>(alloc, literal));
      } else {
        // Any other token is kept as a symbol.
        operands.push_back(MakeNode<SymbolNode>(alloc, word));
      }
    }
  }

  // A well-formed expression leaves exactly one node on the stack.
  return operands.size() == 1 ? operands.back() : nullptr;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
//===-- PostfixExpressionTest.cpp -------------------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "lldb/Symbol/PostfixExpression.h" | ||
#include "llvm/Support/FormatVariadic.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "gtest/gtest.h" | ||
|
||
using namespace lldb_private; | ||
using namespace lldb_private::postfix; | ||
|
||
/// Return the operator character used to print a binary operator type.
static std::string ToString(BinaryOpNode::OpType type) {
  const char *spelling = nullptr;
  switch (type) {
  case BinaryOpNode::Align:
    spelling = "@";
    break;
  case BinaryOpNode::Minus:
    spelling = "-";
    break;
  case BinaryOpNode::Plus:
    spelling = "+";
    break;
  }
  if (!spelling)
    llvm_unreachable("Fully covered switch!");
  return spelling;
}
|
||
/// Return the operator character used to print a unary operator type.
static std::string ToString(UnaryOpNode::OpType type) {
  const char *spelling = nullptr;
  switch (type) {
  case UnaryOpNode::Deref:
    spelling = "^";
    break;
  }
  if (!spelling)
    llvm_unreachable("Fully covered switch!");
  return spelling;
}
|
||
/// Test helper that renders a postfix expression tree as a string so that
/// parse results can be compared against expected text.
///
/// Output format, as produced by the Visit overloads below:
///   - binary operator: "<op>(<left>, <right>)"
///   - unary operator:  "<op>(<operand>)"
///   - integer:         "int(<value>)"
///   - register:        "reg(<regnum>)"
///   - symbol:          the symbol name, verbatim
struct ASTPrinter : public Visitor<std::string> {
protected:
  // Children are rendered by handing them back to Dispatch (inherited from
  // Visitor; presumably it selects the Visit overload for the node's type).
  std::string Visit(BinaryOpNode &binary, Node *&) override {
    return llvm::formatv("{0}({1}, {2})", ToString(binary.GetOpType()),
                         Dispatch(binary.Left()), Dispatch(binary.Right()));
  }

  std::string Visit(IntegerNode &integer, Node *&) override {
    return llvm::formatv("int({0})", integer.GetValue());
  }

  // The parameter is a reference named `reg`; "&reg" must be spelled out
  // literally here because it is easily mangled into the (R) sign when the
  // source passes through HTML entity decoding.
  std::string Visit(RegisterNode &reg, Node *&) override {
    return llvm::formatv("reg({0})", reg.GetRegNum());
  }

  std::string Visit(SymbolNode &symbol, Node *&) override {
    return symbol.GetName();
  }

  std::string Visit(UnaryOpNode &unary, Node *&) override {
    return llvm::formatv("{0}({1})", ToString(unary.GetOpType()),
                         Dispatch(unary.Operand()));
  }

public:
  /// Render \p node as text, or return the literal string "nullptr" when
  /// \p node is null (e.g. when parsing failed).
  static std::string Print(Node *node) {
    if (node)
      return ASTPrinter().Dispatch(node);
    return "nullptr";
  }
};
|
||
/// Parse \p expr and return the ASTPrinter rendering of the result
/// ("nullptr" when Parse returns a null tree).
static std::string ParseAndStringify(llvm::StringRef expr) {
  // The allocator is local, so it outlives the nodes only until this
  // function returns; the tree is rendered to a string before that.
  llvm::BumpPtrAllocator arena;
  Node *root = Parse(expr, arena);
  return ASTPrinter::Print(root);
}
|
||
// Checks Parse() on well-formed expressions (by comparing the printed tree)
// and on malformed ones (which must all print as "nullptr").
TEST(PostfixExpression, Parse) {
  struct Case {
    const char *expected; // Expected ASTPrinter rendering.
    const char *input;    // Postfix expression handed to the parser.
  };

  const Case well_formed[] = {
      {"int(47)", "47"},
      {"$foo", "$foo"},
      {"+(int(1), int(2))", "1 2 +"},
      {"-(int(1), int(2))", "1 2 -"},
      {"@(int(1), int(2))", "1 2 @"},
      {"+(int(1), +(int(2), int(3)))", "1 2 3 + +"},
      {"+(+(int(1), int(2)), int(3))", "1 2 + 3 +"},
      {"^(int(1))", "1 ^"},
      {"^(^(int(1)))", "1 ^ ^"},
      {"^(+(int(1), ^(int(2))))", "1 2 ^ + ^"},
      {"-($foo, int(47))", "$foo 47 -"},
  };
  for (const Case &c : well_formed)
    EXPECT_EQ(c.expected, ParseAndStringify(c.input))
        << "input: \"" << c.input << "\"";

  // Operators without enough operands, leftover operands, and the empty
  // expression must all be rejected.
  const char *const malformed[] = {
      "+", "^", "1 +", "1 2 ^", "1 2 3 +", "^ 1", "+ 1 2", "1 + 2", "1 2", "",
  };
  for (const char *input : malformed)
    EXPECT_EQ("nullptr", ParseAndStringify(input))
        << "input: \"" << input << "\"";
}