245 changes: 245 additions & 0 deletions llvm/lib/BinaryFormat/MsgPackDocument.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
//===-- MsgPackDocument.cpp - MsgPack Document --------------------------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file implements a class that exposes a simple in-memory representation
/// of a document of MsgPack objects, that can be read from MsgPack, written to
/// MsgPack, and inspected and modified in memory. This is intended to be a
/// lighter-weight (in terms of memory allocations) replacement for
/// MsgPackTypes.
///
//===----------------------------------------------------------------------===//

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/BinaryFormat/MsgPackWriter.h"

using namespace llvm;
using namespace msgpack;

/// Replace the contents of this DocNode with an empty array node obtained from
/// the Document this node belongs to.
void DocNode::convertToArray() {
  auto *Doc = getDocument();
  *this = Doc->getArrayNode();
}

/// Replace the contents of this DocNode with an empty map node obtained from
/// the Document this node belongs to.
void DocNode::convertToMap() {
  auto *Doc = getDocument();
  *this = Doc->getMapNode();
}

/// Look up the map entry keyed by the string \p S. The string is wrapped in a
/// DocNode first so the DocNode-keyed find overload can do the lookup.
DocNode::MapTy::iterator MapDocNode::find(StringRef S) {
  DocNode Key = getDocument()->getNode(S);
  return find(Key);
}

/// Member access for MapDocNode by string key. The key node is built without
/// copying, so the string data behind \p S must remain valid for the lifetime
/// of the Document.
DocNode &MapDocNode::operator[](StringRef S) {
  DocNode Key = getDocument()->getNode(S);
  return (*this)[Key];
}

/// Member access for MapDocNode by DocNode key. Returns a reference to the
/// value stored under \p Key, inserting a new entry if the key is not yet
/// present. \p Key must not be an empty node.
DocNode &MapDocNode::operator[](DocNode Key) {
  assert(!Key.isEmpty());
  auto Result = Map->insert(MapTy::value_type(Key, DocNode()));
  DocNode &Value = Result.first->second;
  if (!Result.second)
    return Value; // Key already existed; hand back the stored value.
  // A freshly inserted value was default-constructed; give it a node from the
  // Document so its KindAndDoc is initialized.
  Value = getDocument()->getNode();
  return Value;
}

/// Array element access. If \p Index is past the current end, the array is
/// grown so that \p Index becomes its last valid position.
DocNode &ArrayDocNode::operator[](size_t Index) {
  // Pad with nodes obtained from the Document so that every new element has
  // its KindAndDoc initialized.
  if (Index >= size())
    Array->resize(Index + 1, getDocument()->getNode());
  return (*Array)[Index];
}

// A level in the document reading stack: one array or map node that
// Document::readFromBlob is currently filling in.
struct StackLevel {
// The array or map node whose entries are being read.
DocNode Node;
// Number of entries the container is expected to hold. readFromBlob compares
// this against the container's current size to decide when the level is done.
size_t Length;
// Points to map entry when we have just processed a map key, i.e. the slot
// that the next value read must be stored into; nullptr otherwise.
DocNode *MapEntry;
};

/// Read a document from a binary MsgPack blob.
///
/// The blob data must remain valid for the lifetime of this Document, because
/// a string node in the document holds a StringRef into the original blob.
///
/// \param Blob  the MsgPack-encoded bytes to parse.
/// \param Multi if true, the root is set to an array and every top-level
///              object found in the blob is appended to it; if false, only a
///              single top-level object is read, even if more follow, and the
///              root is set to that object.
/// \returns false if reading fails (malformed data, data ending in the middle
///          of an object, or an object kind not handled below); true otherwise.
bool Document::readFromBlob(StringRef Blob, bool Multi) {
  msgpack::Reader MPReader(Blob);
  SmallVector<StackLevel, 4> Stack;
  if (Multi) {
    // Create the array for multiple top-level objects. Its final length is not
    // known up front, so use (size_t)-1 as a length the completion check below
    // will not match; this level is only left when the reader runs out of data.
    Root = getArrayNode();
    Stack.push_back(StackLevel({Root, (size_t)-1, nullptr}));
  }
  do {
    // On to next element (or key if doing a map key next).
    // Read the value.
    Object Obj;
    // Reader::read yields an Expected<bool>: an Error for malformed input,
    // false at end of buffer, true when an object was read. The Error state
    // and the boolean must be checked separately; testing only the Expected
    // would treat end-of-buffer as a successful read of an uninitialized Obj.
    auto ReadObj = MPReader.read(Obj);
    if (!ReadObj) {
      // FIXME: Propagate the Error to the caller instead of discarding it.
      consumeError(ReadObj.takeError());
      return false;
    }
    if (!ReadObj.get()) {
      if (Multi && Stack.size() == 1) {
        // OK to finish here as we've just done a top-level element with Multi
        break;
      }
      return false; // Finished too early
    }
    // Convert it into a DocNode.
    DocNode Node;
    switch (Obj.Kind) {
    case Type::Nil:
      Node = getNode();
      break;
    case Type::Int:
      Node = getNode(Obj.Int);
      break;
    case Type::UInt:
      Node = getNode(Obj.UInt);
      break;
    case Type::Boolean:
      Node = getNode(Obj.Bool);
      break;
    case Type::Float:
      Node = getNode(Obj.Float);
      break;
    case Type::String:
      Node = getNode(Obj.Raw);
      break;
    case Type::Map:
      Node = getMapNode();
      break;
    case Type::Array:
      Node = getArrayNode();
      break;
    default:
      return false; // Raw and Extension not supported
    }

    // Store it.
    if (Stack.empty())
      Root = Node;
    else if (Stack.back().Node.getKind() == Type::Array) {
      // Reading an array entry.
      auto &Array = Stack.back().Node.getArray();
      Array.push_back(Node);
    } else {
      auto &Map = Stack.back().Node.getMap();
      if (!Stack.back().MapEntry) {
        // Reading a map key. Remember the slot so the next object read is
        // stored as this key's value.
        Stack.back().MapEntry = &Map[Node];
      } else {
        // Reading the value for the map key read in the last iteration.
        *Stack.back().MapEntry = Node;
        Stack.back().MapEntry = nullptr;
      }
    }

    // See if we're starting a new array or map.
    switch (Node.getKind()) {
    case msgpack::Type::Array:
    case msgpack::Type::Map:
      Stack.push_back(StackLevel({Node, Obj.Length, nullptr}));
      break;
    default:
      break;
    }

    // Pop finished stack levels. A level is finished once its container holds
    // the announced number of entries (and, for a map, no key is still waiting
    // for its value).
    while (!Stack.empty()) {
      if (Stack.back().Node.getKind() == msgpack::Type::Array) {
        if (Stack.back().Node.getArray().size() != Stack.back().Length)
          break;
      } else {
        if (Stack.back().MapEntry ||
            Stack.back().Node.getMap().size() != Stack.back().Length)
          break;
      }
      Stack.pop_back();
    }
  } while (!Stack.empty());
  return true;
}

// A level in the document writing stack: one array or map node that
// Document::writeToBlob is part-way through emitting.
struct WriterStackLevel {
// The array or map node being written.
DocNode Node;
// Position of the map entry being written; used when Node is a map.
DocNode::MapTy::iterator MapIt;
// Position of the next array element to write; used when Node is an array.
DocNode::ArrayTy::iterator ArrayIt;
// For a map: true if the key at MapIt is to be written next, false if its
// value is.
bool OnKey;
};

/// Write a MsgPack document to a binary MsgPack blob.
///
/// The tree rooted at getRoot() is walked iteratively with an explicit stack of
/// the arrays and maps that are part-way through being written.
///
/// \param Blob receives the encoded bytes; any previous contents are discarded.
void Document::writeToBlob(std::string &Blob) {
  Blob.clear();
  raw_string_ostream OS(Blob);
  msgpack::Writer MPWriter(OS);
  SmallVector<WriterStackLevel, 4> Stack;
  DocNode Node = getRoot();
  for (;;) {
    switch (Node.getKind()) {
    case Type::Array:
      MPWriter.writeArraySize(Node.getArray().size());
      Stack.push_back(
          {Node, DocNode::MapTy::iterator(), Node.getArray().begin(), false});
      break;
    case Type::Map:
      MPWriter.writeMapSize(Node.getMap().size());
      Stack.push_back(
          {Node, Node.getMap().begin(), DocNode::ArrayTy::iterator(), true});
      break;
    case Type::Nil:
      MPWriter.writeNil();
      break;
    case Type::Boolean:
      MPWriter.write(Node.getBool());
      break;
    case Type::Int:
      MPWriter.write(Node.getInt());
      break;
    case Type::UInt:
      MPWriter.write(Node.getUInt());
      break;
    case Type::String:
      MPWriter.write(Node.getString());
      break;
    case Type::Float:
      // Float nodes are produced by readFromBlob and by YAML input, so they
      // must be writable too.
      MPWriter.write(Node.getFloat());
      break;
    default:
      llvm_unreachable("unhandled msgpack object kind");
    }
    // Pop finished stack levels, i.e. containers whose iterator has reached
    // the end.
    while (!Stack.empty()) {
      if (Stack.back().Node.getKind() == Type::Map) {
        if (Stack.back().MapIt != Stack.back().Node.getMap().end())
          break;
      } else {
        if (Stack.back().ArrayIt != Stack.back().Node.getArray().end())
          break;
      }
      Stack.pop_back();
    }
    if (Stack.empty())
      break;
    // Get the next value.
    if (Stack.back().Node.getKind() == Type::Map) {
      if (Stack.back().OnKey) {
        // Do the key of a key,value pair in a map.
        Node = Stack.back().MapIt->first;
        Stack.back().OnKey = false;
      } else {
        // Do the value, then advance to the next pair.
        Node = Stack.back().MapIt->second;
        ++Stack.back().MapIt;
        Stack.back().OnKey = true;
      }
    } else {
      Node = *Stack.back().ArrayIt;
      ++Stack.back().ArrayIt;
    }
  }
}

249 changes: 249 additions & 0 deletions llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
//===-- MsgPackDocumentYAML.cpp - MsgPack Document YAML interface -------*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// This file implements YAMLIO on a msgpack::Document.
//
//===----------------------------------------------------------------------===//

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/YAMLTraits.h"

using namespace llvm;
using namespace msgpack;

namespace {

// Struct used to represent scalar node. (MapDocNode and ArrayDocNode already
// exist in MsgPackDocument.h.)
struct ScalarDocNode : DocNode {
ScalarDocNode(DocNode N) : DocNode(N) {}

/// Get the YAML tag for this ScalarDocNode. This normally returns ""; it only
/// returns something else if the result of toString would be ambiguous, e.g.
/// a string that parses as a number or boolean.
StringRef getYAMLTag() const;
};

} // namespace

/// Convert this DocNode to a string, assuming it is scalar.
///
/// Strings are returned as-is, Nil becomes the empty string, booleans become
/// "true"/"false", and numbers are formatted as text. A UInt is printed in
/// hexadecimal when the owning Document is in hex mode.
///
/// \returns the textual form of the scalar value.
std::string DocNode::toString() const {
  std::string S;
  raw_string_ostream OS(S);
  switch (getKind()) {
  case msgpack::Type::String:
    OS << Raw;
    break;
  case msgpack::Type::Nil:
    break;
  case msgpack::Type::Boolean:
    OS << (Bool ? "true" : "false");
    break;
  case msgpack::Type::Int:
    OS << Int;
    break;
  case msgpack::Type::UInt:
    if (getDocument()->getHexMode())
      OS << format("%#llx", (unsigned long long)UInt);
    else
      OS << UInt;
    break;
  case msgpack::Type::Float:
    OS << Float;
    break;
  default:
    llvm_unreachable("not scalar");
    break;
  }
  // Go through str() so that anything still held in the stream's buffer is
  // flushed into S before the result is produced; returning S directly would
  // only be correct if the stream happened to be destroyed first.
  return OS.str();
}

/// Parse \p S and set this DocNode to the resulting scalar.
///
/// \p Tag is "" or a YAML tag. With an explicit tag only the matching kind is
/// attempted; with no tag the kinds are tried in the order integer, boolean,
/// float, string. A string result is copied into the Document, so \p S does
/// not need to outlive this call.
///
/// \returns "" on success, otherwise the error text from the scalar parser.
StringRef DocNode::fromString(StringRef S, StringRef Tag) {
  // The tag tag:yaml.org,2002:str is handled exactly like having no tag.
  if (Tag == "tag:yaml.org,2002:str")
    Tag = "";
  const bool Untagged = Tag.empty();

  if (Untagged || Tag == "!int") {
    // Try unsigned int then signed int.
    *this = getDocument()->getNode(uint64_t(0));
    StringRef Err = yaml::ScalarTraits<uint64_t>::input(S, nullptr, getUInt());
    if (!Err.empty()) {
      *this = getDocument()->getNode(int64_t(0));
      Err = yaml::ScalarTraits<int64_t>::input(S, nullptr, getInt());
    }
    // Stop on success, or on failure when an int was explicitly requested.
    if (Err.empty() || !Untagged)
      return Err;
  }

  if (Tag == "!nil") {
    *this = getDocument()->getNode();
    return "";
  }

  if (Untagged || Tag == "!bool") {
    *this = getDocument()->getNode(false);
    StringRef Err = yaml::ScalarTraits<bool>::input(S, nullptr, getBool());
    if (Err.empty() || !Untagged)
      return Err;
  }

  if (Untagged || Tag == "!float") {
    *this = getDocument()->getNode(0.0);
    StringRef Err = yaml::ScalarTraits<double>::input(S, nullptr, getFloat());
    if (Err.empty() || !Untagged)
      return Err;
  }

  // Everything else is a string.
  assert((Tag == "!str" || Tag == "") && "unsupported tag");
  std::string Parsed;
  StringRef Err = yaml::ScalarTraits<std::string>::input(S, nullptr, Parsed);
  if (Err.empty())
    *this = getDocument()->getNode(Parsed, /*Copy=*/true);
  return Err;
}

/// Get the YAML tag for this ScalarDocNode. The result is normally ""; a
/// non-empty tag is returned only when the text from toString would be
/// ambiguous, e.g. a string that parses as a number or boolean. Nil always
/// gets the tag "!nil".
StringRef ScalarDocNode::getYAMLTag() const {
  const auto Kind = getKind();
  if (Kind == msgpack::Type::Nil)
    return "!nil";

  // Round-trip the value through its untagged text form. If parsing that text
  // gives back the same kind, no tag is needed.
  ScalarDocNode RoundTrip = getDocument()->getNode();
  const std::string Text = toString();
  RoundTrip.fromString(Text, "");
  const auto ParsedKind = RoundTrip.getKind();
  if (ParsedKind == Kind)
    return "";

  // Tolerate signedness of int changing, as tags do not differentiate between
  // them anyway.
  const bool KindIsInteger =
      Kind == msgpack::Type::Int || Kind == msgpack::Type::UInt;
  const bool ParsedIsInteger =
      ParsedKind == msgpack::Type::Int || ParsedKind == msgpack::Type::UInt;
  if (KindIsInteger && ParsedIsInteger)
    return "";

  // We do need a tag.
  switch (Kind) {
  case msgpack::Type::String:
    return "!str";
  case msgpack::Type::Int:
  case msgpack::Type::UInt:
    return "!int";
  case msgpack::Type::Boolean:
    return "!bool";
  case msgpack::Type::Float:
    return "!float";
  default:
    llvm_unreachable("unrecognized kind");
  }
}

namespace llvm {
namespace yaml {

/// YAMLIO for DocNode: classifies a node as map, sequence or scalar and
/// provides the matching typed view of it.
template <> struct PolymorphicTraits<DocNode> {

  /// Map and Array nodes are reported as such; every other kind is a scalar.
  static NodeKind getKind(const DocNode &N) {
    const auto Kind = N.getKind();
    if (Kind == msgpack::Type::Map)
      return NodeKind::Map;
    if (Kind == msgpack::Type::Array)
      return NodeKind::Sequence;
    return NodeKind::Scalar;
  }

  /// View \p N as a map, asking getMap to convert it if necessary.
  static MapDocNode &getAsMap(DocNode &N) {
    return N.getMap(/*Convert=*/true);
  }

  /// View \p N as an array, asking getArray to convert it if necessary.
  static ArrayDocNode &getAsSequence(DocNode &N) {
    N.getArray(/*Convert=*/true);
    return static_cast<ArrayDocNode &>(N);
  }

  /// View \p N as a scalar.
  static ScalarDocNode &getAsScalar(DocNode &N) {
    return static_cast<ScalarDocNode &>(N);
  }
};

/// YAMLIO for ScalarDocNode
template <> struct TaggedScalarTraits<ScalarDocNode> {

static void output(const ScalarDocNode &S, void *Ctxt, raw_ostream &OS,
raw_ostream &TagOS) {
TagOS << S.getYAMLTag();
OS << S.toString();
}

static StringRef input(StringRef Str, StringRef Tag, void *Ctxt,
ScalarDocNode &S) {
return S.fromString(Str, Tag);
}

static QuotingType mustQuote(const ScalarDocNode &S, StringRef ScalarStr) {
switch (S.getKind()) {
case Type::Int:
return ScalarTraits<int64_t>::mustQuote(ScalarStr);
case Type::UInt:
return ScalarTraits<uint64_t>::mustQuote(ScalarStr);
case Type::Nil:
return ScalarTraits<StringRef>::mustQuote(ScalarStr);
case Type::Boolean:
return ScalarTraits<bool>::mustQuote(ScalarStr);
case Type::Float:
return ScalarTraits<double>::mustQuote(ScalarStr);
case Type::Binary:
case Type::String:
return ScalarTraits<std::string>::mustQuote(ScalarStr);
default:
llvm_unreachable("unrecognized ScalarKind");
}
}
};

/// YAMLIO for MapDocNode
template <> struct CustomMappingTraits<MapDocNode> {

static void inputOne(IO &IO, StringRef Key, MapDocNode &M) {
ScalarDocNode KeyObj = M.getDocument()->getNode();
KeyObj.fromString(Key, "");
IO.mapRequired(Key.str().c_str(), M.getMap()[KeyObj]);
}

static void output(IO &IO, MapDocNode &M) {
for (auto I : M.getMap()) {
IO.mapRequired(I.first.toString().c_str(), I.second);
}
}
};

/// YAMLIO for ArrayDocNode: exposes the array as a YAML sequence.
template <> struct SequenceTraits<ArrayDocNode> {

  /// Number of elements currently in \p A.
  static size_t size(IO &IO, ArrayDocNode &A) {
    return A.size();
  }

  /// Element \p Index of \p A, accessed through ArrayDocNode::operator[].
  static DocNode &element(IO &IO, ArrayDocNode &A, size_t Index) {
    DocNode &Element = A[Index];
    return Element;
  }
};

} // namespace yaml
} // namespace llvm

/// Emit this MsgPack Document, starting at its root node, as YAML text on
/// \p OS.
void msgpack::Document::toYAML(raw_ostream &OS) {
  yaml::Output Emitter(OS);
  Emitter << getRoot();
}

/// Replace the contents of this MsgPack document with what is parsed from the
/// YAML text \p S. Returns false on failure.
bool msgpack::Document::fromYAML(StringRef S) {
  clear();
  yaml::Input Parser(S);
  Parser >> getRoot();
  if (Parser.error())
    return false;
  return true;
}

1 change: 1 addition & 0 deletions llvm/unittests/BinaryFormat/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(BinaryFormatTests
DwarfTest.cpp
MachOTest.cpp
MsgPackDocumentTest.cpp
MsgPackReaderTest.cpp
MsgPackTypesTest.cpp
MsgPackWriterTest.cpp
Expand Down
168 changes: 168 additions & 0 deletions llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
//===- MsgPackDocumentTest.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "gtest/gtest.h"

using namespace llvm;
using namespace msgpack;

// Reading a blob that holds a single integer gives an Int root with that value.
TEST(MsgPackDocument, TestReadInt) {
  Document Doc;
  // The blob contains a NUL byte, so its length is passed explicitly.
  const StringRef Blob("\xd0\x00", 2);
  bool Parsed = Doc.readFromBlob(Blob, /*Multi=*/false);
  ASSERT_TRUE(Parsed);
  auto &Root = Doc.getRoot();
  ASSERT_EQ(Root.getKind(), Type::Int);
  ASSERT_EQ(Root.getInt(), 0);
}

// Reading a two-element array [1, nil] gives an Array root whose elements have
// the expected kinds and values.
TEST(MsgPackDocument, TestReadArray) {
  Document Doc;
  const StringRef Blob("\x92\xd0\x01\xc0");
  bool Parsed = Doc.readFromBlob(Blob, /*Multi=*/false);
  ASSERT_TRUE(Parsed);
  ASSERT_EQ(Doc.getRoot().getKind(), Type::Array);
  auto Elements = Doc.getRoot().getArray();
  ASSERT_EQ(Elements.size(), 2u);
  // First element: the integer 1.
  auto IntElem = Elements[0];
  ASSERT_EQ(IntElem.getKind(), Type::Int);
  ASSERT_EQ(IntElem.getInt(), 1);
  // Second element: nil.
  auto NilElem = Elements[1];
  ASSERT_EQ(NilElem.getKind(), Type::Nil);
}

// Reading a map {"foo": 1, "bar": 2} gives a Map root with both entries.
TEST(MsgPackDocument, TestReadMap) {
  Document Doc;
  // The literal is split so the hex escapes do not swallow the letters that
  // follow them.
  const StringRef Blob("\x82\xa3"
                       "foo"
                       "\xd0\x01\xa3"
                       "bar"
                       "\xd0\x02");
  bool Parsed = Doc.readFromBlob(Blob, /*Multi=*/false);
  ASSERT_TRUE(Parsed);
  ASSERT_EQ(Doc.getRoot().getKind(), Type::Map);
  auto Entries = Doc.getRoot().getMap();
  ASSERT_EQ(Entries.size(), 2u);
  auto FooValue = Entries["foo"];
  ASSERT_EQ(FooValue.getKind(), Type::Int);
  ASSERT_EQ(FooValue.getInt(), 1);
  auto BarValue = Entries["bar"];
  ASSERT_EQ(BarValue.getKind(), Type::Int);
  ASSERT_EQ(BarValue.getInt(), 2);
}

// A document whose root is the integer 1 is written as the single byte 0x01.
TEST(MsgPackDocument, TestWriteInt) {
  Document Doc;
  Doc.getRoot() = Doc.getNode(int64_t(1));
  std::string Blob;
  Doc.writeToBlob(Blob);
  ASSERT_EQ(Blob, "\x01");
}

// A root array [1, nil] is written as an array header followed by its two
// elements.
TEST(MsgPackDocument, TestWriteArray) {
  Document Doc;
  // Convert the (initially non-array) root into an array and fill it.
  auto Elements = Doc.getRoot().getArray(/*Convert=*/true);
  Elements.push_back(Doc.getNode(int64_t(1)));
  Elements.push_back(Doc.getNode());
  std::string Blob;
  Doc.writeToBlob(Blob);
  ASSERT_EQ(Blob, "\x92\x01\xc0");
}

// A root map {"foo": 1, "bar": 2} is written with "bar" ahead of "foo", even
// though "foo" was inserted first.
TEST(MsgPackDocument, TestWriteMap) {
  Document Doc;
  auto Entries = Doc.getRoot().getMap(/*Convert=*/true);
  Entries["foo"] = Doc.getNode(int64_t(1));
  Entries["bar"] = Doc.getNode(int64_t(2));
  std::string Blob;
  Doc.writeToBlob(Blob);
  // The literal is split so the hex escapes do not swallow the letters that
  // follow them.
  ASSERT_EQ(Blob, "\x82\xa3"
                  "bar"
                  "\x02\xa3"
                  "foo"
                  "\x01");
}

// A root array [1, 2] is rendered as a two-item YAML sequence.
TEST(MsgPackDocument, TestOutputYAMLArray) {
  Document Doc;
  auto Elements = Doc.getRoot().getArray(/*Convert=*/true);
  Elements.push_back(Doc.getNode(int64_t(1)));
  Elements.push_back(Doc.getNode(int64_t(2)));
  std::string Text;
  raw_string_ostream TextOS(Text);
  Doc.toYAML(TextOS);
  ASSERT_EQ(TextOS.str(), "---\n- 1\n- 2\n...\n");
}

// Explicit tags in a YAML sequence select the node kind: "!int 0x1" becomes a
// UInt and "!str 2" stays a String.
TEST(MsgPackDocument, TestInputYAMLArray) {
  Document Doc;
  bool Parsed = Doc.fromYAML("---\n- !int 0x1\n- !str 2\n...\n");
  ASSERT_TRUE(Parsed);
  ASSERT_EQ(Doc.getRoot().getKind(), Type::Array);
  auto Elements = Doc.getRoot().getArray();
  ASSERT_EQ(Elements.size(), 2u);
  auto IntElem = Elements[0];
  ASSERT_EQ(IntElem.getKind(), Type::UInt);
  ASSERT_EQ(IntElem.getUInt(), 1u);
  auto StrElem = Elements[1];
  ASSERT_EQ(StrElem.getKind(), Type::String);
  ASSERT_EQ(StrElem.getString(), "2");
}

// A root map with scalar entries and one nested map is rendered as a YAML
// mapping.
TEST(MsgPackDocument, TestOutputYAMLMap) {
  Document Doc;
  auto Entries = Doc.getRoot().getMap(/*Convert=*/true);
  Entries["foo"] = Doc.getNode(int64_t(1));
  Entries["bar"] = Doc.getNode(uint64_t(2));
  // The nested map is stored under "qux" first and populated afterwards.
  auto Nested = Doc.getMapNode();
  Entries["qux"] = Nested;
  Nested["baz"] = Doc.getNode(true);
  std::string Text;
  raw_string_ostream TextOS(Text);
  Doc.toYAML(TextOS);
  ASSERT_EQ(TextOS.str(), "---\n"
                          "bar: 2\n"
                          "foo: 1\n"
                          "qux: \n"
                          " baz: true\n"
                          "...\n");
}

// With hex mode enabled, the UInt entry is rendered in hexadecimal while the
// Int entry is still rendered in decimal.
TEST(MsgPackDocument, TestOutputYAMLMapHex) {
  Document Doc;
  Doc.setHexMode();
  auto Entries = Doc.getRoot().getMap(/*Convert=*/true);
  Entries["foo"] = Doc.getNode(int64_t(1));
  Entries["bar"] = Doc.getNode(uint64_t(2));
  // The nested map is stored under "qux" first and populated afterwards.
  auto Nested = Doc.getMapNode();
  Entries["qux"] = Nested;
  Nested["baz"] = Doc.getNode(true);
  std::string Text;
  raw_string_ostream TextOS(Text);
  Doc.toYAML(TextOS);
  ASSERT_EQ(TextOS.str(), "---\n"
                          "bar: 0x2\n"
                          "foo: 1\n"
                          "qux: \n"
                          " baz: true\n"
                          "...\n");
}

// Explicit tags in a YAML mapping select the value kind: "!int 0x1" becomes a
// UInt and "!str 2" stays a String.
TEST(MsgPackDocument, TestInputYAMLMap) {
  Document Doc;
  bool Parsed = Doc.fromYAML("---\nfoo: !int 0x1\nbaz: !str 2\n...\n");
  ASSERT_TRUE(Parsed);
  ASSERT_EQ(Doc.getRoot().getKind(), Type::Map);
  auto Entries = Doc.getRoot().getMap();
  ASSERT_EQ(Entries.size(), 2u);
  auto IntValue = Entries["foo"];
  ASSERT_EQ(IntValue.getKind(), Type::UInt);
  ASSERT_EQ(IntValue.getUInt(), 1u);
  auto StrValue = Entries["baz"];
  ASSERT_EQ(StrValue.getKind(), Type::String);
  ASSERT_EQ(StrValue.getString(), "2");
}