Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[BinaryFormat] Add MessagePack reader/writer
Add support for reading and writing MessagePack, a binary object serialization format which aims to be more compact than text formats like JSON or YAML. The specification can be found at https://github.com/msgpack/msgpack/blob/master/spec.md. It will be used for encoding metadata in AMDGPU code objects. Differential Revision: https://reviews.llvm.org/D44429 llvm-svn: 340457
- Loading branch information
1 parent
f3c39a7
commit 20f9cd8
Showing
10 changed files
with
2,361 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
//===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// Macros for running through MessagePack enumerators. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// Reject inclusion unless the includer has defined at least one of the | ||
// HANDLE_MP_* macros; with none defined every entry below would expand to | ||
// nothing. | ||
#if !( \ | ||
defined HANDLE_MP_FIRST_BYTE || defined HANDLE_MP_FIX_BITS || \ | ||
defined HANDLE_MP_FIX_BITS_MASK || defined HANDLE_MP_FIX_MAX || \ | ||
defined HANDLE_MP_FIX_LEN || defined HANDLE_MP_FIX_MIN) | ||
#error "Missing macro definition of HANDLE_MP*" | ||
#endif | ||
|
||
// Give every HANDLE_MP_* macro the includer left undefined an empty | ||
// expansion, so that only the entry kinds the includer asked for produce any | ||
// output. Each macro takes (ID, NAME): the numeric value and the identifier. | ||
#ifndef HANDLE_MP_FIRST_BYTE | ||
#define HANDLE_MP_FIRST_BYTE(ID, NAME) | ||
#endif | ||
|
||
#ifndef HANDLE_MP_FIX_BITS | ||
#define HANDLE_MP_FIX_BITS(ID, NAME) | ||
#endif | ||
|
||
#ifndef HANDLE_MP_FIX_BITS_MASK | ||
#define HANDLE_MP_FIX_BITS_MASK(ID, NAME) | ||
#endif | ||
|
||
#ifndef HANDLE_MP_FIX_MAX | ||
#define HANDLE_MP_FIX_MAX(ID, NAME) | ||
#endif | ||
|
||
#ifndef HANDLE_MP_FIX_LEN | ||
#define HANDLE_MP_FIX_LEN(ID, NAME) | ||
#endif | ||
|
||
#ifndef HANDLE_MP_FIX_MIN | ||
#define HANDLE_MP_FIX_MIN(ID, NAME) | ||
#endif | ||
|
||
// Formats identified by the full value of their first byte: (byte, name). | ||
// 0xc1 has no entry; the MessagePack spec marks that byte as never used. | ||
HANDLE_MP_FIRST_BYTE(0xc0, Nil) | ||
HANDLE_MP_FIRST_BYTE(0xc2, False) | ||
HANDLE_MP_FIRST_BYTE(0xc3, True) | ||
HANDLE_MP_FIRST_BYTE(0xc4, Bin8) | ||
HANDLE_MP_FIRST_BYTE(0xc5, Bin16) | ||
HANDLE_MP_FIRST_BYTE(0xc6, Bin32) | ||
HANDLE_MP_FIRST_BYTE(0xc7, Ext8) | ||
HANDLE_MP_FIRST_BYTE(0xc8, Ext16) | ||
HANDLE_MP_FIRST_BYTE(0xc9, Ext32) | ||
HANDLE_MP_FIRST_BYTE(0xca, Float32) | ||
HANDLE_MP_FIRST_BYTE(0xcb, Float64) | ||
HANDLE_MP_FIRST_BYTE(0xcc, UInt8) | ||
HANDLE_MP_FIRST_BYTE(0xcd, UInt16) | ||
HANDLE_MP_FIRST_BYTE(0xce, UInt32) | ||
HANDLE_MP_FIRST_BYTE(0xcf, UInt64) | ||
HANDLE_MP_FIRST_BYTE(0xd0, Int8) | ||
HANDLE_MP_FIRST_BYTE(0xd1, Int16) | ||
HANDLE_MP_FIRST_BYTE(0xd2, Int32) | ||
HANDLE_MP_FIRST_BYTE(0xd3, Int64) | ||
HANDLE_MP_FIRST_BYTE(0xd4, FixExt1) | ||
HANDLE_MP_FIRST_BYTE(0xd5, FixExt2) | ||
HANDLE_MP_FIRST_BYTE(0xd6, FixExt4) | ||
HANDLE_MP_FIRST_BYTE(0xd7, FixExt8) | ||
HANDLE_MP_FIRST_BYTE(0xd8, FixExt16) | ||
HANDLE_MP_FIRST_BYTE(0xd9, Str8) | ||
HANDLE_MP_FIRST_BYTE(0xda, Str16) | ||
HANDLE_MP_FIRST_BYTE(0xdb, Str32) | ||
HANDLE_MP_FIRST_BYTE(0xdc, Array16) | ||
HANDLE_MP_FIRST_BYTE(0xdd, Array32) | ||
HANDLE_MP_FIRST_BYTE(0xde, Map16) | ||
HANDLE_MP_FIRST_BYTE(0xdf, Map32) | ||
|
||
// Identifying high-order bit pattern of each "Fix" format: (pattern, name). | ||
// The bits that make up the pattern are selected by the matching | ||
// HANDLE_MP_FIX_BITS_MASK entry of the same name. | ||
HANDLE_MP_FIX_BITS(0x00, PositiveInt) | ||
HANDLE_MP_FIX_BITS(0x80, Map) | ||
HANDLE_MP_FIX_BITS(0x90, Array) | ||
HANDLE_MP_FIX_BITS(0xa0, String) | ||
HANDLE_MP_FIX_BITS(0xe0, NegativeInt) | ||
|
||
// Mask selecting the identifying bits of each "Fix" format: (mask, name). | ||
// The value expected under the mask is the HANDLE_MP_FIX_BITS entry of the | ||
// same name. | ||
HANDLE_MP_FIX_BITS_MASK(0x80, PositiveInt) | ||
HANDLE_MP_FIX_BITS_MASK(0xf0, Map) | ||
HANDLE_MP_FIX_BITS_MASK(0xf0, Array) | ||
HANDLE_MP_FIX_BITS_MASK(0xe0, String) | ||
HANDLE_MP_FIX_BITS_MASK(0xe0, NegativeInt) | ||
|
||
// Largest value (PositiveInt) or size (Map, Array, String) that fits in the | ||
// first byte of a "Fix" format: (maximum, name). Each maximum is the bitwise | ||
// complement of the corresponding HANDLE_MP_FIX_BITS_MASK value. | ||
HANDLE_MP_FIX_MAX(0x7f, PositiveInt) | ||
HANDLE_MP_FIX_MAX(0x0f, Map) | ||
HANDLE_MP_FIX_MAX(0x0f, Array) | ||
HANDLE_MP_FIX_MAX(0x1f, String) | ||
|
||
// Exact payload size of each fixed-length extension format: (size, name). | ||
// ExtN pairs with the FixExtN first-byte entry above. | ||
HANDLE_MP_FIX_LEN(0x01, Ext1) | ||
HANDLE_MP_FIX_LEN(0x02, Ext2) | ||
HANDLE_MP_FIX_LEN(0x04, Ext4) | ||
HANDLE_MP_FIX_LEN(0x08, Ext8) | ||
HANDLE_MP_FIX_LEN(0x10, Ext16) | ||
|
||
// Smallest value encodable as a negative "Fix" integer: -0x20 is -32, whose | ||
// 8-bit two's complement encoding is 0xe0 (the NegativeInt bit pattern). | ||
HANDLE_MP_FIX_MIN(-0x20, NegativeInt) | ||
|
||
// Undefine every HANDLE_MP_* macro so this file can be included again with a | ||
// different macro defined. | ||
#undef HANDLE_MP_FIRST_BYTE | ||
#undef HANDLE_MP_FIX_BITS | ||
#undef HANDLE_MP_FIX_BITS_MASK | ||
#undef HANDLE_MP_FIX_MAX | ||
#undef HANDLE_MP_FIX_LEN | ||
#undef HANDLE_MP_FIX_MIN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
//===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// This file contains constants used for implementing MessagePack support. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_BINARYFORMAT_MSGPACK_H | ||
#define LLVM_BINARYFORMAT_MSGPACK_H | ||
|
||
#include "llvm/Support/DataTypes.h" | ||
#include "llvm/Support/Endian.h" | ||
|
||
namespace llvm { | ||
namespace msgpack { | ||
|
||
/// The endianness of all multi-byte encoded values in MessagePack. | ||
/// | ||
/// This is big-endian, i.e. the most significant byte is stored first. | ||
constexpr support::endianness Endianness = support::big; | ||
|
||
/// The first byte identifiers of MessagePack object formats. | ||
/// | ||
/// One \c uint8_t constant is generated for each \c HANDLE_MP_FIRST_BYTE entry | ||
/// in MsgPack.def; for example, \c FirstByte::Nil is 0xc0. | ||
namespace FirstByte { | ||
#define HANDLE_MP_FIRST_BYTE(ID, NAME) constexpr uint8_t NAME = ID; | ||
#include "llvm/BinaryFormat/MsgPack.def" | ||
} | ||
|
||
/// Most significant bits used to identify "Fix" variants in MessagePack. | ||
/// | ||
/// For example, FixStr objects encode their size in the five least significant | ||
/// bits of their first byte, which is identified by the bit pattern "101" in | ||
/// the three most significant bits. So FixBits::String contains 0b10100000. | ||
/// | ||
/// A corresponding mask of the bit pattern is found in \c FixBitsMask. | ||
/// | ||
/// One \c uint8_t constant is generated for each \c HANDLE_MP_FIX_BITS entry | ||
/// in MsgPack.def. | ||
namespace FixBits { | ||
#define HANDLE_MP_FIX_BITS(ID, NAME) constexpr uint8_t NAME = ID; | ||
#include "llvm/BinaryFormat/MsgPack.def" | ||
} | ||
|
||
/// Mask of bits used to identify "Fix" variants in MessagePack. | ||
/// | ||
/// For example, FixStr objects encode their size in the five least significant | ||
/// bits of their first byte, which is identified by the bit pattern "101" in | ||
/// the three most significant bits. So FixBitsMask::String contains | ||
/// 0b11100000. | ||
/// | ||
/// The corresponding bit pattern to mask for is found in \c FixBits. | ||
/// | ||
/// One \c uint8_t constant is generated for each \c HANDLE_MP_FIX_BITS_MASK | ||
/// entry in MsgPack.def. | ||
namespace FixBitsMask { | ||
#define HANDLE_MP_FIX_BITS_MASK(ID, NAME) constexpr uint8_t NAME = ID; | ||
#include "llvm/BinaryFormat/MsgPack.def" | ||
} | ||
|
||
/// The maximum value or size encodable in "Fix" variants of formats. | ||
/// | ||
/// For example, FixStr objects encode their size in the five least significant | ||
/// bits of their first byte, so the largest encodable size is 0b00011111. | ||
/// | ||
/// Constants are generated for PositiveInt, Map, Array and String only; the | ||
/// bound for negative integers is a minimum and lives in \c FixMin. | ||
namespace FixMax { | ||
#define HANDLE_MP_FIX_MAX(ID, NAME) constexpr uint8_t NAME = ID; | ||
#include "llvm/BinaryFormat/MsgPack.def" | ||
} | ||
|
||
/// The exact size encodable in "Fix" variants of formats. | ||
/// | ||
/// The only objects for which an exact size makes sense are of Extension type. | ||
/// | ||
/// For example, FixExt4 stores an extension type containing exactly four bytes. | ||
/// | ||
/// The generated constants are Ext1, Ext2, Ext4, Ext8 and Ext16, each holding | ||
/// the byte count in its name. | ||
namespace FixLen { | ||
#define HANDLE_MP_FIX_LEN(ID, NAME) constexpr uint8_t NAME = ID; | ||
#include "llvm/BinaryFormat/MsgPack.def" | ||
} | ||
|
||
/// The minimum value or size encodable in "Fix" variants of formats. | ||
/// | ||
/// The only object for which a minimum makes sense is a negative FixNum. | ||
/// | ||
/// Negative FixNum objects encode their signed integer value in one byte, but | ||
/// they must have the pattern "111" as their three most significant bits. This | ||
/// means all values are negative, and the smallest representable value is | ||
/// 0b11100000, i.e. -32 as a signed 8-bit integer. | ||
/// | ||
/// Unlike the other constant groups, these constants are \c int8_t because the | ||
/// value is negative. | ||
namespace FixMin { | ||
#define HANDLE_MP_FIX_MIN(ID, NAME) constexpr int8_t NAME = ID; | ||
#include "llvm/BinaryFormat/MsgPack.def" | ||
} | ||
|
||
} // end namespace msgpack | ||
} // end namespace llvm | ||
|
||
#endif // LLVM_BINARYFORMAT_MSGPACK_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// This is a MessagePack reader. | ||
/// | ||
/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full | ||
/// standard. | ||
/// | ||
/// Typical usage: | ||
/// \code | ||
/// StringRef input = GetInput(); | ||
/// msgpack::Reader MPReader(input); | ||
/// msgpack::Object Obj; | ||
/// | ||
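/// while (true) { | ||
///   Expected<bool> ReadObj = MPReader.read(Obj); | ||
///   if (!ReadObj) | ||
///     return ReadObj.takeError(); | ||
///   if (!ReadObj.get()) | ||
///     break; // Reached end of input | ||
///   switch (Obj.Kind) { | ||
///   case msgpack::Type::Int: | ||
///     // Use Obj.Int | ||
///     break; | ||
///   // ... | ||
///   } | ||
/// } | ||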
/// \endcode | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_SUPPORT_MSGPACKREADER_H | ||
#define LLVM_SUPPORT_MSGPACKREADER_H | ||
|
||
#include "llvm/Support/MemoryBuffer.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include <cstdint> | ||
|
||
namespace llvm { | ||
namespace msgpack { | ||
|
||
/// MessagePack types as defined in the standard, with the exception of Integer | ||
/// being divided into a signed Int and unsigned UInt variant in order to map | ||
/// directly to C++ types. | ||
/// | ||
/// The types map onto corresponding union members of the \c Object struct. | ||
enum class Type : uint8_t { | ||
// Signed integer; value in Object::Int. | ||
Int, | ||
// Unsigned integer; value in Object::UInt. | ||
UInt, | ||
// Nil has a single value, so it has no union member in Object. | ||
Nil, | ||
// Boolean; value in Object::Bool. | ||
Boolean, | ||
// Floating point; value in Object::Float. | ||
Float, | ||
// String; bytes in Object::Raw. | ||
String, | ||
// Binary; bytes in Object::Raw. | ||
Binary, | ||
// Array header; element count in Object::Length. | ||
Array, | ||
// Map header; Object::Length holds N, and N*2 objects follow. | ||
Map, | ||
// Extension; type ID and bytes in Object::Extension. | ||
Extension, | ||
}; | ||
|
||
/// Extension types are composed of a user-defined type ID and an uninterpreted | ||
/// sequence of bytes. | ||
/// | ||
/// This is the payload stored in \c Object::Extension for \c Type::Extension | ||
/// objects. | ||
struct ExtensionType { | ||
/// User-defined extension type. | ||
/// | ||
/// Note that inside this struct the member name \c Type hides the | ||
/// \c msgpack::Type enum. | ||
int8_t Type; | ||
/// Raw bytes of the extension object. | ||
/// | ||
/// NOTE(review): as a StringRef this presumably refers to the reader's input | ||
/// rather than owning a copy; confirm against the Reader implementation. | ||
StringRef Bytes; | ||
}; | ||
|
||
/// MessagePack object, represented as a tagged union of C++ types. | ||
/// | ||
/// All types except \c Type::Nil (which has only one value, and so is | ||
/// completely represented by the \c Kind itself) map to exactly one union | ||
/// member. | ||
struct Object { | ||
/// Tag selecting which union member below holds the value. | ||
Type Kind; | ||
union { | ||
/// Value for \c Type::Int. | ||
int64_t Int; | ||
/// Value for \c Type::UInt. | ||
uint64_t UInt; | ||
/// Value for \c Type::Boolean. | ||
bool Bool; | ||
/// Value for \c Type::Float. | ||
double Float; | ||
/// Value for \c Type::String and \c Type::Binary. | ||
StringRef Raw; | ||
/// Value for \c Type::Array and \c Type::Map. | ||
size_t Length; | ||
/// Value for \c Type::Extension. | ||
ExtensionType Extension; | ||
}; | ||
|
||
/// Default-construct as a \c Type::Int object holding the value 0. | ||
Object() : Kind(Type::Int), Int(0) {} | ||
}; | ||
|
||
/// Reads MessagePack objects from memory, one at a time. | ||
/// | ||
/// A reader is neither copy-constructible nor copy-assignable. | ||
class Reader { | ||
public: | ||
/// Construct a reader, keeping a reference to the \p InputBuffer. | ||
Reader(MemoryBufferRef InputBuffer); | ||
/// Construct a reader, keeping a reference to the \p Input. | ||
Reader(StringRef Input); | ||
|
||
Reader(const Reader &) = delete; | ||
Reader &operator=(const Reader &) = delete; | ||
|
||
/// Read one object from the input buffer, advancing past it. | ||
/// | ||
/// The \p Obj is updated with the kind of the object read, and the | ||
/// corresponding union member is updated. | ||
/// | ||
/// For the collection objects (Array and Map), only the length is read, and | ||
/// the caller must make an additional \c N calls (in the case of Array) or | ||
/// \c N*2 calls (in the case of Map) to \c read to retrieve the collection | ||
/// elements. | ||
/// | ||
/// \param [out] Obj filled with next object on success. | ||
/// | ||
/// \returns true when object successfully read, false when at end of | ||
/// input (and so \p Obj was not updated), otherwise an error. | ||
Expected<bool> read(Object &Obj); | ||
|
||
private: | ||
// The input being read. NOTE(review): presumably the buffer passed to (or | ||
// built from the StringRef passed to) the constructor; confirm in the .cpp. | ||
MemoryBufferRef InputBuffer; | ||
// NOTE(review): by name, the position of the next unread byte; confirm. | ||
StringRef::iterator Current; | ||
// NOTE(review): by name, the one-past-the-end position of the input; confirm. | ||
StringRef::iterator End; | ||
|
||
// Returns the distance from Current to End as an unsigned count. | ||
// NOTE(review): does not modify the reader, so it could be declared const. | ||
size_t remainingSpace() { | ||
// The rest of the code maintains the invariant that End >= Current, so | ||
// that this cast is always defined behavior. | ||
return static_cast<size_t>(End - Current); | ||
} | ||
|
||
// Per-format decoding helpers used to implement read(); they are only | ||
// declared here. NOTE(review): T is presumably the encoded integer or | ||
// length type of the format being decoded; confirm in the .cpp. | ||
template <class T> Expected<bool> readRaw(Object &Obj); | ||
template <class T> Expected<bool> readInt(Object &Obj); | ||
template <class T> Expected<bool> readUInt(Object &Obj); | ||
template <class T> Expected<bool> readLength(Object &Obj); | ||
template <class T> Expected<bool> readExt(Object &Obj); | ||
Expected<bool> createRaw(Object &Obj, uint32_t Size); | ||
Expected<bool> createExt(Object &Obj, uint32_t Size); | ||
}; | ||
|
||
} // end namespace msgpack | ||
} // end namespace llvm | ||
|
||
#endif // LLVM_SUPPORT_MSGPACKREADER_H |
Oops, something went wrong.