Skip to content

Commit

Permalink
[BinaryFormat] Add MessagePack reader/writer
Browse files Browse the repository at this point in the history
Add support for reading and writing MessagePack, a binary object serialization
format which aims to be more compact than text formats like JSON or YAML.

The specification can be found at
https://github.com/msgpack/msgpack/blob/master/spec.md

Will be used for encoding metadata in AMDGPU code objects.

Differential Revision: https://reviews.llvm.org/D44429

llvm-svn: 340457
  • Loading branch information
scott-linder committed Aug 22, 2018
1 parent f3c39a7 commit 20f9cd8
Show file tree
Hide file tree
Showing 10 changed files with 2,361 additions and 0 deletions.
108 changes: 108 additions & 0 deletions llvm/include/llvm/BinaryFormat/MsgPack.def
@@ -0,0 +1,108 @@
//===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Macros for running through MessagePack enumerators.
///
//===----------------------------------------------------------------------===//

// Sanity check: the includer must define at least one HANDLE_MP_* handler
// before including this file. With none defined, every entry below would
// expand to nothing, so the inclusion is rejected outright.
#if !( \
defined HANDLE_MP_FIRST_BYTE || defined HANDLE_MP_FIX_BITS || \
defined HANDLE_MP_FIX_BITS_MASK || defined HANDLE_MP_FIX_MAX || \
defined HANDLE_MP_FIX_LEN || defined HANDLE_MP_FIX_MIN)
#error "Missing macro definition of HANDLE_MP*"
#endif

// Any handler the includer did not define is given an empty expansion here,
// so that an includer interested in a single table only has to define the one
// macro for that table; entries of all other tables then expand to nothing.

// Handler for first-byte identifier entries.
#ifndef HANDLE_MP_FIRST_BYTE
#define HANDLE_MP_FIRST_BYTE(ID, NAME)
#endif

// Handler for "Fix" variant identifying bit patterns.
#ifndef HANDLE_MP_FIX_BITS
#define HANDLE_MP_FIX_BITS(ID, NAME)
#endif

// Handler for the masks that go with the "Fix" bit patterns.
#ifndef HANDLE_MP_FIX_BITS_MASK
#define HANDLE_MP_FIX_BITS_MASK(ID, NAME)
#endif

// Handler for the maximum value/size of "Fix" variants.
#ifndef HANDLE_MP_FIX_MAX
#define HANDLE_MP_FIX_MAX(ID, NAME)
#endif

// Handler for the exact payload length of "Fix" extension variants.
#ifndef HANDLE_MP_FIX_LEN
#define HANDLE_MP_FIX_LEN(ID, NAME)
#endif

// Handler for the minimum value of "Fix" variants.
#ifndef HANDLE_MP_FIX_MIN
#define HANDLE_MP_FIX_MIN(ID, NAME)
#endif

// First-byte identifiers: HANDLE_MP_FIRST_BYTE(ID, NAME), where ID is the
// complete value of the first byte of an object in the format called NAME.
// The entries run from 0xc0 through 0xdf; 0xc1 has no entry (the MessagePack
// specification lists that value as never used).
HANDLE_MP_FIRST_BYTE(0xc0, Nil)
HANDLE_MP_FIRST_BYTE(0xc2, False)
HANDLE_MP_FIRST_BYTE(0xc3, True)
HANDLE_MP_FIRST_BYTE(0xc4, Bin8)
HANDLE_MP_FIRST_BYTE(0xc5, Bin16)
HANDLE_MP_FIRST_BYTE(0xc6, Bin32)
HANDLE_MP_FIRST_BYTE(0xc7, Ext8)
HANDLE_MP_FIRST_BYTE(0xc8, Ext16)
HANDLE_MP_FIRST_BYTE(0xc9, Ext32)
HANDLE_MP_FIRST_BYTE(0xca, Float32)
HANDLE_MP_FIRST_BYTE(0xcb, Float64)
HANDLE_MP_FIRST_BYTE(0xcc, UInt8)
HANDLE_MP_FIRST_BYTE(0xcd, UInt16)
HANDLE_MP_FIRST_BYTE(0xce, UInt32)
HANDLE_MP_FIRST_BYTE(0xcf, UInt64)
HANDLE_MP_FIRST_BYTE(0xd0, Int8)
HANDLE_MP_FIRST_BYTE(0xd1, Int16)
HANDLE_MP_FIRST_BYTE(0xd2, Int32)
HANDLE_MP_FIRST_BYTE(0xd3, Int64)
HANDLE_MP_FIRST_BYTE(0xd4, FixExt1)
HANDLE_MP_FIRST_BYTE(0xd5, FixExt2)
HANDLE_MP_FIRST_BYTE(0xd6, FixExt4)
HANDLE_MP_FIRST_BYTE(0xd7, FixExt8)
HANDLE_MP_FIRST_BYTE(0xd8, FixExt16)
HANDLE_MP_FIRST_BYTE(0xd9, Str8)
HANDLE_MP_FIRST_BYTE(0xda, Str16)
HANDLE_MP_FIRST_BYTE(0xdb, Str32)
HANDLE_MP_FIRST_BYTE(0xdc, Array16)
HANDLE_MP_FIRST_BYTE(0xdd, Array32)
HANDLE_MP_FIRST_BYTE(0xde, Map16)
HANDLE_MP_FIRST_BYTE(0xdf, Map32)

// "Fix" variant bit patterns: HANDLE_MP_FIX_BITS(ID, NAME), where ID holds
// the most significant bits that identify the "Fix" variant NAME, with the
// remaining (payload) bits zero. Each entry pairs with the same-named
// HANDLE_MP_FIX_BITS_MASK entry.
HANDLE_MP_FIX_BITS(0x00, PositiveInt)
HANDLE_MP_FIX_BITS(0x80, Map)
HANDLE_MP_FIX_BITS(0x90, Array)
HANDLE_MP_FIX_BITS(0xa0, String)
HANDLE_MP_FIX_BITS(0xe0, NegativeInt)

// "Fix" variant masks: HANDLE_MP_FIX_BITS_MASK(ID, NAME), where ID selects
// the identifying bits of the first byte for the "Fix" variant NAME. The
// intended check is that a first byte B is of variant NAME when
// (B & mask) equals the same-named HANDLE_MP_FIX_BITS pattern.
HANDLE_MP_FIX_BITS_MASK(0x80, PositiveInt)
HANDLE_MP_FIX_BITS_MASK(0xf0, Map)
HANDLE_MP_FIX_BITS_MASK(0xf0, Array)
HANDLE_MP_FIX_BITS_MASK(0xe0, String)
HANDLE_MP_FIX_BITS_MASK(0xe0, NegativeInt)

// "Fix" variant maxima: HANDLE_MP_FIX_MAX(ID, NAME), where ID is the largest
// value (PositiveInt) or size (Map, Array, String) that fits in the payload
// bits of the "Fix" variant NAME. There is no NegativeInt entry; its bound is
// given by HANDLE_MP_FIX_MIN instead.
HANDLE_MP_FIX_MAX(0x7f, PositiveInt)
HANDLE_MP_FIX_MAX(0x0f, Map)
HANDLE_MP_FIX_MAX(0x0f, Array)
HANDLE_MP_FIX_MAX(0x1f, String)

// "Fix" extension lengths: HANDLE_MP_FIX_LEN(ID, NAME), where ID is the exact
// number of payload bytes carried by the fixed-size extension format NAME
// (1, 2, 4, 8 and 16 bytes respectively).
HANDLE_MP_FIX_LEN(0x01, Ext1)
HANDLE_MP_FIX_LEN(0x02, Ext2)
HANDLE_MP_FIX_LEN(0x04, Ext4)
HANDLE_MP_FIX_LEN(0x08, Ext8)
HANDLE_MP_FIX_LEN(0x10, Ext16)

// "Fix" variant minima: HANDLE_MP_FIX_MIN(ID, NAME), where ID is the smallest
// value encodable in the "Fix" variant NAME. -0x20 is -32; note that ID is a
// negative literal, so handlers need a signed type to hold it.
HANDLE_MP_FIX_MIN(-0x20, NegativeInt)

// Undefine every handler (both includer-supplied ones and the empty defaults
// provided above) so this file can be included again with a different
// handler defined and without leaking macros into the including file.
#undef HANDLE_MP_FIRST_BYTE
#undef HANDLE_MP_FIX_BITS
#undef HANDLE_MP_FIX_BITS_MASK
#undef HANDLE_MP_FIX_MAX
#undef HANDLE_MP_FIX_LEN
#undef HANDLE_MP_FIX_MIN
93 changes: 93 additions & 0 deletions llvm/include/llvm/BinaryFormat/MsgPack.h
@@ -0,0 +1,93 @@
//===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains constants used for implementing MessagePack support.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_BINARYFORMAT_MSGPACK_H
#define LLVM_BINARYFORMAT_MSGPACK_H

#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Endian.h"

namespace llvm {
namespace msgpack {

/// The endianness of all multi-byte encoded values in MessagePack.
///
/// This is big-endian; readers and writers should refer to this constant
/// rather than spelling out the byte order themselves.
constexpr support::endianness Endianness = support::big;

/// The first byte identifiers of MessagePack object formats.
///
/// One \c uint8_t constant is generated per \c HANDLE_MP_FIRST_BYTE entry in
/// MsgPack.def, e.g. \c FirstByte::Nil is 0xc0.
namespace FirstByte {
// Only this handler is defined; MsgPack.def supplies empty defaults for the
// others and undefines all handlers again at its end.
#define HANDLE_MP_FIRST_BYTE(ID, NAME) constexpr uint8_t NAME = ID;
#include "llvm/BinaryFormat/MsgPack.def"
}

/// Most significant bits used to identify "Fix" variants in MessagePack.
///
/// For example, FixStr objects encode their size in the five least significant
/// bits of their first byte, which is identified by the bit pattern "101" in
/// the three most significant bits. So FixBits::String contains 0b10100000.
///
/// A corresponding mask of the bit pattern is found in \c FixBitsMask.
///
/// One \c uint8_t constant is generated per \c HANDLE_MP_FIX_BITS entry in
/// MsgPack.def.
namespace FixBits {
// Only this handler is defined; MsgPack.def supplies empty defaults for the
// others and undefines all handlers again at its end.
#define HANDLE_MP_FIX_BITS(ID, NAME) constexpr uint8_t NAME = ID;
#include "llvm/BinaryFormat/MsgPack.def"
}

/// Mask of bits used to identify "Fix" variants in MessagePack.
///
/// For example, FixStr objects encode their size in the five least significant
/// bits of their first byte, which is identified by the bit pattern "101" in
/// the three most significant bits. So FixBitsMask::String contains
/// 0b11100000.
///
/// The corresponding bit pattern to mask for is found in \c FixBits.
///
/// One \c uint8_t constant is generated per \c HANDLE_MP_FIX_BITS_MASK entry
/// in MsgPack.def.
namespace FixBitsMask {
// Only this handler is defined; MsgPack.def supplies empty defaults for the
// others and undefines all handlers again at its end.
#define HANDLE_MP_FIX_BITS_MASK(ID, NAME) constexpr uint8_t NAME = ID;
#include "llvm/BinaryFormat/MsgPack.def"
}

/// The maximum value or size encodable in "Fix" variants of formats.
///
/// For example, FixStr objects encode their size in the five least significant
/// bits of their first byte, so the largest encodable size is 0b00011111.
///
/// One \c uint8_t constant is generated per \c HANDLE_MP_FIX_MAX entry in
/// MsgPack.def.
namespace FixMax {
// Only this handler is defined; MsgPack.def supplies empty defaults for the
// others and undefines all handlers again at its end.
#define HANDLE_MP_FIX_MAX(ID, NAME) constexpr uint8_t NAME = ID;
#include "llvm/BinaryFormat/MsgPack.def"
}

/// The exact size encodable in "Fix" variants of formats.
///
/// The only objects for which an exact size makes sense are of Extension type.
///
/// For example, FixExt4 stores an extension type containing exactly four
/// bytes, so \c FixLen::Ext4 is 4.
///
/// One \c uint8_t constant is generated per \c HANDLE_MP_FIX_LEN entry in
/// MsgPack.def.
namespace FixLen {
// Only this handler is defined; MsgPack.def supplies empty defaults for the
// others and undefines all handlers again at its end.
#define HANDLE_MP_FIX_LEN(ID, NAME) constexpr uint8_t NAME = ID;
#include "llvm/BinaryFormat/MsgPack.def"
}

/// The minimum value or size encodable in "Fix" variants of formats.
///
/// The only object for which a minimum makes sense is a negative FixNum.
///
/// Negative FixNum objects encode their signed integer value in one byte, but
/// they must have the pattern "111" as their three most significant bits. This
/// means all values are negative, and the smallest representable value is
/// 0b11100000 (that is, -32 when read as a signed 8-bit integer).
///
/// One constant is generated per \c HANDLE_MP_FIX_MIN entry in MsgPack.def.
/// Unlike the other "Fix" namespaces the constants here are \c int8_t, since
/// the table values are negative.
namespace FixMin {
// Only this handler is defined; MsgPack.def supplies empty defaults for the
// others and undefines all handlers again at its end.
#define HANDLE_MP_FIX_MIN(ID, NAME) constexpr int8_t NAME = ID;
#include "llvm/BinaryFormat/MsgPack.def"
}

} // end namespace msgpack
} // end namespace llvm

#endif // LLVM_BINARYFORMAT_MSGPACK_H
148 changes: 148 additions & 0 deletions llvm/include/llvm/BinaryFormat/MsgPackReader.h
@@ -0,0 +1,148 @@
//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is a MessagePack reader.
///
/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
/// standard.
///
/// Typical usage:
/// \code
/// StringRef input = GetInput();
/// msgpack::Reader MPReader(input);
/// msgpack::Object Obj;
///
/// while (MPReader.read(Obj)) {
/// switch (Obj.Kind) {
/// case msgpack::Type::Int:
/// // Use Obj.Int
/// break;
/// // ...
/// }
/// }
/// \endcode
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_MSGPACKREADER_H
#define LLVM_SUPPORT_MSGPACKREADER_H

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

namespace llvm {
namespace msgpack {

/// MessagePack types as defined in the standard, with the exception of Integer
/// being divided into a signed Int and unsigned UInt variant in order to map
/// directly to C++ types.
///
/// The types map onto corresponding union members of the \c Object struct.
enum class Type : uint8_t {
/// Signed integer; value is held in \c Object::Int.
Int,
/// Unsigned integer; value is held in \c Object::UInt.
UInt,
/// Nil; has a single value, so no union member of \c Object is used.
Nil,
/// Boolean; value is held in \c Object::Bool.
Boolean,
/// Floating point; value is held in \c Object::Float.
Float,
/// String; contents are held in \c Object::Raw.
String,
/// Binary blob; contents are held in \c Object::Raw.
Binary,
/// Array header; element count is held in \c Object::Length.
Array,
/// Map header; count is held in \c Object::Length.
Map,
/// Extension object; value is held in \c Object::Extension.
Extension,
};

/// Extension types are composed of a user-defined type ID and an uninterpreted
/// sequence of bytes.
///
/// This is the payload of an \c Object whose kind is \c Type::Extension.
struct ExtensionType {
/// User-defined extension type.
int8_t Type;
/// Raw bytes of the extension object.
///
/// This is a \c StringRef, i.e. a view rather than an owning copy.
/// NOTE(review): presumably it points into the reader's input buffer, so the
/// input must outlive it -- confirm against the reader implementation.
StringRef Bytes;
};

/// MessagePack object, represented as a tagged union of C++ types.
///
/// All types except \c Type::Nil (which has only one value, and so is
/// completely represented by the \c Kind itself) map to exactly one union
/// member.
struct Object {
/// Tag selecting which union member (if any) currently holds the value.
Type Kind;
union {
/// Value for \c Type::Int.
int64_t Int;
/// Value for \c Type::UInt.
uint64_t UInt;
/// Value for \c Type::Boolean.
bool Bool;
/// Value for \c Type::Float.
double Float;
/// Value for \c Type::String and \c Type::Binary.
StringRef Raw;
/// Value for \c Type::Array and \c Type::Map.
size_t Length;
/// Value for \c Type::Extension.
ExtensionType Extension;
};

/// Default-construct as a signed integer (\c Type::Int) with value 0, so
/// that both the tag and the active union member are initialized.
Object() : Kind(Type::Int), Int(0) {}
};

/// Reads MessagePack objects from memory, one at a time.
///
/// The reader keeps a reference to its input rather than copying it, so the
/// underlying storage must remain valid for as long as the reader is in use.
/// Readers are neither copyable nor copy-assignable.
class Reader {
public:
  /// Construct a reader, keeping a reference to the \p InputBuffer.
  Reader(MemoryBufferRef InputBuffer);
  /// Construct a reader, keeping a reference to the \p Input.
  Reader(StringRef Input);

  Reader(const Reader &) = delete;
  Reader &operator=(const Reader &) = delete;

  /// Read one object from the input buffer, advancing past it.
  ///
  /// The \p Obj is updated with the kind of the object read, and the
  /// corresponding union member is updated.
  ///
  /// For the collection objects (Array and Map), only the length is read, and
  /// the caller must make an additional \c N calls (in the case of Array) or
  /// \c N*2 calls (in the case of Map) to \c read to retrieve the collection
  /// elements.
  ///
  /// \param [out] Obj filled with next object on success.
  ///
  /// \returns true when object successfully read, false when at end of
  /// input (and so \p Obj was not updated), otherwise an error.
  Expected<bool> read(Object &Obj);

private:
  /// The input being read.
  MemoryBufferRef InputBuffer;
  /// Position of the next unread byte.
  /// NOTE(review): presumably initialized to the start of the input by the
  /// constructors, which are defined out of line -- confirm there.
  StringRef::iterator Current;
  /// Upper bound of the readable range; never less than \c Current.
  StringRef::iterator End;

  /// \returns the number of bytes between \c Current and \c End.
  size_t remainingSpace() const {
    // The rest of the code maintains the invariant that End >= Current, so
    // that this cast is always defined behavior.
    return static_cast<size_t>(End - Current);
  }

  // Per-format helpers used by read(). The template parameter \c T selects
  // the C++ type of the encoded value or length field being decoded.
  // NOTE(review): the exact contract of each helper lives in the out-of-line
  // definitions; the names suggest they fill in the matching member of \p Obj.
  template <class T> Expected<bool> readRaw(Object &Obj);
  template <class T> Expected<bool> readInt(Object &Obj);
  template <class T> Expected<bool> readUInt(Object &Obj);
  template <class T> Expected<bool> readLength(Object &Obj);
  template <class T> Expected<bool> readExt(Object &Obj);
  Expected<bool> createRaw(Object &Obj, uint32_t Size);
  Expected<bool> createExt(Object &Obj, uint32_t Size);
};

} // end namespace msgpack
} // end namespace llvm

#endif // LLVM_SUPPORT_MSGPACKREADER_H

0 comments on commit 20f9cd8

Please sign in to comment.