Skip to content
This repository has been archived by the owner on Jul 20, 2021. It is now read-only.

Commit

Permalink
introduce rivbin binary encoder
Browse files Browse the repository at this point in the history
fixes partly #463, taken from tfchain encoding contribution
  • Loading branch information
GlenDC committed Nov 16, 2018
1 parent 18b19ea commit 1a8ca06
Show file tree
Hide file tree
Showing 14 changed files with 2,000 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -8,7 +8,7 @@ run = Test
daemonpkgs = ./cmd/rivined
clientpkgs = ./cmd/rivinec
pkgs = ./build ./modules/gateway $(daemonpkgs) $(clientpkgs)
testpkgs = ./build ./crypto ./pkg/encoding/siabin ./modules ./modules/gateway ./modules/blockcreator ./modules/wallet ./modules/explorer ./modules/consensus ./persist ./cmd/rivinec ./cmd/rivined ./sync ./types ./pkg/cli ./pkg/client ./pkg/daemon
testpkgs = ./build ./crypto ./pkg/encoding/siabin ./pkg/encoding/rivbin ./modules ./modules/gateway ./modules/blockcreator ./modules/wallet ./modules/explorer ./modules/consensus ./persist ./cmd/rivinec ./cmd/rivined ./sync ./types ./pkg/cli ./pkg/client ./pkg/daemon

version = $(shell git describe | cut -d '-' -f 1)
commit = $(shell git rev-parse --short HEAD)
Expand Down
43 changes: 43 additions & 0 deletions doc/encoding/RivineEncoding.md
@@ -0,0 +1,43 @@
# Rivine Binary Encoding

The main goal of the rivine (binary) encoding library is to achieve the smallest byte footprint for encoded content.
This encoding library is heavily inspired by the initial Sia (binary) encoding library.

## Standard Encoding

All integers are little-endian and encoded as unsigned integers, but the number of bytes used depends on the exact integral type:

| byte size | types |
| - | - |
| 1 | uint8, int8 |
| 2 | uint16, int16 |
| 3 | uint24<sup>(1)</sup> |
| 4 | uint32, int32 |
| 8 | uint64, int64, uint, int |

> (1) `uint24` is not a standard type, but this encoding library allows `uint32` integers that fit in 3 bytes to be encoded as 3 bytes.

Booleans are encoded as a single byte, `0x00` for `False` and `0x01` for `True`.

Nil pointers are equivalent to "False", i.e. a single zero byte. Valid pointers are represented by a "True" byte (0x01) followed by the encoding of the dereferenced value.

Variable-length types, such as strings and slices, are represented by a length prefix followed by the encoded value. Strings are encoded as their literal UTF-8 bytes. Slices are encoded as the concatenation of their encoded elements. The length prefix can be one, two, three or four bytes:

| byte size | inclusive size range |
| - | - |
| 1 | 0 - 127 |
| 2 | 128 - 16 383 |
| 3 | 16 384 - 2 097 151 |
| 4 | 2 097 152 - 536 870 911 |

This implies that variable-length types cannot have a size greater than `536 870 911`,
which to be fair is a very big limit for blockchain purposes. Perhaps too big of a limit already,
as it is expected that for most purposes the slice length will fit in a single byte, and the extreme cases in 2 bytes.

Maps are not supported; attempting to encode a map will cause Marshal to panic. This is because their elements are not ordered in a consistent way, and it is imperative that this encoding scheme be deterministic. To encode a map, either convert it to a slice of structs, or define a `MarshalRivine` method (see below).

Arrays and structs are simply the concatenation of their encoded elements (no length prefix is required here as the size is fixed). Byte slices receive no special integer treatment: after their length prefix they are encoded as their literal representation, one byte per byte.

All struct fields must be exported. The ordering of struct fields is determined by their type definition.

Finally, if a type implements the `RivineMarshaler` interface, its `MarshalRivine` method will be used to encode the type. Similarly, if a type implements the `RivineUnmarshaler` interface, its `UnmarshalRivine` method will be used to decode the type. Note that unless a type implements both interfaces, it must conform to the spec above. Otherwise, it may encode and decode itself however desired. This may be an attractive option where speed is critical, since it allows for more compact representations, and bypasses the use of reflection.
253 changes: 253 additions & 0 deletions pkg/encoding/rivbin/decode.go
@@ -0,0 +1,253 @@
package encoding

import (
"bytes"
"errors"
"io"
"reflect"
)

// RivineUnmarshaler is implemented by types that can read and decode
// themselves from a stream. When the address of a value satisfies this
// interface, the decoder calls UnmarshalRivine instead of applying its
// reflection-based decoding rules.
type RivineUnmarshaler interface {
	// UnmarshalRivine reads the receiver's encoded form from r.
	UnmarshalRivine(r io.Reader) error
}

// Unmarshal decodes the encoded value held in b and stores the result in v,
// which must be a pointer. Decoding follows the inverse of the marshaling
// rules given in the package docstring.
func Unmarshal(b []byte, v interface{}) error {
	return NewDecoder(bytes.NewBuffer(b)).Decode(v)
}

// UnmarshalAll decodes the consecutive encoded values held in b and stores
// them, in order, in vs. Every element of vs must be a pointer.
func UnmarshalAll(b []byte, vs ...interface{}) error {
	buf := bytes.NewBuffer(b)
	return NewDecoder(buf).DecodeAll(vs...)
}

// A Decoder reads and decodes values from an input stream.
type Decoder struct {
	// r is the input stream that every decoding method of the Decoder reads from.
	r io.Reader
}

// NewDecoder returns a new Decoder that takes its input from r.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{r: r}
}

// Decode reads the next encoded value from the decoder's input stream and
// stores it in the value v points to. It returns errBadPointer when v is not
// a pointer or is a nil pointer. The decoding rules are the inverse of those
// specified in the package docstring.
func (d *Decoder) Decode(v interface{}) error {
	target := reflect.ValueOf(v)
	// Only a non-nil pointer gives us something to write into.
	if target.Kind() == reflect.Ptr && !target.IsNil() {
		return d.decode(target.Elem())
	}
	return errBadPointer
}

// errBadPointer is returned by Decode when its argument is not a pointer or
// is a nil pointer.
var errBadPointer = errors.New("cannot decode into invalid pointer")

// DecodeAll decodes one value per argument, in order, stopping at and
// returning the first error. Every argument must be a pointer.
func (d *Decoder) DecodeAll(vs ...interface{}) error {
	for i := range vs {
		if err := d.Decode(vs[i]); err != nil {
			return err
		}
	}
	return nil
}

// decode reads the next encoded value from the decoder's input stream and
// stores it in val, dispatching on the kind of val. The decoding rules are
// the inverse of those specified in the package docstring.
//
// A value whose address implements RivineUnmarshaler decodes itself and
// bypasses every rule below. Otherwise:
//   - a pointer is read as a presence flag (UnmarshalBool) followed, when the
//     flag is true, by the pointed-to value; a nil destination pointer is
//     allocated before decoding into it, and is left untouched when the flag
//     is false;
//   - booleans and integers are read with the matching Unmarshal* helper;
//     int and uint use the 64-bit helper, and signed kinds are read through
//     the unsigned helper of the same width and then converted;
//   - strings and slices are read as a length (decodeSliceLength) followed by
//     their content; a zero-length slice leaves val untouched;
//   - arrays are read element by element, byte arrays and byte slices being
//     filled with a single read;
//   - structs are read field by field, skipping fields for which
//     isFieldHidden reports true.
//
// It returns the first error reported by the underlying reads, or an
// "unknown type" error when val is of a kind that is not listed above.
func (d *Decoder) decode(val reflect.Value) error {
	// check for RivineUnmarshaler interface first
	if val.CanAddr() && val.Addr().CanInterface() {
		if u, ok := val.Addr().Interface().(RivineUnmarshaler); ok {
			return u.UnmarshalRivine(d.r)
		}
	}

	switch val.Kind() {
	case reflect.Ptr:
		isDefined, err := UnmarshalBool(d.r)
		if err != nil || !isDefined {
			return err // nil in case !isDefined
		}
		// Make sure we are not decoding into nil: Elem of a nil pointer is
		// the zero reflect.Value, which would fall through to the
		// "unknown type" error below instead of receiving the value.
		if val.IsNil() {
			val.Set(reflect.New(val.Type().Elem()))
		}
		return d.decode(val.Elem())

	case reflect.Bool:
		b, err := UnmarshalBool(d.r)
		if err != nil {
			return err
		}
		val.SetBool(b)
		return nil

	case reflect.Uint8:
		x, err := UnmarshalUint8(d.r)
		if err != nil {
			return err
		}
		val.SetUint(uint64(x))
		return nil

	case reflect.Uint16:
		x, err := UnmarshalUint16(d.r)
		if err != nil {
			return err
		}
		val.SetUint(uint64(x))
		return nil

	case reflect.Uint32:
		x, err := UnmarshalUint32(d.r)
		if err != nil {
			return err
		}
		val.SetUint(uint64(x))
		return nil

	case reflect.Uint, reflect.Uint64:
		x, err := UnmarshalUint64(d.r)
		if err != nil {
			return err
		}
		val.SetUint(x)
		return nil

	case reflect.Int8:
		x, err := UnmarshalUint8(d.r)
		if err != nil {
			return err
		}
		// convert through the signed type of the same width to restore the sign
		val.SetInt(int64(int8(x)))
		return nil

	case reflect.Int16:
		x, err := UnmarshalUint16(d.r)
		if err != nil {
			return err
		}
		val.SetInt(int64(int16(x)))
		return nil

	case reflect.Int32:
		x, err := UnmarshalUint32(d.r)
		if err != nil {
			return err
		}
		val.SetInt(int64(int32(x)))
		return nil

	case reflect.Int, reflect.Int64:
		x, err := UnmarshalUint64(d.r)
		if err != nil {
			return err
		}
		val.SetInt(int64(x))
		return nil

	case reflect.String: // very similar to byte slices
		strLen, err := decodeSliceLength(d.r) // length is capped by the decodeSliceLength Func
		if err != nil {
			return err
		}
		b, err := d.readN(strLen)
		if err != nil {
			return err
		}
		// the conversion copies, so the string does not alias the reader's memory
		val.SetString(string(b))
		return nil

	case reflect.Slice:
		// slices are variable length, but otherwise the same as arrays.
		// just have to allocate them first, then we can fallthrough to the array logic.
		sliceLen, err := decodeSliceLength(d.r) // length is capped by the decodeSliceLength Func
		if err != nil || sliceLen == 0 {
			return err // nil in case sliceLen==0
		}
		val.Set(reflect.MakeSlice(val.Type(), sliceLen, sliceLen))
		fallthrough
	case reflect.Array:
		// special case for byte arrays (e.g. hashes)
		if val.Type().Elem().Kind() == reflect.Uint8 {
			// convert val to a slice and read into it directly
			raw := val.Slice(0, val.Len())
			_, err := io.ReadFull(d.r, raw.Bytes())
			return err
		}
		// arrays are unmarshalled by sequentially unmarshalling their elements
		for i := 0; i < val.Len(); i++ {
			if err := d.decode(val.Index(i)); err != nil {
				return err
			}
		}
		return nil

	case reflect.Struct:
		for i := 0; i < val.NumField(); i++ {
			if isFieldHidden(val, i) {
				continue // ignore
			}
			if err := d.decode(val.Field(i)); err != nil {
				return err
			}
		}
		return nil

	default:
		return errors.New("unknown type")
	}
}

// readN reads exactly n bytes from the decoder's input stream and returns
// them, or an error when fewer than n bytes could be read.
//
// When the stream is a *bytes.Buffer the bytes are taken with Next, which
// avoids an allocation; a short read is then reported as
// io.ErrUnexpectedEOF. For any other reader a fresh slice is filled with
// io.ReadFull and its error is returned as is.
func (d *Decoder) readN(n int) ([]byte, error) {
	buf, isBuffer := d.r.(*bytes.Buffer)
	if !isBuffer {
		out := make([]byte, n)
		if _, err := io.ReadFull(d.r, out); err != nil {
			return nil, err
		}
		return out, nil
	}

	chunk := buf.Next(n)
	if len(chunk) == n {
		return chunk, nil
	}
	return nil, io.ErrUnexpectedEOF
}
4 changes: 4 additions & 0 deletions pkg/encoding/rivbin/doc.go
@@ -0,0 +1,4 @@
// Package encoding converts arbitrary objects into byte slices, and vice
// versa. It also contains helper functions for reading and writing
// specific/specialised objects.
package encoding

0 comments on commit 1a8ca06

Please sign in to comment.