introduce rivbin binary encoder

fixes partly #463, taken from tfchain encoding contribution
threefoldtecharchive · Nov 16, 2018 · 1a8ca06 · 1a8ca06
1 parent 18b19ea
commit 1a8ca06
Show file tree

Hide file tree

Showing 14 changed files with 2,000 additions and 1 deletion.
diff --git a/Makefile b/Makefile
@@ -8,7 +8,7 @@ run = Test
 daemonpkgs = ./cmd/rivined
 clientpkgs = ./cmd/rivinec
 pkgs = ./build ./modules/gateway $(daemonpkgs) $(clientpkgs)
-testpkgs = ./build ./crypto ./pkg/encoding/siabin ./modules ./modules/gateway ./modules/blockcreator ./modules/wallet ./modules/explorer ./modules/consensus ./persist ./cmd/rivinec ./cmd/rivined ./sync ./types ./pkg/cli ./pkg/client ./pkg/daemon
+testpkgs = ./build ./crypto ./pkg/encoding/siabin ./pkg/encoding/rivbin ./modules ./modules/gateway ./modules/blockcreator ./modules/wallet ./modules/explorer ./modules/consensus ./persist ./cmd/rivinec ./cmd/rivined ./sync ./types ./pkg/cli ./pkg/client ./pkg/daemon
 
 version = $(shell git describe | cut -d '-' -f 1)
 commit = $(shell git rev-parse --short HEAD)

diff --git a/doc/encoding/RivineEncoding.md b/doc/encoding/RivineEncoding.md
@@ -0,0 +1,43 @@
+# Rivine Binary Encoding
+
+The main goal of the rivine (binary) encoding library is to achieve the smallest byte footprint for encoded content.
+This encoding library is heavily inspired by the initial Sia (binary) encoding library.
+
+## Standard Encoding
+
+All integers are little-endian and encoded as unsigned integers; the number of bytes used depends on the exact integral type:
+
+| byte size | types |
+| - | - |
+| 1 | uint8, int8 |
+| 2 | uint16, int16 |
+| 3 | uint24<sup>(1)</sup> |
+| 4 | uint32, int32 |
+| 8 | uint64, int64, uint, int |
+
+> (1) `uint24` is not a standard type, but this encoding library allows `uint32` integers that fit in 3 bytes to be encoded as 3 bytes.
+
+Booleans are encoded as a single byte, `0x00` for `False` and `0x01` for `True`.
+
+Nil pointers are equivalent to "False", i.e. a single zero byte. Valid pointers are represented by a "True" byte (0x01) followed by the encoding of the dereferenced value.
+
+Variable-length types, such as strings and slices, are represented by a length prefix followed by the encoded value. Strings are encoded as their literal UTF-8 bytes. Slices are encoded as the concatenation of their encoded elements. The length prefix can be one, two, three or four bytes:
+
+| byte size | inclusive size range |
+| - | - |
+| 1 | 0 - 127 |
+| 2 | 128 - 16 383 |
+| 3 | 16 384 - 2 097 151 |
+| 4 | 2 097 152 - 536 870 911 |
+
+This implies that variable-length types cannot have a size greater than `536 870 911`,
+which to be fair is a very big limit for blockchain purposes. Perhaps too big of a limit already,
+as it is expected that for most purposes the slice length will fit in a single byte, and the extreme cases in 2 bytes.
+
+Maps are not supported; attempting to encode a map will cause Marshal to panic. This is because their elements are not ordered in a consistent way, and it is imperative that this encoding scheme be deterministic. To encode a map, either convert it to a slice of structs, or define a `MarshalRivine` method (see below).
+
+Arrays and structs are simply the concatenation of their encoded elements (no length prefix is required here as the size is fixed). Byte slices and byte arrays are encoded as their literal representation, one byte per byte; a byte slice is still preceded by its length prefix.
+
+All struct fields must be exported. The ordering of struct fields is determined by their type definition.
+
+Finally, if a type implements the `RivineMarshaler` interface, its `MarshalRivine` method will be used to encode the type. Similarly, if a type implements the `RivineUnmarshaler` interface, its `UnmarshalRivine` method will be used to decode the type. Note that unless a type implements both interfaces, it must conform to the spec above. Otherwise, it may encode and decode itself however desired. This may be an attractive option where speed is critical, since it allows for more compact representations, and bypasses the use of reflection.
diff --git a/pkg/encoding/rivbin/decode.go b/pkg/encoding/rivbin/decode.go
@@ -0,0 +1,253 @@
+package encoding
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"reflect"
+)
+
+type (
+	// A RivineUnmarshaler can read and decode itself from a stream.
+	//
+	// When the address of a value being decoded implements this interface,
+	// Decoder.decode calls UnmarshalRivine with the decoder's underlying
+	// reader instead of using the reflection-based rules, and returns
+	// whatever error that call produces.
+	RivineUnmarshaler interface {
+		// UnmarshalRivine reads the receiver's encoded form from the
+		// given reader and returns any error encountered.
+		UnmarshalRivine(io.Reader) error
+	}
+)
+
+// Unmarshal decodes a single value from the encoded bytes b into v.
+// v must be a non-nil pointer; otherwise the error reported by
+// Decoder.Decode is returned. Decoding follows the inverse of the
+// marshaling rules described in the package documentation.
+func Unmarshal(b []byte, v interface{}) error {
+	dec := NewDecoder(bytes.NewBuffer(b))
+	return dec.Decode(v)
+}
+
+// UnmarshalAll decodes consecutive values from the encoded bytes b into
+// each element of vs, in order. Every element of vs must be a non-nil
+// pointer. Decoding stops at the first error, which is returned.
+func UnmarshalAll(b []byte, vs ...interface{}) error {
+	buf := bytes.NewBuffer(b)
+	return NewDecoder(buf).DecodeAll(vs...)
+}
+
+// A Decoder reads and decodes values from an input stream.
+// Create one with NewDecoder.
+type Decoder struct {
+	// r is the stream all decode calls read from. readN takes a
+	// copy-free path when r is a *bytes.Buffer.
+	r io.Reader
+}
+
+// NewDecoder returns a Decoder whose values are read from r.
+func NewDecoder(r io.Reader) *Decoder {
+	return &Decoder{r: r}
+}
+
+// Decode reads the next encoded value from the decoder's input stream and
+// stores it in the value v points to. v must be a non-nil pointer;
+// anything else (including an untyped nil) yields errBadPointer. The
+// decoding rules are the inverse of those in the package documentation.
+func (d *Decoder) Decode(v interface{}) error {
+	target := reflect.ValueOf(v)
+	// Only a non-nil pointer gives us something addressable to fill in.
+	if target.Kind() == reflect.Ptr && !target.IsNil() {
+		return d.decode(target.Elem())
+	}
+	return errBadPointer
+}
+
+// errBadPointer is returned by Decoder.Decode when the destination is not
+// a pointer or is a nil pointer.
+var errBadPointer = errors.New("cannot decode into invalid pointer")
+
+// DecodeAll decodes a variable number of arguments.
+func (d *Decoder) DecodeAll(vs ...interface{}) error {
+	var err error
+	for _, v := range vs {
+		err = d.Decode(v)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// decode reads the next encoded value from its input stream and stores it in
+// val. The decoding rules are the inverse of those specified in the package
+// docstring.
+func (d *Decoder) decode(val reflect.Value) error {
+	// check for RivineUnmarshaler interface first
+	if val.CanAddr() && val.Addr().CanInterface() {
+		ival := val.Addr().Interface()
+		if u, ok := ival.(RivineUnmarshaler); ok {
+			err := u.UnmarshalRivine(d.r)
+			return err
+		}
+	}
+
+	switch val.Kind() {
+	case reflect.Ptr:
+		isDefined, err := UnmarshalBool(d.r)
+		if err != nil || !isDefined {
+			return err // nil in case !isDefined
+		}
+		return d.decode(val.Elem())
+
+	case reflect.Bool:
+		b, err := UnmarshalBool(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetBool(b)
+		return nil
+
+	case reflect.Uint8:
+		x, err := UnmarshalUint8(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetUint(uint64(x))
+		return nil
+
+	case reflect.Uint32:
+		x, err := UnmarshalUint32(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetUint(uint64(x))
+		return nil
+
+	case reflect.Int:
+		x, err := UnmarshalUint64(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetInt(int64(x))
+		return nil
+
+	case reflect.Int64:
+		x, err := UnmarshalUint64(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetInt(int64(x))
+		return nil
+
+	case reflect.Uint64:
+		x, err := UnmarshalUint64(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetUint(x)
+		return nil
+
+	case reflect.Uint:
+		x, err := UnmarshalUint64(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetUint(x)
+		return nil
+
+	case reflect.Int32:
+		x, err := UnmarshalUint32(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetInt(int64(int32(x)))
+		return nil
+
+	case reflect.Uint16:
+		x, err := UnmarshalUint16(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetUint(uint64(x))
+		return nil
+
+	case reflect.Int16:
+		x, err := UnmarshalUint16(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetInt(int64(int16(x)))
+		return nil
+
+	case reflect.Int8:
+		x, err := UnmarshalUint8(d.r)
+		if err != nil {
+			return err
+		}
+		val.SetInt(int64(int8(x)))
+		return nil
+
+	case reflect.String: // very similar to byte slices
+		strLen, err := decodeSliceLength(d.r) // length is capped by the decodeSliceLength Func
+		if err != nil {
+			return err
+		}
+		b, err := d.readN(strLen)
+		if err != nil {
+			return err
+		}
+		val.SetString(string(b))
+		return nil
+
+	case reflect.Slice:
+		// slices are variable length, but otherwise the same as arrays.
+		// just have to allocate them first, then we can fallthrough to the array logic.
+		sliceLen, err := decodeSliceLength(d.r) // length is capped by the decodeSliceLength Func
+		if err != nil || sliceLen == 0 {
+			return err // nil in case sliceLen==0
+		}
+		val.Set(reflect.MakeSlice(val.Type(), sliceLen, sliceLen))
+		fallthrough
+	case reflect.Array:
+		// special case for byte arrays (e.g. hashes)
+		if val.Type().Elem().Kind() == reflect.Uint8 {
+			// convert val to a slice and read into it directly
+			b := val.Slice(0, val.Len())
+			_, err := io.ReadFull(d.r, b.Bytes())
+			if err != nil {
+				return err
+			}
+			return nil
+		}
+		// arrays are unmarshalled by sequentially unmarshalling their elements
+		var err error
+		for i := 0; i < val.Len(); i++ {
+			err = d.decode(val.Index(i))
+			if err != nil {
+				return err
+			}
+		}
+		return nil
+
+	case reflect.Struct:
+		var err error
+		for i := 0; i < val.NumField(); i++ {
+			if isFieldHidden(val, i) {
+				continue // ignore
+			}
+			err = d.decode(val.Field(i))
+			if err != nil {
+				return err
+			}
+		}
+		return nil
+
+	default:
+		return errors.New("unknown type")
+	}
+}
+
+// readN reads n bytes and panics if the read fails.
+func (d *Decoder) readN(n int) ([]byte, error) {
+	if buf, ok := d.r.(*bytes.Buffer); ok {
+		b := buf.Next(n)
+		if len(b) != n {
+			return nil, io.ErrUnexpectedEOF
+		}
+		return b, nil
+	}
+	b := make([]byte, n)
+	_, err := io.ReadFull(d.r, b)
+	if err != nil {
+		return nil, err
+	}
+	return b, nil
+}
diff --git a/pkg/encoding/rivbin/doc.go b/pkg/encoding/rivbin/doc.go
@@ -0,0 +1,4 @@
+// Package encoding converts arbitrary objects into byte slices, and vice
+// versa. It also contains helper functions for reading and writing
+// specific/specialised objects.
+package encoding