tsdb/engine/tsm1/timestamp.go

package tsm1

// Timestamp encoding is adaptive and based on structure of the timestamps that are encoded.  It
// uses a combination of delta encoding, scaling and compression using simple8b, run length encoding
// as well as falling back to no compression if needed.
//
// Timestamp values to be encoded should be sorted before encoding.  When encoded, the values are
// first delta-encoded.  The first value is the starting timestamp, subsequent values are the difference
// from the prior value.
//
// Timestamp resolution can also be in the nanosecond.  Many timestamps are monotonically increasing
// and fall on even boundaries of time such as every 10s.  When the timestamps have this structure,
// they are scaled by the largest common divisor that is also a factor of 10.  This has the effect
// of converting very large integer deltas into very small one that can be reversed by multiplying them
// by the scaling factor.
//
// Using these adjusted values, if all the deltas are the same, the time range is stored using run
// length encoding.  If run length encoding is not possible and all values are less than 1 << 60 - 1
// (~36.5 yrs in nanosecond resolution), then the timestamps are encoded using simple8b encoding.  If
// any value exceeds the maximum values, the deltas are stored uncompressed using 8b each.
//
// Each compressed byte slice has a 1 byte header indicating the compression type.  The 4 high bits
// indicate the encoding type.  The 4 low bits are used by the encoding type.
//
// For run-length encoding, the 4 low bits store the log10 of the scaling factor.  The next 8 bytes are
// the starting timestamp, next 1-10 bytes is the delta value using variable-length encoding, finally the
// next 1-10 bytes is the count of values.
//
// For simple8b encoding, the 4 low bits store the log10 of the scaling factor.  The next 8 bytes is the
// first delta value stored uncompressed, the remaining bytes are 64bit words containg compressed delta
// values.
//
// For uncompressed encoding, the delta values are stored using 8 bytes each.

import (
	"encoding/binary"
	"fmt"
	"math"

	"github.com/jwilder/encoding/simple8b"
)

const (
	// timeUncompressed is a an uncompressed format using 8 bytes per timestamp
	timeUncompressed = 0
	// timeCompressedPackedSimple is a bit-packed format using simple8b encoding
	timeCompressedPackedSimple = 1
	// timeCompressedRLE is a run-length encoding format
	timeCompressedRLE = 2
)

// TimeEncoder encodes time.Time to byte slices.
type TimeEncoder interface {
	Write(t int64)
	Bytes() ([]byte, error)
	Reset()
}

type encoder struct {
	ts    []uint64
	bytes []byte
	enc   *simple8b.Encoder
}

// NewTimeEncoder returns a TimeEncoder with an initial buffer ready to hold sz bytes.
func NewTimeEncoder(sz int) TimeEncoder {
	return &encoder{
		ts:  make([]uint64, 0, sz),
		enc: simple8b.NewEncoder(),
	}
}

// Reset sets the encoder back to its initial state.
func (e *encoder) Reset() {
	e.ts = e.ts[:0]
	e.bytes = e.bytes[:0]
	e.enc.Reset()
}

// Write adds a timestamp to the compressed stream.
func (e *encoder) Write(t int64) {
	e.ts = append(e.ts, uint64(t))
}

func (e *encoder) reduce() (max, divisor uint64, rle bool, deltas []uint64) {
	// Compute the deltas in place to avoid allocating another slice
	deltas = e.ts
	// Starting values for a max and divisor
	max, divisor = 0, 1e12

	// Indicates whether the the deltas can be run-length encoded
	rle = true

	// Iterate in reverse so we can apply deltas in place
	for i := len(deltas) - 1; i > 0; i-- {

		// First differential encode the values
		deltas[i] = deltas[i] - deltas[i-1]

		// We also need to keep track of the max value and largest common divisor
		v := deltas[i]

		if v > max {
			max = v
		}

		// If our value is divisible by 10, break.  Otherwise, try the next smallest divisor.
		for divisor > 1 && v%divisor != 0 {
			divisor /= 10
		}

		// Skip the first value || see if prev = curr.  The deltas can be RLE if the are all equal.
		rle = i == len(deltas)-1 || rle && (deltas[i+1] == deltas[i])
	}
	return
}

// Bytes returns the encoded bytes of all written times.
func (e *encoder) Bytes() ([]byte, error) {
	if len(e.ts) == 0 {
		return e.bytes[:0], nil
	}

	// Maximum and largest common divisor.  rle is true if dts (the delta timestamps),
	// are all the same.
	max, div, rle, dts := e.reduce()

	// The deltas are all the same, so we can run-length encode them
	if rle && len(e.ts) > 1 {
		return e.encodeRLE(e.ts[0], e.ts[1], div, len(e.ts))
	}

	// We can't compress this time-range, the deltas exceed 1 << 60
	if max > simple8b.MaxValue {
		return e.encodeRaw()
	}

	return e.encodePacked(div, dts)
}

func (e *encoder) encodePacked(div uint64, dts []uint64) ([]byte, error) {
	// Only apply the divisor if it's greater than 1 since division is expensive.
	if div > 1 {
		for _, v := range dts[1:] {
			if err := e.enc.Write(v / div); err != nil {
				return nil, err
			}
		}
	} else {
		for _, v := range dts[1:] {
			if err := e.enc.Write(v); err != nil {
				return nil, err
			}
		}
	}

	// The compressed deltas
	deltas, err := e.enc.Bytes()
	if err != nil {
		return nil, err
	}

	sz := 8 + 1 + len(deltas)
	if cap(e.bytes) < sz {
		e.bytes = make([]byte, sz)
	}
	b := e.bytes[:sz]

	// 4 high bits used for the encoding type
	b[0] = byte(timeCompressedPackedSimple) << 4
	// 4 low bits are the log10 divisor
	b[0] |= byte(math.Log10(float64(div)))

	// The first delta value
	binary.BigEndian.PutUint64(b[1:9], uint64(dts[0]))

	copy(b[9:], deltas)
	return b[:9+len(deltas)], nil
}

func (e *encoder) encodeRaw() ([]byte, error) {
	sz := 1 + len(e.ts)*8
	if cap(e.bytes) < sz {
		e.bytes = make([]byte, sz)
	}
	b := e.bytes[:sz]
	b[0] = byte(timeUncompressed) << 4
	for i, v := range e.ts {
		binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], uint64(v))
	}
	return b, nil
}

func (e *encoder) encodeRLE(first, delta, div uint64, n int) ([]byte, error) {
	// Large varints can take up to 10 bytes, we're encoding 3 + 1 byte type
	sz := 31
	if cap(e.bytes) < sz {
		e.bytes = make([]byte, sz)
	}
	b := e.bytes[:sz]
	// 4 high bits used for the encoding type
	b[0] = byte(timeCompressedRLE) << 4
	// 4 low bits are the log10 divisor
	b[0] |= byte(math.Log10(float64(div)))

	i := 1
	// The first timestamp
	binary.BigEndian.PutUint64(b[i:], uint64(first))
	i += 8
	// The first delta
	i += binary.PutUvarint(b[i:], uint64(delta/div))
	// The number of times the delta is repeated
	i += binary.PutUvarint(b[i:], uint64(n))

	return b[:i], nil
}

// TimeDecoder decodes a byte slice into timestamps.
type TimeDecoder struct {
	v    int64
	i, n int
	ts   []uint64
	dec  simple8b.Decoder
	err  error

	// The delta value for a run-length encoded byte slice
	rleDelta int64

	encoding byte
}

// Init initializes the decoder with bytes to read from.
func (d *TimeDecoder) Init(b []byte) {
	d.v = 0
	d.i = 0
	d.ts = d.ts[:0]
	d.err = nil
	if len(b) > 0 {
		// Encoding type is stored in the 4 high bits of the first byte
		d.encoding = b[0] >> 4
	}
	d.decode(b)
}

// Next returns true if there are any timestamps remaining to be decoded.
func (d *TimeDecoder) Next() bool {
	if d.err != nil {
		return false
	}

	if d.encoding == timeCompressedRLE {
		if d.i >= d.n {
			return false
		}
		d.i++
		d.v += d.rleDelta
		return d.i < d.n
	}

	if d.i >= len(d.ts) {
		return false
	}
	d.v = int64(d.ts[d.i])
	d.i++
	return true
}

// Read returns the next timestamp from the decoder.
func (d *TimeDecoder) Read() int64 {
	return d.v
}

// Error returns the last error encountered by the decoder.
func (d *TimeDecoder) Error() error {
	return d.err
}

func (d *TimeDecoder) decode(b []byte) {
	if len(b) == 0 {
		return
	}

	switch d.encoding {
	case timeUncompressed:
		d.decodeRaw(b[1:])
	case timeCompressedRLE:
		d.decodeRLE(b)
	case timeCompressedPackedSimple:
		d.decodePacked(b)
	default:
		d.err = fmt.Errorf("unknown encoding: %v", d.encoding)
	}
}

func (d *TimeDecoder) decodePacked(b []byte) {
	if len(b) < 9 {
		d.err = fmt.Errorf("TimeDecoder: not enough data to decode packed timestamps")
		return
	}
	div := uint64(math.Pow10(int(b[0] & 0xF)))
	first := uint64(binary.BigEndian.Uint64(b[1:9]))

	d.dec.SetBytes(b[9:])

	d.i = 0
	deltas := d.ts[:0]
	deltas = append(deltas, first)

	for d.dec.Next() {
		deltas = append(deltas, d.dec.Read())
	}

	// Compute the prefix sum and scale the deltas back up
	last := deltas[0]
	if div > 1 {
		for i := 1; i < len(deltas); i++ {
			dgap := deltas[i] * div
			deltas[i] = last + dgap
			last = deltas[i]
		}
	} else {
		for i := 1; i < len(deltas); i++ {
			deltas[i] += last
			last = deltas[i]
		}
	}

	d.i = 0
	d.ts = deltas
}

func (d *TimeDecoder) decodeRLE(b []byte) {
	if len(b) < 9 {
		d.err = fmt.Errorf("TimeDecoder: not enough data for initial RLE timestamp")
		return
	}

	var i, n int

	// Lower 4 bits hold the 10 based exponent so we can scale the values back up
	mod := int64(math.Pow10(int(b[i] & 0xF)))
	i++

	// Next 8 bytes is the starting timestamp
	first := binary.BigEndian.Uint64(b[i : i+8])
	i += 8

	// Next 1-10 bytes is our (scaled down by factor of 10) run length values
	value, n := binary.Uvarint(b[i:])
	if n <= 0 {
		d.err = fmt.Errorf("TimeDecoder: invalid run length in decodeRLE")
		return
	}

	// Scale the value back up
	value *= uint64(mod)
	i += n

	// Last 1-10 bytes is how many times the value repeats
	count, n := binary.Uvarint(b[i:])
	if n <= 0 {
		d.err = fmt.Errorf("TimeDecoder: invalid repeat value in decodeRLE")
		return
	}

	d.v = int64(first - value)
	d.rleDelta = int64(value)

	d.i = -1
	d.n = int(count)
}

func (d *TimeDecoder) decodeRaw(b []byte) {
	d.i = 0
	d.ts = make([]uint64, len(b)/8)
	for i := range d.ts {
		d.ts[i] = binary.BigEndian.Uint64(b[i*8 : i*8+8])

		delta := d.ts[i]
		// Compute the prefix sum and scale the deltas back up
		if i > 0 {
			d.ts[i] = d.ts[i-1] + delta
		}
	}
}

func CountTimestamps(b []byte) int {
	if len(b) == 0 {
		return 0
	}

	// Encoding type is stored in the 4 high bits of the first byte
	encoding := b[0] >> 4
	switch encoding {
	case timeUncompressed:
		// Uncompressed timestamps are just 8 bytes each
		return len(b[1:]) / 8
	case timeCompressedRLE:
		// First 9 bytes are the starting timestamp and scaling factor, skip over them
		i := 9
		// Next 1-10 bytes is our (scaled down by factor of 10) run length values
		_, n := binary.Uvarint(b[9:])
		i += n
		// Last 1-10 bytes is how many times the value repeats
		count, _ := binary.Uvarint(b[i:])
		return int(count)
	case timeCompressedPackedSimple:
		// First 9 bytes are the starting timestamp and scaling factor, skip over them
		count, _ := simple8b.CountBytes(b[9:])
		return count + 1 // +1 is for the first uncompressed timestamp, starting timestamep in b[1:9]
	default:
		return 0
	}
}