Skip to content

Commit

Permalink
optimize varint decoding
Browse files Browse the repository at this point in the history
Ports the varint decoding optimizations from the C++ protobuf
implementation. Performance is significantly improved:

 name                  control ns/iter   variable ns/iter  diff ns/iter   diff %  speedup
 decode_varint_small   1,165 (686 MB/s)  664 (1204 MB/s)           -501  -43.00%   x 1.75
 decode_varint_medium  4,574 (174 MB/s)  1,213 (659 MB/s)        -3,361  -73.48%   x 3.77
 decode_varint_mixed   5,134 (155 MB/s)  1,354 (590 MB/s)        -3,780  -73.63%   x 3.79
 decode_varint_large   8,832 (90 MB/s)   1,675 (477 MB/s)        -7,157  -81.03%   x 5.27
  • Loading branch information
danburkert committed Jul 17, 2017
1 parent 6b940e7 commit a655f2b
Showing 1 changed file with 106 additions and 11 deletions.
117 changes: 106 additions & 11 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,79 @@ pub fn encode_varint<B>(mut value: u64, buf: &mut B) where B: BufMut {
}

/// Decodes a LEB128-encoded variable length integer from the buffer.
///
/// The contiguous chunk exposed by `buf.bytes()` is inspected first:
///
/// * an empty chunk is reported as an "invalid varint" error;
/// * a first byte without the continuation bit is returned directly as a one-byte varint;
/// * a chunk that is longer than 10 bytes, or whose final byte has no continuation bit, is
///   decoded straight from the slice by `decode_varint_slice`;
/// * anything else is handed to `decode_varint_slow`.
///
/// On success the buffer is advanced past the bytes that made up the varint.
#[inline]
pub fn decode_varint<B>(buf: &mut B) -> Result<u64, DecodeError> where B: Buf {
    // The shared borrow taken by `bytes()` is confined to this block, so that `buf` can be
    // mutated (advanced, or passed to the slow path) once the block has produced its result.
    let fast_path = {
        let chunk = buf.bytes();
        match chunk.first() {
            None => return Err(DecodeError::new("invalid varint")),
            // Single-byte varint: the byte is the value.
            Some(&first) if first < 0x80 => Some((first as u64, 1)),
            // `decode_varint_slice` indexes at most the first 10 bytes and stops at the first
            // byte without a continuation bit, so either condition keeps it inside `chunk`.
            Some(_) if chunk.len() > 10 || chunk[chunk.len() - 1] < 0x80 => {
                Some(decode_varint_slice(chunk)?)
            }
            Some(_) => None,
        }
    };

    match fast_path {
        Some((value, advance)) => {
            buf.advance(advance);
            Ok(value)
        }
        None => decode_varint_slow(buf),
    }
}

/// Decodes a LEB128-encoded variable length integer from the front of the slice, returning the
/// value and the number of bytes read.
///
/// The slice is indexed byte by byte until a byte without the continuation bit (`< 0x80`) is
/// found, up to index 9. The caller must therefore pass a slice that either contains such a byte
/// within its first 10 bytes or is at least 10 bytes long; otherwise the indexing panics.
///
/// An "invalid varint" error is returned when all of the first 10 bytes have the continuation bit
/// set. Bits of the tenth byte that do not fit in a `u64` are discarded.
///
/// Based loosely on [ReadVarint64FromArray][1].
///
/// [1]: https://github.com/google/protobuf/blob/3.3.x/src/google/protobuf/io/coded_stream.cc#L365-L406
#[inline]
fn decode_varint_slice(bytes: &[u8]) -> Result<(u64, usize), DecodeError> {
    // The decoding loop is fully unrolled. The value is accumulated in 32-bit pieces of four bytes
    // (28 payload bits) each and only widened once a piece is complete; splitting into 32-bit
    // pieces gives better performance.
    //
    // Every byte is added with its continuation bit still in place. Once the byte turns out not
    // to be the last one, that bit is subtracted back out of the accumulator.

    // Bytes 0..=3: payload bits 0..28.
    let b = bytes[0];
    let mut low = u32::from(b);
    if b < 0x80 {
        return Ok((u64::from(low), 1));
    }
    low -= 0x80;

    let b = bytes[1];
    low += u32::from(b) << 7;
    if b < 0x80 {
        return Ok((u64::from(low), 2));
    }
    low -= 0x80 << 7;

    let b = bytes[2];
    low += u32::from(b) << 14;
    if b < 0x80 {
        return Ok((u64::from(low), 3));
    }
    low -= 0x80 << 14;

    let b = bytes[3];
    low += u32::from(b) << 21;
    if b < 0x80 {
        return Ok((u64::from(low), 4));
    }
    low -= 0x80 << 21;
    let decoded = u64::from(low);

    // Bytes 4..=7: payload bits 28..56.
    let b = bytes[4];
    let mut mid = u32::from(b);
    if b < 0x80 {
        return Ok((decoded + (u64::from(mid) << 28), 5));
    }
    mid -= 0x80;

    let b = bytes[5];
    mid += u32::from(b) << 7;
    if b < 0x80 {
        return Ok((decoded + (u64::from(mid) << 28), 6));
    }
    mid -= 0x80 << 7;

    let b = bytes[6];
    mid += u32::from(b) << 14;
    if b < 0x80 {
        return Ok((decoded + (u64::from(mid) << 28), 7));
    }
    mid -= 0x80 << 14;

    let b = bytes[7];
    mid += u32::from(b) << 21;
    if b < 0x80 {
        return Ok((decoded + (u64::from(mid) << 28), 8));
    }
    mid -= 0x80 << 21;
    let decoded = decoded + (u64::from(mid) << 28);

    // Bytes 8..=9: payload bits 56..64. The shift drops anything beyond bit 63.
    let b = bytes[8];
    let mut high = u32::from(b);
    if b < 0x80 {
        return Ok((decoded + (u64::from(high) << 56), 9));
    }
    high -= 0x80;

    let b = bytes[9];
    high += u32::from(b) << 7;
    if b < 0x80 {
        return Ok((decoded + (u64::from(high) << 56), 10));
    }

    // We have overrun the maximum size of a varint (10 bytes). Assume the data is corrupt.
    Err(DecodeError::new("invalid varint"))
}

/// Decodes a LEB128-encoded variable length integer from the buffer, advancing the buffer as
/// necessary.
#[inline(never)]
fn decode_varint_slow<B>(buf: &mut B) -> Result<u64, DecodeError> where B: Buf {
let mut value = 0;
for count in 0..min(10, buf.remaining()) {
let byte = buf.get_u8();
Expand Down Expand Up @@ -768,6 +839,7 @@ pub mod btree_map {
mod test {
use std::fmt::Debug;
use std::io::Cursor;
use std::u64;

use bytes::{Bytes, BytesMut, IntoBuf};
use quickcheck::TestResult;
Expand Down Expand Up @@ -916,25 +988,48 @@ mod test {
fn varint() {
    /// Asserts that `value` encodes to exactly `encoded`, and that `encoded` decodes back to
    /// `value` through each decoding entry point.
    fn check(value: u64, encoded: &[u8]) {
        let mut buf = Vec::new();
        encode_varint(value, &mut buf);
        assert_eq!(buf, encoded);

        // Decode from a `Bytes`-backed buffer. The result is asserted here rather than being
        // shadowed by the next decode, so a wrong value from this path cannot go unnoticed.
        let roundtrip_value = decode_varint(&mut Bytes::from(encoded).into_buf()).expect("decoding failed");
        assert_eq!(value, roundtrip_value);

        // Decode from a buffer built directly over the borrowed slice.
        let roundtrip_value = decode_varint(&mut encoded.into_buf()).expect("decoding failed");
        assert_eq!(value, roundtrip_value);

        // Exercise the byte-at-a-time fallback explicitly.
        let roundtrip_value = decode_varint_slow(&mut encoded.into_buf()).expect("slow decoding failed");
        assert_eq!(value, roundtrip_value);
    }

    // Each pair straddles the point at which the encoding grows by one byte (every 7 bits).
    check(2u64.pow(0) - 1, &[0x00]);
    check(2u64.pow(0), &[0x01]);

    check(2u64.pow(7) - 1, &[0x7F]);
    check(2u64.pow(7), &[0x80, 0x01]);
    check(300, &[0xAC, 0x02]);

    check(2u64.pow(14) - 1, &[0xFF, 0x7F]);
    check(2u64.pow(14), &[0x80, 0x80, 0x01]);

    check(2u64.pow(21) - 1, &[0xFF, 0xFF, 0x7F]);
    check(2u64.pow(21), &[0x80, 0x80, 0x80, 0x01]);

    check(2u64.pow(28) - 1, &[0xFF, 0xFF, 0xFF, 0x7F]);
    check(2u64.pow(28), &[0x80, 0x80, 0x80, 0x80, 0x01]);

    check(2u64.pow(35) - 1, &[0xFF, 0xFF, 0xFF, 0xFF, 0x7F]);
    check(2u64.pow(35), &[0x80, 0x80, 0x80, 0x80, 0x80, 0x01]);

    check(2u64.pow(42) - 1, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]);
    check(2u64.pow(42), &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]);

    check(2u64.pow(49) - 1, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]);
    check(2u64.pow(49), &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]);

    check(2u64.pow(56) - 1, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]);
    check(2u64.pow(56), &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]);

    check(2u64.pow(63) - 1, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]);
    check(2u64.pow(63), &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]);

    check(u64::MAX, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]);
}

/// This big bowl o' macro soup generates a quickcheck encoding test for each
Expand Down

0 comments on commit a655f2b

Please sign in to comment.