Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

codec: Some small improvements #4664

Merged
merged 10 commits into from May 10, 2019

Improve error handling

Signed-off-by: Breezewish <breezewish@pingcap.com>
  • Loading branch information...
breeswish committed May 10, 2019
commit e72f9255579a93b931415c80d5e3349958a6b4da

Some generated files are not rendered by default. Learn more.

@@ -7,7 +7,7 @@ publish = false
[dependencies]
libc = "0.2"
byteorder = "1.2"
quick-error = "1.2"
failure = "0.1"
panic_hook = { path = "../panic_hook" }
tikv_alloc = { path = "../tikv_alloc", default-features = false }

@@ -291,7 +291,7 @@ impl MemComparableByteCodec {
loop {
let src_ptr_next = src_ptr.add(MEMCMP_GROUP_SIZE + 1);
if std::intrinsics::unlikely(src_ptr_next > src_ptr_end) {
return Err(Error::UnexpectedEOF);
return Err(Error::new_eof_error());
}

// Copy `MEMCMP_GROUP_SIZE` bytes any way. However we will truncate the returned
@@ -386,7 +386,7 @@ pub trait MemComparableByteEncoder: NumberEncoder {
let len = MemComparableByteCodec::encoded_len(bs.len());
let buf = unsafe { self.bytes_mut(len) };
if unsafe { unlikely(buf.len() < len) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
MemComparableByteCodec::encode_all(bs, buf);
unsafe {
@@ -404,7 +404,7 @@ pub trait MemComparableByteEncoder: NumberEncoder {
let len = MemComparableByteCodec::encoded_len(bs.len());
let buf = unsafe { self.bytes_mut(len) };
if unsafe { unlikely(buf.len() < len) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
MemComparableByteCodec::encode_all_desc(bs, buf);
unsafe {
@@ -439,10 +439,7 @@ impl CompactByteCodec {
Err(_) => encoded.len(),
Ok((value, decoded_n)) => {
let r = value as usize + decoded_n;
if unsafe { unlikely(r > encoded.len()) } {
return encoded.len();
}
r
r.min(encoded.len())
}
}
}
@@ -485,7 +482,7 @@ impl<T: NumberDecoder> CompactByteDecoder for T {
let vn = self.read_var_i64()? as usize;
let data = self.bytes();
if unsafe { unlikely(data.len() < vn) } {
return Err(Error::UnexpectedEOF);
return Err(Error::new_eof_error());
}
let bs = data[0..vn].to_vec();
self.advance(vn);
@@ -1142,7 +1139,7 @@ mod tests {

#[cfg(test)]
mod benches {
use crate::test;
use crate::Error;

/// A naive implementation of encoding in mem-comparable format.
/// It does not process non zero-padding groups separately.
@@ -1204,26 +1201,21 @@ mod benches {
&key[index..index + ENC_GROUP_SIZE],
desc,
&mut buf,
))
.map_err(|_| super::Error::BufferTooSmall)?;
))?;
} else {
pad = ENC_GROUP_SIZE - remain;
self.write_all(adjust_bytes_order(&key[index..], desc, &mut buf))
.map_err(|_| super::Error::BufferTooSmall)?;
self.write_all(adjust_bytes_order(&key[index..], desc, &mut buf))?;
if desc {
self.write_all(&ENC_DESC_PADDING[..pad])
.map_err(|_| super::Error::BufferTooSmall)?;
self.write_all(&ENC_DESC_PADDING[..pad])?;
} else {
self.write_all(&ENC_ASC_PADDING[..pad])
.map_err(|_| super::Error::BufferTooSmall)?;
self.write_all(&ENC_ASC_PADDING[..pad])?;
}
}
self.write_all(adjust_bytes_order(
&[ENC_MARKER - (pad as u8)],
desc,
&mut buf,
))
.map_err(|_| super::Error::BufferTooSmall)?;
))?;
index += ENC_GROUP_SIZE;
}
Ok(())
@@ -1258,7 +1250,7 @@ mod benches {
let chunk = if next_offset <= data.len() {
&data[offset..next_offset]
} else {
return Err(super::Error::UnexpectedEOF);
return Err(Error::new_eof_error());
};
offset = next_offset;
// the last byte in decode unit is for marker which indicates pad size
@@ -1274,15 +1266,15 @@ mod benches {
continue;
}
if pad_size > ENC_GROUP_SIZE {
return Err(super::Error::BadPadding);
return Err(Error::BadPadding);
}
// if has padding, split the padding pattern and push rest bytes
let (bytes, padding) = bytes.split_at(ENC_GROUP_SIZE - pad_size);
key.write_all(bytes).unwrap();
let pad_byte = if desc { !0 } else { 0 };
// check the padding pattern whether validate or not
if padding.iter().any(|x| *x != pad_byte) {
return Err(super::Error::BadPadding);
return Err(Error::BadPadding);
}

if desc {
@@ -1303,7 +1295,7 @@ mod benches {
loop {
let marker_offset = read_offset + ENC_GROUP_SIZE;
if marker_offset >= data.len() {
return Err(super::Error::UnexpectedEOF);
return Err(Error::new_eof_error());
};

unsafe {
@@ -1330,7 +1322,7 @@ mod benches {

if pad_size > 0 {
if pad_size > ENC_GROUP_SIZE {
return Err(super::Error::BadPadding);
return Err(Error::BadPadding);
}

// check the padding pattern whether validate or not
@@ -1340,7 +1332,7 @@ mod benches {
&ENC_ASC_PADDING[..pad_size]
};
if &data[write_offset - pad_size..write_offset] != padding_slice {
return Err(super::Error::BadPadding);
return Err(Error::BadPadding);
}
unsafe {
data.set_len(write_offset - pad_size);
@@ -1,23 +1,27 @@
// Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0.

quick_error! {
#[derive(Debug)]
pub enum Error {
BufferTooSmall {
description("The buffer is too small to read or write data")
}
UnexpectedEOF {
description("Expecting more data but got EOF")
}
BadPadding {
description("Data padding is wrong")
}
Io(err: std::io::Error) {
from()
cause(err)
description(err.description())
}
use std::io;

#[derive(Debug, Fail)]
pub enum Error {
#[fail(display = "Io error")]
Io(#[fail(cause)] Box<io::Error>),

#[fail(display = "Data padding is incorrect")]
BadPadding,
}

impl Error {
pub(crate) fn new_eof_error() -> Error {
io::Error::new(io::ErrorKind::UnexpectedEof, "Unexpected EOF").into()
}
}

impl From<io::Error> for Error {
fn from(e: io::Error) -> Self {
Error::Io(Box::new(e))
}
}

// Box the error in case of large data structure when there is no error.
pub type Result<T> = std::result::Result<T, Error>;
@@ -5,7 +5,7 @@
#![feature(ptr_offset_from)]

#[macro_use]
extern crate quick_error;
extern crate failure;
#[cfg(test)]
extern crate test;
#[allow(unused_extern_crates)]
@@ -446,7 +446,7 @@ impl NumberCodec {
ptr = ptr.add(1);
}
if unlikely(ptr == ptr_end) {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
val |= (*ptr as u64) << shift;
Ok((val, ptr.offset_from(buf.as_ptr()) as usize + 1))
@@ -483,7 +483,8 @@ impl NumberCodec {
pub fn try_decode_var_i64(buf: &[u8]) -> Result<(i64, usize)> {
let (uv, decoded_bytes) = Self::try_decode_var_u64(buf)?;
let v = uv >> 1;
if unsafe { likely(uv & 1 == 0) } {
if uv & 1 == 0 {
// no need for likely/unlikely here
Ok((v as i64, decoded_bytes))
} else {
Ok((!v as i64, decoded_bytes))
@@ -515,7 +516,7 @@ impl NumberCodec {
while ptr != ptr_end && *ptr >= 0x80 {
ptr = ptr.add(1);
}
// We we got here, we are either `ptr == ptr_end`, or `*ptr < 0x80`.
// When we got here, we are either `ptr == ptr_end`, or `*ptr < 0x80`.

This comment has been minimized.

Copy link
@lonng

lonng May 10, 2019

Contributor

The assert is not always true, e.g [0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f](9 bytes) both ptr == ptr_end and *ptr < 0x80 are true.

This comment has been minimized.

Copy link
@breeswish

breeswish May 10, 2019

Author Member
[0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f]
                                             ^ptr_end

this is ptr_end. Your case is only *ptr < 0x80

// For `ptr == ptr_end` case, it means we are expecting a value < 0x80
// but meet EOF, so only `len` is returned.
// For `*ptr < 0x80` case, it means currently it is pointing to the last byte
@@ -534,7 +535,7 @@ macro_rules! read {
let ret = {
let buf = $s.bytes();
if unsafe { unlikely(buf.len() < $size) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
NumberCodec::$f(buf)
};
@@ -765,7 +766,7 @@ macro_rules! write {
{
let buf = unsafe { $s.bytes_mut($size) };
if unsafe { unlikely(buf.len() < $size) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
NumberCodec::$f(buf, $v);
}
@@ -967,7 +968,7 @@ pub trait NumberEncoder: BufferWriter {
let encoded_bytes = {
let buf = unsafe { self.bytes_mut(MAX_VARINT64_LENGTH) };
if unsafe { unlikely(buf.len() < MAX_VARINT64_LENGTH) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
NumberCodec::encode_var_u64(buf, v)
};
@@ -991,7 +992,7 @@ pub trait NumberEncoder: BufferWriter {
let encoded_bytes = {
let buf = unsafe { self.bytes_mut(MAX_VARINT64_LENGTH) };
if unsafe { unlikely(buf.len() < MAX_VARINT64_LENGTH) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
NumberCodec::encode_var_i64(buf, v)
};
@@ -1008,7 +1009,7 @@ pub trait NumberEncoder: BufferWriter {
fn write_all_bytes(&mut self, values: &[u8]) -> Result<()> {
let buf = unsafe { self.bytes_mut(values.len()) };
if unsafe { unlikely(buf.len() < values.len()) } {
return Err(Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
buf[..values.len()].copy_from_slice(values);
unsafe {
@@ -1774,7 +1775,7 @@ mod tests {

#[cfg(test)]
mod benches {
use crate::test;
use crate::Error;

use byteorder;
use protobuf::CodedOutputStream;
@@ -1892,7 +1893,7 @@ mod benches {
*data = &data[size..];
return Ok(f(buf));
}
Err(super::Error::BufferTooSmall)
Err(Error::new_eof_error())
}

/// The original implementation in TiKV
@@ -1967,12 +1968,10 @@ mod benches {
trait OldVarIntEncoder: byteorder::WriteBytesExt {
fn encode_var_u64(&mut self, mut v: u64) -> super::Result<()> {
while v >= 0x80 {
self.write_u8(v as u8 | 0x80)
.map_err(|_| super::Error::BufferTooSmall)?;
self.write_u8(v as u8 | 0x80)?;
v >>= 7;
}
self.write_u8(v as u8)
.map_err(|_| super::Error::BufferTooSmall)
Ok(self.write_u8(v as u8)?)
}
}

@@ -2098,7 +2097,7 @@ mod benches {
*data = unsafe { data.get_unchecked(10..) };
return Ok(res);
}
return Err(super::Error::BufferTooSmall);
return Err(Error::new_eof_error());
}
}

@@ -2112,7 +2111,7 @@ mod benches {
return Ok(res);
}
}
Err(super::Error::BufferTooSmall)
Err(Error::new_eof_error())
}

/// Decode u64 < 128 in VarInt using original TiKV implementation.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.