Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

copr: implement Hash for Decimal and Time #6047

Merged
merged 5 commits into from Nov 26, 2019
Merged
Changes from 4 commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

@@ -23,7 +23,7 @@ use tikv_util::box_try;
macro_rules! match_template_hashable {
($t:tt, $($tail:tt)*) => {
match_template::match_template! {
$t = [Int, Real, Bytes, Duration],
$t = [Int, Real, Bytes, Duration, Decimal, DateTime],
$($tail)*
}
};
@@ -81,7 +81,12 @@ impl BatchFastHashAggregationExecutor<Box<dyn BatchExecutor<StorageStats = ()>>>
// Only a subset of all eval types are supported.
let eval_type = box_try!(EvalType::try_from(def.get_field_type().as_accessor().tp()));
match eval_type {
EvalType::Int | EvalType::Real | EvalType::Bytes | EvalType::Duration => {}
EvalType::Int
| EvalType::Real
| EvalType::Bytes
| EvalType::Duration
| EvalType::Decimal
| EvalType::DateTime => {}
_ => return Err(other_err!("Eval type {} is not supported", eval_type)),
}

@@ -185,6 +190,8 @@ enum Groups {
Real(HashMap<Option<Real>, usize>),
Bytes(HashMap<Option<Bytes>, usize>),
Duration(HashMap<Option<Duration>, usize>),
Decimal(HashMap<Option<Decimal>, usize>),
DateTime(HashMap<Option<DateTime>, usize>),
}

impl Groups {
@@ -3,6 +3,7 @@
use std::borrow::ToOwned;
use std::cmp::Ordering;
use std::fmt::{self, Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Add, Deref, DerefMut, Div, Mul, Neg, Rem, Sub};
use std::str::{self, FromStr};
use std::string::ToString;
@@ -308,7 +309,7 @@ fn calc_sub_carry(lhs: &Decimal, rhs: &Decimal) -> (Option<i32>, u8, SubTmp, Sub
}
// here l_end is the last nonzero index in l.word_buf, attention:it may in the range of (0,l_int_word_cnt)
l_frac_word_cnt = cmp::max(0, l_end + 1 - l_stop as isize) as u8;
// here r_end is the last nonzero index in r.word_buf, attention:it may in the range of (0,l_int_word_cnt)
// here r_end is the last nonzero index in r.word_buf, attention:it may in the range of (0,r_int_word_cnt)
r_frac_word_cnt = cmp::max(0, r_end + 1 - r_stop as isize) as u8;
while l_idx as isize <= l_end
&& r_idx as isize <= r_end
@@ -2379,6 +2380,32 @@ impl Neg for Decimal {
}
}

impl Hash for Decimal {
    /// Feeds a normalized view of this decimal into `state`.
    ///
    /// Three values are hashed, in this order:
    /// 1. the number of integer words left after skipping leading zero words,
    /// 2. the words between the first kept integer word and the last non-zero
    ///    word of the buffer,
    /// 3. the sign, forced to "not negative" when no non-zero word remains.
    ///
    /// Skipping the zero padding on both sides is what lets differently
    /// padded representations of one value (such as `1.00` and `1`) hash
    /// alike.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let int_words = word_cnt!(self.int_cnt) as usize;
        let frac_words = word_cnt!(self.frac_cnt) as usize;

        // Skip leading zero words of the integer part.
        let mut start = 0usize;
        while start < int_words && self.word_buf[start] == 0 {
            start += 1;
        }
        (int_words - start).hash(state);

        // `end` is an exclusive bound. Keeping it exclusive (instead of the
        // inclusive `int_words + frac_words - 1`) means the computation can
        // never underflow `usize` when both word counts are zero.
        let mut end = int_words + frac_words;
        // Trim trailing zero words; this also trims zero words before the
        // point when there is no non-zero digit after it.
        while start < end && self.word_buf[end - 1] == 0 {
            end -= 1;
        }

        self.word_buf[start..end].hash(state);
        // -0 must hash like 0, so the sign only counts when a non-zero word
        // is left.
        let negative = self.negative && start < end;
        negative.hash(state);
    }
}

#[cfg(test)]
mod tests {
use super::*;
@@ -2387,6 +2414,7 @@ mod tests {
use crate::codec::error::ERR_DATA_OUT_OF_RANGE;
use crate::expr::{EvalConfig, Flag};
use std::cmp::Ordering;
use std::collections::hash_map::DefaultHasher;
use std::f64::EPSILON;
use std::iter::repeat;
use std::sync::Arc;
@@ -3136,6 +3164,27 @@ mod tests {
}
}

#[test]
fn test_hash() {
    /// Parses `text` as a `Decimal` and returns its `DefaultHasher` digest.
    fn digest_of(text: &str) -> u64 {
        let value = text.parse::<Decimal>().unwrap();
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    // Each pair spells the same value with different padding or sign of
    // zero; both spellings must produce the same hash.
    let equivalent_pairs = [
        ("1.00", "1"),
        ("-1.11", "-1.11000000"),
        ("30.20", "30.2"),
        ("0", "-0"),
        ("0.001", "0.001000"),
    ];

    for &(lhs_str, rhs_str) in equivalent_pairs.iter() {
        assert_eq!(digest_of(lhs_str), digest_of(rhs_str));
    }
}

#[test]
fn test_max_decimal() {
let cases = vec![
@@ -11,6 +11,7 @@ pub use self::weekmode::WeekMode;
use std::cmp::Ordering;
use std::convert::{TryFrom, TryInto};
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::mem;

use codec::prelude::*;
@@ -1490,6 +1491,14 @@ impl Ord for Time {
}
}

impl Hash for Time {
    /// Hashes a copy of this value after calling `set_fsp_tt(0)` on it.
    ///
    /// NOTE(review): presumably `set_fsp_tt(0)` clears the fsp / time-type
    /// portion so that values which compare equal also hash equal — confirm
    /// against the `PartialEq`/`Ord` implementations.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Work on a copy so `self` is left untouched.
        let mut normalized = *self;
        normalized.set_fsp_tt(0);
        Hash::hash(&normalized.0, state);
    }
}

impl std::fmt::Display for Time {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
@@ -144,6 +144,35 @@ pub fn fuzz_coprocessor_codec_decimal(data: &[u8]) -> Result<(), Error> {
Ok(())
}

#[inline(always)]
pub fn fuzz_hash_decimal(data: &[u8]) -> Result<(), Error> {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use tidb_query::codec::data_type::Decimal;
use tidb_query::codec::mysql::DecimalDecoder;

fn fuzz_eq_then_hash(lhs: &Decimal, rhs: &Decimal) -> Result<(), Error> {
if lhs == rhs {
let mut lhasher = DefaultHasher::new();
lhs.hash(&mut lhasher);
let mut rhasher = DefaultHasher::new();
rhs.hash(&mut rhasher);
if lhasher.finish() == rhasher.finish() {
Ok(())
} else {
panic!("eq but not hash eq");
}
} else {
Ok(())
}
}
let mut cursor = Cursor::new(data);
let decimal1 = cursor.read_decimal()?;
let decimal2 = cursor.read_decimal()?;

fuzz_eq_then_hash(&decimal1, &decimal2)
}

trait ReadAsTimeType: ReadLiteralExt {
fn read_as_time_type(&mut self) -> Result<::tidb_query::codec::mysql::TimeType, Error> {
Ok(match self.read_as_u8()? % 3 {
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.