From 780f559e4fd38c99e9f9b54ca3443dde39c95174 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 30 Mar 2023 17:37:05 +0800 Subject: [PATCH 01/24] add new dependency, bit-set --- Cargo.lock | 10 ++++++++++ src/common/Cargo.toml | 1 + 2 files changed, 11 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index f6e1f73bf93a..b2d1c60d7f40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -993,6 +993,15 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + [[package]] name = "bit-vec" version = "0.6.3" @@ -5857,6 +5866,7 @@ dependencies = [ "arrow-schema", "async-trait", "auto_enums", + "bit-set", "bitflags 2.0.2", "byteorder", "bytes", diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index 4c3bbf316b89..162c6767ce19 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -21,6 +21,7 @@ arrow-schema = "36" async-trait = "0.1" auto_enums = "0.8" bitflags = "2" +bit-set = "0.5.3" byteorder = "1" bytes = "1" chrono = { version = "0.4", default-features = false, features = [ From eb3b6c90704895c302e8c92099f8377dd319b22b Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Fri, 31 Mar 2023 10:19:25 +0800 Subject: [PATCH 02/24] add set8 --- Cargo.lock | 21 +++++++++++---------- src/common/Cargo.toml | 2 +- src/common/src/hash/key.rs | 24 ++++++++++++++---------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2d1c60d7f40..b642e4956631 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -993,15 +993,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec", -] - [[package]] name = "bit-vec" version = "0.6.3" @@ -5866,7 +5857,6 @@ dependencies = [ "arrow-schema", "async-trait", "auto_enums", - "bit-set", "bitflags 2.0.2", "byteorder", "bytes", @@ -5913,6 +5903,7 @@ dependencies = [ "serde_default", "serde_json", "serde_with 2.3.1", + "smallbitset", "static_assertions", "strum", "strum_macros", @@ -7457,6 +7448,16 @@ dependencies = [ "futures-io", ] +[[package]] +name = "smallbitset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93fb6dc506bf9247d497b1cd9748015f444b9b16da6da79484f54b20df49a7c" +dependencies = [ + "num", + "paste", +] + [[package]] name = "smallvec" version = "1.10.0" diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index 162c6767ce19..eb00a350143f 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -21,7 +21,7 @@ arrow-schema = "36" async-trait = "0.1" auto_enums = "0.8" bitflags = "2" -bit-set = "0.5.3" +smallbitset = "0.6.1" byteorder = "1" bytes = "1" chrono = { version = "0.4", default-features = false, features = [ diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 928cb3cbc487..620356535952 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -41,6 +41,7 @@ use crate::types::{DataType, Date, Decimal, ScalarRef, Time, Timestamp, F32, F64 use crate::util::hash_util::Crc32FastBuilder; use crate::util::iter_util::ZipEqFast; use crate::util::value_encoding::{deserialize_datum, serialize_datum_into}; +use smallbitset::Set8; /// A wrapper for u64 hash result. #[derive(Default, Clone, Copy, Debug, PartialEq)] @@ -143,15 +144,16 @@ pub trait HashKey: ) -> ArrayResult<()>; fn has_null(&self) -> bool { - !self.null_bitmap().is_clear() + !self.null_bitmap().is_empty() } - fn null_bitmap(&self) -> &FixedBitSet; + fn null_bitmap(&self) -> &Set8; } /// Designed for hash keys with at most `N` serialized bytes. /// /// See [`crate::hash::calc_hash_key_kind`] +/// TODO(kwannoel): add benchmark and test, this should also use small bitset. #[derive(Clone, Debug)] pub struct FixedSizeKey { key: [u8; N], @@ -167,7 +169,7 @@ pub struct SerializedKey { // Key encoding. key: Vec, hash_code: u64, - null_bitmap: FixedBitSet, + null_bitmap: Set8, } impl EstimateSize for FixedSizeKey { @@ -192,7 +194,7 @@ impl Hash for FixedSizeKey { impl EstimateSize for SerializedKey { fn estimated_heap_size(&self) -> usize { - self.key.estimated_heap_size() + self.null_bitmap.estimated_heap_size() + self.key.estimated_heap_size() + 8 //self.null_bitmap.estimated_heap_size() } } @@ -575,7 +577,8 @@ impl HashKeyDeserializer for FixedSizeKeyDeserializer { pub struct SerializedKeySerializer { buffer: Vec, hash_code: u64, - null_bitmap: FixedBitSet, + null_bitmap: Set8, + null_bitmap_idx: usize, } impl HashKeySerializer for SerializedKeySerializer { @@ -585,20 +588,21 @@ impl HashKeySerializer for SerializedKeySerializer { Self { buffer: Vec::with_capacity(estimated_value_encoding_size), hash_code: hash_code.0, - null_bitmap: FixedBitSet::new(), + null_bitmap: Set8::empty(), + null_bitmap_idx: 0, } } fn append<'a, D: HashKeySerDe<'a>>(&mut self, data: Option) { let len_bitmap = self.null_bitmap.len(); - self.null_bitmap.grow(len_bitmap + 1); match data { Some(v) => { serialize_datum_into(&Some(v.to_owned_scalar().into()), &mut self.buffer); } None => { serialize_datum_into(&None, &mut self.buffer); - self.null_bitmap.insert(len_bitmap); + self.null_bitmap.add_inplace(self.null_bitmap_idx); + self.null_bitmap_idx += 1; } } } @@ -696,7 +700,7 @@ impl HashKey for FixedSizeKey { Ok(()) } - fn null_bitmap(&self) -> &FixedBitSet { + fn null_bitmap(&self) -> &Set8 { &self.null_bitmap } } @@ -726,7 +730,7 @@ impl HashKey for SerializedKey { Ok(()) } - fn null_bitmap(&self) -> &FixedBitSet { + fn null_bitmap(&self) -> &Set8 { &self.null_bitmap } } From 3dc8b20a3d6ce1d291c90fb00aacf9c16ac55a03 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Fri, 31 Mar 2023 12:08:29 +0800 Subject: [PATCH 03/24] fix remaining compat issues --- src/common/src/hash/key.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 620356535952..687bb6f8342c 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -158,7 +158,7 @@ pub trait HashKey: pub struct FixedSizeKey { key: [u8; N], hash_code: u64, - null_bitmap: FixedBitSet, + null_bitmap: Set8, } /// Designed for hash keys which can't be represented by [`FixedSizeKey`]. @@ -174,7 +174,8 @@ pub struct SerializedKey { impl EstimateSize for FixedSizeKey { fn estimated_heap_size(&self) -> usize { - self.null_bitmap.estimated_heap_size() + 8 + // self.null_bitmap.estimated_heap_size() } } @@ -493,7 +494,7 @@ impl<'a> HashKeySerDe<'a> for ListRef<'a> { pub struct FixedSizeKeySerializer { buffer: [u8; N], - null_bitmap: FixedBitSet, + null_bitmap: Set8, null_bitmap_idx: usize, data_len: usize, hash_code: u64, @@ -513,7 +514,7 @@ impl HashKeySerializer for FixedSizeKeySerializer { fn from_hash_code(hash_code: HashCode, _estimated_key_size: usize) -> Self { Self { buffer: [0u8; N], - null_bitmap: FixedBitSet::with_capacity(u8::BITS as usize), + null_bitmap: Set8::empty(), null_bitmap_idx: 0, data_len: 0, hash_code: hash_code.0, @@ -530,7 +531,7 @@ impl HashKeySerializer for FixedSizeKeySerializer { self.buffer[self.data_len..(self.data_len + ret.len())].copy_from_slice(ret); self.data_len += ret.len(); } - None => self.null_bitmap.insert(self.null_bitmap_idx), + None => { self.null_bitmap.add_inplace(self.null_bitmap_idx); } }; self.null_bitmap_idx += 1; } @@ -546,7 +547,7 @@ impl HashKeySerializer for FixedSizeKeySerializer { pub struct FixedSizeKeyDeserializer { cursor: Cursor<[u8; N]>, - null_bitmap: FixedBitSet, + null_bitmap: Set8, null_bitmap_idx: usize, } From 95dabcbfbb9c01486e789c0788f669497975c247 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Fri, 31 Mar 2023 12:38:48 +0800 Subject: [PATCH 04/24] wrap bitmap for easier refactoring --- src/common/src/hash/key.rs | 61 ++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 687bb6f8342c..158650ca2b93 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -43,6 +43,38 @@ use crate::util::iter_util::ZipEqFast; use crate::util::value_encoding::{deserialize_datum, serialize_datum_into}; use smallbitset::Set8; +/// Bitmap for null values in key. +/// This is specialized for key, +/// since it usually has few group keys. +#[repr(transparent)] +#[derive(Clone, Debug, PartialEq)] +pub struct NullBitmap { + inner: Set8, +} + +impl NullBitmap { + fn empty() -> Self { + NullBitmap { + inner: Set8::empty(), + } + } + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + fn set_true(&mut self, idx: usize) { + self.inner.add_inplace(idx); + } + fn estimated_heap_size(&self) -> usize { + 8 + } + fn len(&mut self) -> usize { + self.inner.len() + } + fn contains(&mut self, x: usize) -> bool { + self.inner.contains(x) + } +} + /// A wrapper for u64 hash result. #[derive(Default, Clone, Copy, Debug, PartialEq)] pub struct HashCode(pub u64); @@ -147,7 +179,7 @@ pub trait HashKey: !self.null_bitmap().is_empty() } - fn null_bitmap(&self) -> &Set8; + fn null_bitmap(&self) -> &NullBitmap; } /// Designed for hash keys with at most `N` serialized bytes. @@ -158,7 +190,7 @@ pub trait HashKey: pub struct FixedSizeKey { key: [u8; N], hash_code: u64, - null_bitmap: Set8, + null_bitmap: NullBitmap, } /// Designed for hash keys which can't be represented by [`FixedSizeKey`]. @@ -169,13 +201,12 @@ pub struct SerializedKey { // Key encoding. key: Vec, hash_code: u64, - null_bitmap: Set8, + null_bitmap: NullBitmap, } impl EstimateSize for FixedSizeKey { fn estimated_heap_size(&self) -> usize { - 8 - // self.null_bitmap.estimated_heap_size() + self.null_bitmap.estimated_heap_size() } } @@ -195,7 +226,7 @@ impl Hash for FixedSizeKey { impl EstimateSize for SerializedKey { fn estimated_heap_size(&self) -> usize { - self.key.estimated_heap_size() + 8 //self.null_bitmap.estimated_heap_size() + self.key.estimated_heap_size() + self.null_bitmap.estimated_heap_size() } } @@ -494,7 +525,7 @@ impl<'a> HashKeySerDe<'a> for ListRef<'a> { pub struct FixedSizeKeySerializer { buffer: [u8; N], - null_bitmap: Set8, + null_bitmap: NullBitmap, null_bitmap_idx: usize, data_len: usize, hash_code: u64, @@ -514,7 +545,7 @@ impl HashKeySerializer for FixedSizeKeySerializer { fn from_hash_code(hash_code: HashCode, _estimated_key_size: usize) -> Self { Self { buffer: [0u8; N], - null_bitmap: Set8::empty(), + null_bitmap: NullBitmap::empty(), null_bitmap_idx: 0, data_len: 0, hash_code: hash_code.0, @@ -531,7 +562,7 @@ impl HashKeySerializer for FixedSizeKeySerializer { self.buffer[self.data_len..(self.data_len + ret.len())].copy_from_slice(ret); self.data_len += ret.len(); } - None => { self.null_bitmap.add_inplace(self.null_bitmap_idx); } + None => { self.null_bitmap.set_true(self.null_bitmap_idx); } }; self.null_bitmap_idx += 1; } @@ -547,7 +578,7 @@ impl HashKeySerializer for FixedSizeKeySerializer { pub struct FixedSizeKeyDeserializer { cursor: Cursor<[u8; N]>, - null_bitmap: Set8, + null_bitmap: NullBitmap, null_bitmap_idx: usize, } @@ -578,7 +609,7 @@ impl HashKeyDeserializer for FixedSizeKeyDeserializer { pub struct SerializedKeySerializer { buffer: Vec, hash_code: u64, - null_bitmap: Set8, + null_bitmap: NullBitmap, null_bitmap_idx: usize, } @@ -589,7 +620,7 @@ impl HashKeySerializer for SerializedKeySerializer { Self { buffer: Vec::with_capacity(estimated_value_encoding_size), hash_code: hash_code.0, - null_bitmap: Set8::empty(), + null_bitmap: NullBitmap::empty(), null_bitmap_idx: 0, } } @@ -602,7 +633,7 @@ impl HashKeySerializer for SerializedKeySerializer { } None => { serialize_datum_into(&None, &mut self.buffer); - self.null_bitmap.add_inplace(self.null_bitmap_idx); + self.null_bitmap.set_true(self.null_bitmap_idx); self.null_bitmap_idx += 1; } } @@ -701,7 +732,7 @@ impl HashKey for FixedSizeKey { Ok(()) } - fn null_bitmap(&self) -> &Set8 { + fn null_bitmap(&self) -> &NullBitmap { &self.null_bitmap } } @@ -731,7 +762,7 @@ impl HashKey for SerializedKey { Ok(()) } - fn null_bitmap(&self) -> &Set8 { + fn null_bitmap(&self) -> &NullBitmap { &self.null_bitmap } } From 1172cc6f83f12000e8806806d294318621afbab3 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Mon, 3 Apr 2023 16:16:26 +0800 Subject: [PATCH 05/24] fix review --- src/common/src/hash/key.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 158650ca2b93..5a703d5ee336 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -634,9 +634,9 @@ impl HashKeySerializer for SerializedKeySerializer { None => { serialize_datum_into(&None, &mut self.buffer); self.null_bitmap.set_true(self.null_bitmap_idx); - self.null_bitmap_idx += 1; } } + self.null_bitmap_idx += 1; } fn into_hash_key(self) -> SerializedKey { From 97a5ffee73164502ef53b563d9203c688e98be26 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Mon, 3 Apr 2023 16:29:57 +0800 Subject: [PATCH 06/24] stub is_subset --- src/common/src/hash/key.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 5a703d5ee336..51f7916ea3b2 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -67,12 +67,15 @@ impl NullBitmap { fn estimated_heap_size(&self) -> usize { 8 } - fn len(&mut self) -> usize { + fn len(&self) -> usize { self.inner.len() } - fn contains(&mut self, x: usize) -> bool { + fn contains(&self, x: usize) -> bool { self.inner.contains(x) } + pub fn is_subset(&self, other: &FixedBitSet) -> bool { + todo!() + } } /// A wrapper for u64 hash result. From adb9d637c47c6f3f6a26d7581ff89c9616d534ff Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 12:43:14 +0800 Subject: [PATCH 07/24] try set64 --- src/common/src/hash/key.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 51f7916ea3b2..3b931ad11801 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -41,7 +41,7 @@ use crate::types::{DataType, Date, Decimal, ScalarRef, Time, Timestamp, F32, F64 use crate::util::hash_util::Crc32FastBuilder; use crate::util::iter_util::ZipEqFast; use crate::util::value_encoding::{deserialize_datum, serialize_datum_into}; -use smallbitset::Set8; +use smallbitset::{Set64, Set8}; /// Bitmap for null values in key. /// This is specialized for key, @@ -49,13 +49,13 @@ use smallbitset::Set8; #[repr(transparent)] #[derive(Clone, Debug, PartialEq)] pub struct NullBitmap { - inner: Set8, + inner: Set64, } impl NullBitmap { fn empty() -> Self { NullBitmap { - inner: Set8::empty(), + inner: Set64::empty(), } } fn is_empty(&self) -> bool { @@ -65,7 +65,7 @@ impl NullBitmap { self.inner.add_inplace(idx); } fn estimated_heap_size(&self) -> usize { - 8 + self.inner.capacity() } fn len(&self) -> usize { self.inner.len() From e960edb0bdde791f92fc48f8993d412c3c508c4a Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 14:32:15 +0800 Subject: [PATCH 08/24] cargo check --- Cargo.lock | 1 + src/common/Cargo.toml | 2 +- src/common/src/hash/key.rs | 12 ++++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b642e4956631..3a6cd7d95ecb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8919,6 +8919,7 @@ dependencies = [ "memchr", "miniz_oxide", "multimap", + "num", "num-bigint", "num-integer", "num-traits", diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index eb00a350143f..dc7b0189cdc4 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -21,7 +21,6 @@ arrow-schema = "36" async-trait = "0.1" auto_enums = "0.8" bitflags = "2" -smallbitset = "0.6.1" byteorder = "1" bytes = "1" chrono = { version = "0.4", default-features = false, features = [ @@ -66,6 +65,7 @@ serde = { version = "1", features = ["derive"] } serde_default = "0.1" serde_json = "1" serde_with = "2" +smallbitset = "0.6.1" static_assertions = "1" strum = "0.24" strum_macros = "0.24" diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 3b931ad11801..e231e5dbb3a8 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -29,6 +29,7 @@ use std::io::{Cursor, Read}; use chrono::{Datelike, Timelike}; use fixedbitset::FixedBitSet; +use smallbitset::{Set64, Set8}; use crate::array::serial_array::Serial; use crate::array::{ @@ -41,7 +42,6 @@ use crate::types::{DataType, Date, Decimal, ScalarRef, Time, Timestamp, F32, F64 use crate::util::hash_util::Crc32FastBuilder; use crate::util::iter_util::ZipEqFast; use crate::util::value_encoding::{deserialize_datum, serialize_datum_into}; -use smallbitset::{Set64, Set8}; /// Bitmap for null values in key. /// This is specialized for key, @@ -58,21 +58,27 @@ impl NullBitmap { inner: Set64::empty(), } } + fn is_empty(&self) -> bool { self.inner.is_empty() } + fn set_true(&mut self, idx: usize) { self.inner.add_inplace(idx); } + fn estimated_heap_size(&self) -> usize { self.inner.capacity() } + fn len(&self) -> usize { self.inner.len() } + fn contains(&self, x: usize) -> bool { self.inner.contains(x) } + pub fn is_subset(&self, other: &FixedBitSet) -> bool { todo!() } @@ -565,7 +571,9 @@ impl HashKeySerializer for FixedSizeKeySerializer { self.buffer[self.data_len..(self.data_len + ret.len())].copy_from_slice(ret); self.data_len += ret.len(); } - None => { self.null_bitmap.set_true(self.null_bitmap_idx); } + None => { + self.null_bitmap.set_true(self.null_bitmap_idx); + } }; self.null_bitmap_idx += 1; } From 8ddf959941ba67962ef51cdf20b265a3d8984d29 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 14:36:02 +0800 Subject: [PATCH 09/24] fix clippy --- src/common/src/hash/key.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index e231e5dbb3a8..6699343167b0 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -29,7 +29,7 @@ use std::io::{Cursor, Read}; use chrono::{Datelike, Timelike}; use fixedbitset::FixedBitSet; -use smallbitset::{Set64, Set8}; +use smallbitset::Set64; use crate::array::serial_array::Serial; use crate::array::{ @@ -79,7 +79,7 @@ impl NullBitmap { self.inner.contains(x) } - pub fn is_subset(&self, other: &FixedBitSet) -> bool { + pub fn is_subset(&self, _other: &FixedBitSet) -> bool { todo!() } } @@ -637,7 +637,7 @@ impl HashKeySerializer for SerializedKeySerializer { } fn append<'a, D: HashKeySerDe<'a>>(&mut self, data: Option) { - let len_bitmap = self.null_bitmap.len(); + let _len_bitmap = self.null_bitmap.len(); match data { Some(v) => { serialize_datum_into(&Some(v.to_owned_scalar().into()), &mut self.buffer); From 9de2ed221e399e62b95cae599d2fd35f6b62321b Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 15:18:26 +0800 Subject: [PATCH 10/24] add interface for subset --- src/common/src/hash/key.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 6699343167b0..d042be4a7a74 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -79,8 +79,20 @@ impl NullBitmap { self.inner.contains(x) } - pub fn is_subset(&self, _other: &FixedBitSet) -> bool { - todo!() + pub fn is_subset(&self, other: &NullBitmap) -> bool { + self.inner.contains_all(other.inner) + } +} + +impl From> for NullBitmap { + fn from(value: Vec) -> Self { + let mut bitmap = NullBitmap::empty(); + for (idx, is_true) in value.into_iter().enumerate() { + if is_true { + null_matched.add_inplace(idx); + } + } + bitmap } } From 351a9bd6ebb49e67242744544c38356329d0f58d Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 15:45:21 +0800 Subject: [PATCH 11/24] fix nullbitmap breakages --- src/batch/src/executor/join/hash_join.rs | 11 +++-------- src/batch/src/executor/join/lookup_join_base.rs | 12 +++--------- src/common/src/hash/key.rs | 4 ++-- src/stream/src/executor/hash_join.rs | 12 +++--------- src/stream/src/executor/managed_state/join/mod.rs | 10 +++++----- 5 files changed, 16 insertions(+), 33 deletions(-) diff --git a/src/batch/src/executor/join/hash_join.rs b/src/batch/src/executor/join/hash_join.rs index bb4fc4ec5cc6..fca7f85c9241 100644 --- a/src/batch/src/executor/join/hash_join.rs +++ b/src/batch/src/executor/join/hash_join.rs @@ -18,7 +18,7 @@ use std::iter::empty; use std::marker::PhantomData; use std::sync::Arc; -use fixedbitset::FixedBitSet; + use futures_async_stream::try_stream; use itertools::Itertools; use risingwave_common::array::{Array, DataChunk, RowRef}; @@ -232,13 +232,8 @@ impl HashJoinExecutor { JoinHashMap::with_capacity_and_hasher(build_row_count, PrecomputedBuildHasher); let mut next_build_row_with_same_key = ChunkedData::with_chunk_sizes(build_side.iter().map(|c| c.capacity()))?; - let null_matched = { - let mut null_matched = FixedBitSet::with_capacity(self.null_matched.len()); - for (idx, col_null_matched) in self.null_matched.into_iter().enumerate() { - null_matched.set(idx, col_null_matched); - } - null_matched - }; + + let null_matched = self.null_matched.into(); // Build hash map for (build_chunk_id, build_chunk) in build_side.iter().enumerate() { diff --git a/src/batch/src/executor/join/lookup_join_base.rs b/src/batch/src/executor/join/lookup_join_base.rs index 7dccc98a1eb7..28661524431a 100644 --- a/src/batch/src/executor/join/lookup_join_base.rs +++ b/src/batch/src/executor/join/lookup_join_base.rs @@ -14,14 +14,14 @@ use std::marker::PhantomData; -use fixedbitset::FixedBitSet; + use futures::StreamExt; use futures_async_stream::try_stream; use itertools::Itertools; use risingwave_common::array::DataChunk; use risingwave_common::catalog::Schema; use risingwave_common::error::RwError; -use risingwave_common::hash::{HashKey, PrecomputedBuildHasher}; +use risingwave_common::hash::{HashKey, NullBitmap, PrecomputedBuildHasher}; use risingwave_common::row::Row; use risingwave_common::types::{DataType, ToOwnedDatum}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; @@ -67,13 +67,7 @@ impl LookupJoinBase { pub async fn do_execute(mut self: Box) { let outer_side_schema = self.outer_side_input.schema().clone(); - let null_matched = { - let mut null_matched = FixedBitSet::with_capacity(self.null_safe.len()); - for (idx, col_null_matched) in self.null_safe.iter().copied().enumerate() { - null_matched.set(idx, col_null_matched); - } - null_matched - }; + let null_matched: NullBitmap = self.null_safe.into(); let mut outer_side_batch_read_stream: BoxedDataChunkListStream = utils::batch_read(self.outer_side_input.execute(), AT_LEAST_OUTER_SIDE_ROWS); diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index d042be4a7a74..6573eec4ba49 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -28,7 +28,7 @@ use std::hash::{BuildHasher, Hash, Hasher}; use std::io::{Cursor, Read}; use chrono::{Datelike, Timelike}; -use fixedbitset::FixedBitSet; + use smallbitset::Set64; use crate::array::serial_array::Serial; @@ -89,7 +89,7 @@ impl From> for NullBitmap { let mut bitmap = NullBitmap::empty(); for (idx, is_true) in value.into_iter().enumerate() { if is_true { - null_matched.add_inplace(idx); + bitmap.set_true(idx); } } bitmap diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index 75e9b5949e08..2991e5db8263 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -17,14 +17,14 @@ use std::sync::Arc; use std::time::Duration; use await_tree::InstrumentAwait; -use fixedbitset::FixedBitSet; + use futures::{pin_mut, StreamExt}; use futures_async_stream::try_stream; use itertools::Itertools; use multimap::MultiMap; use risingwave_common::array::{Op, RowRef, StreamChunk}; use risingwave_common::catalog::Schema; -use risingwave_common::hash::HashKey; +use risingwave_common::hash::{HashKey, NullBitmap}; use risingwave_common::row::{OwnedRow, Row}; use risingwave_common::types::{DataType, ToOwnedDatum}; use risingwave_common::util::epoch::EpochPair; @@ -538,13 +538,7 @@ impl HashJoinExecutor { /// Data types of the join key columns join_key_data_types: Vec, /// Null safe bitmap for each join pair - null_matched: FixedBitSet, + null_matched: NullBitmap, /// The memcomparable serializer of primary key. pk_serializer: OrderedRowSerde, /// State table. Contains the data from upstream. @@ -248,7 +248,7 @@ impl JoinHashMap { degree_all_data_types: Vec, degree_table: StateTable, degree_pk_indices: Vec, - null_matched: FixedBitSet, + null_matched: NullBitmap, need_degree_table: bool, pk_contained_in_jk: bool, metrics: Arc, @@ -562,7 +562,7 @@ impl JoinHashMap { self.inner.len() } - pub fn null_matched(&self) -> &FixedBitSet { + pub fn null_matched(&self) -> &NullBitmap { &self.null_matched } } From bf7a623158e6bfbd24a783394cd814def75c5092 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 15:46:30 +0800 Subject: [PATCH 12/24] fmt --- src/batch/src/executor/join/hash_join.rs | 1 - src/batch/src/executor/join/lookup_join_base.rs | 1 - src/common/src/hash/key.rs | 1 - src/stream/src/executor/hash_join.rs | 1 - src/stream/src/executor/managed_state/join/mod.rs | 1 - 5 files changed, 5 deletions(-) diff --git a/src/batch/src/executor/join/hash_join.rs b/src/batch/src/executor/join/hash_join.rs index fca7f85c9241..2b4957d428b8 100644 --- a/src/batch/src/executor/join/hash_join.rs +++ b/src/batch/src/executor/join/hash_join.rs @@ -18,7 +18,6 @@ use std::iter::empty; use std::marker::PhantomData; use std::sync::Arc; - use futures_async_stream::try_stream; use itertools::Itertools; use risingwave_common::array::{Array, DataChunk, RowRef}; diff --git a/src/batch/src/executor/join/lookup_join_base.rs b/src/batch/src/executor/join/lookup_join_base.rs index 28661524431a..83c14bb10bb4 100644 --- a/src/batch/src/executor/join/lookup_join_base.rs +++ b/src/batch/src/executor/join/lookup_join_base.rs @@ -14,7 +14,6 @@ use std::marker::PhantomData; - use futures::StreamExt; use futures_async_stream::try_stream; use itertools::Itertools; diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 6573eec4ba49..a00310f5d8b8 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -28,7 +28,6 @@ use std::hash::{BuildHasher, Hash, Hasher}; use std::io::{Cursor, Read}; use chrono::{Datelike, Timelike}; - use smallbitset::Set64; use crate::array::serial_array::Serial; diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index 2991e5db8263..c7c834ff06d7 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -17,7 +17,6 @@ use std::sync::Arc; use std::time::Duration; use await_tree::InstrumentAwait; - use futures::{pin_mut, StreamExt}; use futures_async_stream::try_stream; use itertools::Itertools; diff --git a/src/stream/src/executor/managed_state/join/mod.rs b/src/stream/src/executor/managed_state/join/mod.rs index feb3b2de88df..70549c8435ff 100644 --- a/src/stream/src/executor/managed_state/join/mod.rs +++ b/src/stream/src/executor/managed_state/join/mod.rs @@ -18,7 +18,6 @@ use std::alloc::Global; use std::ops::{Deref, DerefMut}; use std::sync::Arc; - use futures::future::try_join; use futures::StreamExt; use futures_async_stream::for_await; From f750b08d1357b45c2757e1b76115b39093351c2d Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 4 Apr 2023 16:09:36 +0800 Subject: [PATCH 13/24] improve description --- src/common/benches/bench_hash_key_encoding.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs index a9c8bffc9e77..f6e65b8a8a7e 100644 --- a/src/common/benches/bench_hash_key_encoding.rs +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -194,11 +194,11 @@ fn case_builders() -> Vec { }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32, DataType::Int32, DataType::Int32], - describe: "composite fixed".to_string(), + describe: "composite fixed, case 1".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32, DataType::Int64, DataType::Int32], - describe: "composite fixed".to_string(), + describe: "composite fixed, case 2".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32, DataType::Varchar], From fd1f96610ccfb0e5cb1f1ca4721a8f62515aeb5f Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 5 Apr 2023 14:01:36 +0800 Subject: [PATCH 14/24] fix deps --- Cargo.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 3a6cd7d95ecb..b642e4956631 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8919,7 +8919,6 @@ dependencies = [ "memchr", "miniz_oxide", "multimap", - "num", "num-bigint", "num-integer", "num-traits", From afce304abd3fd4dd624832335a0795debdf1a95f Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 5 Apr 2023 15:48:44 +0800 Subject: [PATCH 15/24] make benchmarks easier to filter by row --- src/common/benches/bench_hash_key_encoding.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs index f6e65b8a8a7e..c9f19e9c1815 100644 --- a/src/common/benches/bench_hash_key_encoding.rs +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -50,10 +50,10 @@ impl HashKeyDispatcher for HashKeyBenchCaseBuilder { for null_ratio in NULL_RATIOS { for chunk_size in CHUNK_SIZES { let id = format!( - "{} {:?}, {} rows, Pr[null]={}", + "{} rows, {} {:?}, Pr[null]={}", + chunk_size, self.describe, calc_hash_key_kind(self.data_types()), - chunk_size, null_ratio ); let input_chunk = gen_chunk(self.data_types(), *chunk_size, SEED, *null_ratio); From 5ab3953daa284859ec813263990880559aa616cb Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 5 Apr 2023 15:56:36 +0800 Subject: [PATCH 16/24] update deps --- Cargo.lock | 1 + src/workspace-hack/Cargo.toml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index b642e4956631..3a6cd7d95ecb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8919,6 +8919,7 @@ dependencies = [ "memchr", "miniz_oxide", "multimap", + "num", "num-bigint", "num-integer", "num-traits", diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index d1d57fd0ac90..d14210d3f116 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -64,6 +64,7 @@ madsim-tokio = { version = "0.2", default-features = false, features = ["fs", "i memchr = { version = "2" } miniz_oxide = { version = "0.6", default-features = false, features = ["with-alloc"] } multimap = { version = "0.8" } +num = { version = "0.4" } num-bigint = { version = "0.4" } num-integer = { version = "0.1", features = ["i128"] } num-traits = { version = "0.2", features = ["i128", "libm"] } @@ -158,6 +159,7 @@ madsim-tokio = { version = "0.2", default-features = false, features = ["fs", "i memchr = { version = "2" } miniz_oxide = { version = "0.6", default-features = false, features = ["with-alloc"] } multimap = { version = "0.8" } +num = { version = "0.4" } num-bigint = { version = "0.4" } num-integer = { version = "0.1", features = ["i128"] } num-traits = { version = "0.2", features = ["i128", "libm"] } From 05919c0a54a7eb36116f4ac50183a9294b4ddf01 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 5 Apr 2023 16:31:03 +0800 Subject: [PATCH 17/24] add 64 group key limit to frontend --- src/common/src/hash/key.rs | 2 ++ src/frontend/planner_test/tests/testdata/agg.yaml | 15 +++++++++++++++ src/frontend/src/binder/select.rs | 10 ++++++++++ 3 files changed, 27 insertions(+) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index a00310f5d8b8..7e0824840a8d 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -42,6 +42,8 @@ use crate::util::hash_util::Crc32FastBuilder; use crate::util::iter_util::ZipEqFast; use crate::util::value_encoding::{deserialize_datum, serialize_datum_into}; +pub static MAX_GROUP_KEYS: usize = 64; + /// Bitmap for null values in key. /// This is specialized for key, /// since it usually has few group keys. diff --git a/src/frontend/planner_test/tests/testdata/agg.yaml b/src/frontend/planner_test/tests/testdata/agg.yaml index 3d2ff0f1f709..a2011360ca33 100644 --- a/src/frontend/planner_test/tests/testdata/agg.yaml +++ b/src/frontend/planner_test/tests/testdata/agg.yaml @@ -1247,3 +1247,18 @@ BatchExchange { order: [idx_desc.a DESC], dist: Single } └─BatchSortAgg { group_key: [idx_desc.a], aggs: [count] } └─BatchScan { table: idx_desc, columns: [idx_desc.a], distribution: UpstreamHashShard(idx_desc.a) } +- name: sort agg on an ascending index + sql: | + create table t (a int, b int); + create index idx_asc on t(a asc); + create index idx_desc on t(a desc); + select a, count(*) cnt from t group by a order by a asc; + batch_plan: | + BatchExchange { order: [idx_asc.a ASC], dist: Single } + └─BatchSortAgg { group_key: [idx_asc.a], aggs: [count] } + └─BatchScan { table: idx_asc, columns: [idx_asc.a], distribution: UpstreamHashShard(idx_asc.a) } +- name: max group keys exceed 64 + sql: | + create table t(v1 int, v2 int, v3 int, v4 int, v5 int, v6 int, v7 int, v8 int, v9 int, v10 int, v11 int, v12 int, v13 int, v14 int, v15 int, v16 int, v17 int, v18 int, v19 int, v20 int, v21 int, v22 int, v23 int, v24 int, v25 int, v26 int, v27 int, v28 int, v29 int, v30 int, v31 int, v32 int, v33 int, v34 int, v35 int, v36 int, v37 int, v38 int, v39 int, v40 int, v41 int, v42 int, v43 int, v44 int, v45 int, v46 int, v47 int, v48 int, v49 int, v50 int, v51 int, v52 int, v53 int, v54 int, v55 int, v56 int, v57 int, v58 int, v59 int, v60 int, v61 int, v62 int, v63 int, v64 int, v65 int); + select * from t group by v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65; + binder_error: 'Bind error: Number of Group Keys: 65, exceeded maximum: 64' diff --git a/src/frontend/src/binder/select.rs b/src/frontend/src/binder/select.rs index ad1f11356175..eacf17b501a3 100644 --- a/src/frontend/src/binder/select.rs +++ b/src/frontend/src/binder/select.rs @@ -16,7 +16,9 @@ use std::fmt::Debug; use itertools::Itertools; use risingwave_common::catalog::{Field, Schema, PG_CATALOG_SCHEMA_NAME}; +use risingwave_common::error::ErrorCode::BindError; use risingwave_common::error::{ErrorCode, Result, RwError}; +use risingwave_common::hash::MAX_GROUP_KEYS; use risingwave_common::types::DataType; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::{DataType as AstDataType, Distinct, Expr, Select, SelectItem}; @@ -178,6 +180,14 @@ impl Binder { // Bind GROUP BY clause. self.context.clause = Some(Clause::GroupBy); + let number_of_group_keys = select.group_by.len(); + if number_of_group_keys > MAX_GROUP_KEYS { + return Err(BindError(format!( + "Number of Group Keys: {}, exceeded maximum: {}", + number_of_group_keys, MAX_GROUP_KEYS, + )) + .into()); + } let group_by = select .group_by .into_iter() From f4b8799afbd577561da1af26cb8c4f44c0d2e7e6 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 5 Apr 2023 16:32:19 +0800 Subject: [PATCH 18/24] clean --- src/frontend/planner_test/tests/testdata/agg.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/agg.yaml b/src/frontend/planner_test/tests/testdata/agg.yaml index a2011360ca33..4c97695dcb6f 100644 --- a/src/frontend/planner_test/tests/testdata/agg.yaml +++ b/src/frontend/planner_test/tests/testdata/agg.yaml @@ -1247,16 +1247,6 @@ BatchExchange { order: [idx_desc.a DESC], dist: Single } └─BatchSortAgg { group_key: [idx_desc.a], aggs: [count] } └─BatchScan { table: idx_desc, columns: [idx_desc.a], distribution: UpstreamHashShard(idx_desc.a) } -- name: sort agg on an ascending index - sql: | - create table t (a int, b int); - create index idx_asc on t(a asc); - create index idx_desc on t(a desc); - select a, count(*) cnt from t group by a order by a asc; - batch_plan: | - BatchExchange { order: [idx_asc.a ASC], dist: Single } - └─BatchSortAgg { group_key: [idx_asc.a], aggs: [count] } - └─BatchScan { table: idx_asc, columns: [idx_asc.a], distribution: UpstreamHashShard(idx_asc.a) } - name: max group keys exceed 64 sql: | create table t(v1 int, v2 int, v3 int, v4 int, v5 int, v6 int, v7 int, v8 int, v9 int, v10 int, v11 int, v12 int, v13 int, v14 int, v15 int, v16 int, v17 int, v18 int, v19 int, v20 int, v21 int, v22 int, v23 int, v24 int, v25 int, v26 int, v27 int, v28 int, v29 int, v30 int, v31 int, v32 int, v33 int, v34 int, v35 int, v36 int, v37 int, v38 int, v39 int, v40 int, v41 int, v42 int, v43 int, v44 int, v45 int, v46 int, v47 int, v48 int, v49 int, v50 int, v51 int, v52 int, v53 int, v54 int, v55 int, v56 int, v57 int, v58 int, v59 int, v60 int, v61 int, v62 int, v63 int, v64 int, v65 int); From e9adce68d118f29220519c4b8f96f8f07be087fc Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 5 Apr 2023 16:44:58 +0800 Subject: [PATCH 19/24] fix is_subset --- src/common/src/hash/key.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 7e0824840a8d..e20b8e4ede1b 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -81,7 +81,7 @@ impl NullBitmap { } pub fn is_subset(&self, other: &NullBitmap) -> bool { - self.inner.contains_all(other.inner) + other.inner.contains_all(self.inner) } } From 340afe5ae1af9816204ed3256409412052669893 Mon Sep 17 00:00:00 2001 From: Noel Kwan <47273164+kwannoel@users.noreply.github.com> Date: Thu, 6 Apr 2023 12:42:16 +0800 Subject: [PATCH 20/24] Update src/common/src/hash/key.rs Co-authored-by: Bugen Zhao --- src/common/src/hash/key.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index e20b8e4ede1b..f5d732605f25 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -85,7 +85,7 @@ impl NullBitmap { } } -impl From> for NullBitmap { +impl> From for NullBitmap { fn from(value: Vec) -> Self { let mut bitmap = NullBitmap::empty(); for (idx, is_true) in value.into_iter().enumerate() { From d6121fe69ef8cb3585b51e04a82e162bc0242d26 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 6 Apr 2023 12:46:39 +0800 Subject: [PATCH 21/24] fix review comments --- src/common/src/hash/key.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index f5d732605f25..1451ebfde970 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -68,10 +68,6 @@ impl NullBitmap { self.inner.add_inplace(idx); } - fn estimated_heap_size(&self) -> usize { - self.inner.capacity() - } - fn len(&self) -> usize { self.inner.len() } @@ -85,6 +81,12 @@ impl NullBitmap { } } +impl EstimateSize for NullBitmap { + fn estimated_heap_size(&self) -> usize { + 0 + } +} + impl> From for NullBitmap { fn from(value: Vec) -> Self { let mut bitmap = NullBitmap::empty(); @@ -207,7 +209,6 @@ pub trait HashKey: /// Designed for hash keys with at most `N` serialized bytes. /// /// See [`crate::hash::calc_hash_key_kind`] -/// TODO(kwannoel): add benchmark and test, this should also use small bitset. #[derive(Clone, Debug)] pub struct FixedSizeKey { key: [u8; N], From ce3fcc7d0d13b57646593b0c35829d671a1dfbcb Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 6 Apr 2023 12:59:01 +0800 Subject: [PATCH 22/24] fix --- src/common/src/hash/key.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 1451ebfde970..8e7ba2862296 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -87,8 +87,8 @@ impl EstimateSize for NullBitmap { } } -impl> From for NullBitmap { - fn from(value: Vec) -> Self { +impl + IntoIterator> From for NullBitmap { + fn from(value: T) -> Self { let mut bitmap = NullBitmap::empty(); for (idx, is_true) in value.into_iter().enumerate() { if is_true { From a4b54c610e326855e036584bc9f366cc7d30c977 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 6 Apr 2023 17:53:59 +0800 Subject: [PATCH 23/24] clean --- src/common/src/hash/key.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 8e7ba2862296..e484604ac3b1 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -651,7 +651,6 @@ impl HashKeySerializer for SerializedKeySerializer { } fn append<'a, D: HashKeySerDe<'a>>(&mut self, data: Option) { - let _len_bitmap = self.null_bitmap.len(); match data { Some(v) => { serialize_datum_into(&Some(v.to_owned_scalar().into()), &mut self.buffer); From a9abe56387e56ea3812a4c4eeed3d8f056e37231 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Thu, 6 Apr 2023 18:12:42 +0800 Subject: [PATCH 24/24] fix clippy --- src/common/src/hash/key.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index e484604ac3b1..608999fec600 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -68,10 +68,6 @@ impl NullBitmap { self.inner.add_inplace(idx); } - fn len(&self) -> usize { - self.inner.len() - } - fn contains(&self, x: usize) -> bool { self.inner.contains(x) }