Skip to content

Commit

Permalink
feat(storage): add record_first_key option to ColumnBuilderOptions
Browse files Browse the repository at this point in the history
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
  • Loading branch information
adlternative committed Apr 9, 2022
1 parent 5301767 commit 8bcc46e
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 17 deletions.
3 changes: 3 additions & 0 deletions proto/src/proto/rowset.proto
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ message BlockIndex {

// Statistics of the block.
repeated BlockStatistics stats = 7;

// True if the first key of the block is null; first_key is then left empty.
bool is_first_key_null = 8;
}

// An entry of a delete record.
Expand Down
5 changes: 3 additions & 2 deletions src/storage/secondary/block/block_index_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,16 @@ impl BlockIndexBuilder {
column_data: &mut Vec<u8>,
block_data: &mut Vec<u8>,
stats: Vec<BlockStatistics>,
first_key: &mut Vec<u8>,
first_key: Option<Vec<u8>>,
) {
self.indexes.push(BlockIndex {
offset: column_data.len() as u64,
length: block_data.len() as u64 + BLOCK_HEADER_SIZE as u64,
first_rowid: self.last_row_count as u32,
row_count: (self.row_count - self.last_row_count) as u32,
/// TODO(chi): support sort key
first_key: first_key.drain(..).collect(),
is_first_key_null: first_key.is_none(),
first_key: first_key.unwrap_or_default(),
stats,
});

Expand Down
11 changes: 6 additions & 5 deletions src/storage/secondary/column/blob_column_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use super::super::{BlockBuilder, BlockIndexBuilder, PlainBlobBlockBuilder};
use super::{append_one_by_one, ColumnBuilder};
use crate::array::{Array, BlobArray};
use crate::storage::secondary::block::RleBlockBuilder;
use crate::storage::secondary::encode::BlobEncode;
use crate::storage::secondary::ColumnBuilderOptions;
use crate::types::BlobRef;

Expand All @@ -28,7 +29,7 @@ pub struct BlobColumnBuilder {
block_index_builder: BlockIndexBuilder,

/// First key
first_key: Vec<u8>,
first_key: Option<Vec<u8>>,
}

impl BlobColumnBuilder {
Expand All @@ -38,7 +39,7 @@ impl BlobColumnBuilder {
block_index_builder: BlockIndexBuilder::new(options.clone()),
options,
current_builder: None,
first_key: vec![],
first_key: None,
}
}

Expand All @@ -65,7 +66,7 @@ impl BlobColumnBuilder {
&mut self.data,
&mut block_data,
stats,
&mut self.first_key,
self.first_key.clone(),
);
}
}
Expand All @@ -90,8 +91,8 @@ impl ColumnBuilder<BlobArray> for BlobColumnBuilder {
));
}
if let Some(to_be_appended) = iter.peek() {
if to_be_appended.is_some() {
self.first_key = to_be_appended.unwrap().iter().cloned().collect();
if self.options.record_first_key {
self.first_key = to_be_appended.map(|x| x.to_byte_slice().to_vec());
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/storage/secondary/column/char_column_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub struct CharColumnBuilder {
char_width: Option<u64>,

/// First key
first_key: Vec<u8>,
first_key: Option<Vec<u8>>,
}

impl CharColumnBuilder {
Expand All @@ -47,7 +47,7 @@ impl CharColumnBuilder {
current_builder: None,
nullable,
char_width,
first_key: vec![],
first_key: None,
}
}

Expand Down Expand Up @@ -84,7 +84,7 @@ impl CharColumnBuilder {
&mut self.data,
&mut block_data,
stats,
&mut self.first_key,
self.first_key.clone(),
);
}
}
Expand Down Expand Up @@ -141,8 +141,8 @@ impl ColumnBuilder<Utf8Array> for CharColumnBuilder {
}

if let Some(to_be_appended) = iter.peek() {
if to_be_appended.is_some() {
self.first_key = to_be_appended.unwrap().as_bytes().to_vec();
if self.options.record_first_key {
self.first_key = to_be_appended.map(|x| x.as_bytes().to_vec());
}
}
}
Expand Down
53 changes: 48 additions & 5 deletions src/storage/secondary/column/primitive_column_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub struct PrimitiveColumnBuilder<T: PrimitiveFixedWidthEncode> {
block_index_builder: BlockIndexBuilder,

/// First key
first_key: Vec<u8>,
first_key: Option<Vec<u8>>,
}

impl<T: PrimitiveFixedWidthEncode> PrimitiveColumnBuilder<T> {
Expand All @@ -58,7 +58,7 @@ impl<T: PrimitiveFixedWidthEncode> PrimitiveColumnBuilder<T> {
options,
current_builder: None,
nullable,
first_key: vec![],
first_key: None,
}
}

Expand Down Expand Up @@ -93,7 +93,7 @@ impl<T: PrimitiveFixedWidthEncode> PrimitiveColumnBuilder<T> {
&mut self.data,
&mut block_data,
stats,
&mut self.first_key,
self.first_key.clone(),
);
}
}
Expand Down Expand Up @@ -171,8 +171,12 @@ impl<T: PrimitiveFixedWidthEncode> ColumnBuilder<T::ArrayType> for PrimitiveColu
}

if let Some(to_be_appended) = iter.peek() {
if to_be_appended.is_some() {
to_be_appended.unwrap().encode(&mut self.first_key);
if self.options.record_first_key {
self.first_key = to_be_appended.map(|x| {
let mut first_key = vec![];
x.encode(&mut first_key);
first_key
});
}
}
}
Expand Down Expand Up @@ -293,4 +297,43 @@ mod tests {
}
assert_eq!(builder.finish().0.len(), 2);
}

#[test]
fn test_i32_block_index_first_key() {
    // Number of items handed to the builder on every `append` call.
    let item_each_block = (128 - 16) / 4;

    // Builds a nullable i32 column with first-key recording enabled, appending
    // ten arrays filled with `value`, and returns the resulting block indexes.
    let build_index = |value: Option<i32>| {
        let mut builder =
            I32ColumnBuilder::new(true, ColumnBuilderOptions::record_first_key_test());
        for _ in 0..10 {
            builder.append(&I32Array::from_iter(
                std::iter::repeat(value).take(item_each_block),
            ));
        }
        builder.finish().0
    };

    // Non-null first key: every block must carry the encoded value and must
    // not be flagged as null.
    let index = build_index(Some(1));
    assert_eq!(index.len(), 11);
    for item in index {
        assert!(!item.is_first_key_null);
        let mut key: &[u8] = &item.first_key;
        let x: i32 = PrimitiveFixedWidthEncode::decode(&mut key);
        assert_eq!(x, 1);
    }

    // Null first key: every block must be flagged as null and must store no
    // key bytes.
    let index = build_index(None);
    assert_eq!(index.len(), 11);
    for item in index {
        assert!(item.is_first_key_null);
        assert!(item.first_key.is_empty());
    }
}
}
22 changes: 22 additions & 0 deletions src/storage/secondary/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ pub struct StorageOptions {

/// Whether using run-length encoding
pub is_rle: bool,

/// Whether to record the first_key of each block in the block index
pub record_first_key: bool,
}

impl StorageOptions {
Expand All @@ -70,6 +73,7 @@ impl StorageOptions {
},
checksum_type: ChecksumType::Crc32,
is_rle: false,
record_first_key: false,
}
}

Expand All @@ -82,6 +86,7 @@ impl StorageOptions {
io_backend: IOBackend::in_memory(),
checksum_type: ChecksumType::None,
is_rle: false,
record_first_key: false,
}
}
}
Expand All @@ -97,6 +102,9 @@ pub struct ColumnBuilderOptions {

/// Whether using run-length encoding
pub is_rle: bool,

/// Whether to record the first_key of each block
pub record_first_key: bool,
}

impl ColumnBuilderOptions {
Expand All @@ -105,6 +113,7 @@ impl ColumnBuilderOptions {
target_block_size: options.target_block_size,
checksum_type: options.checksum_type,
is_rle: options.is_rle,
record_first_key: options.record_first_key,
}
}

Expand All @@ -114,6 +123,7 @@ impl ColumnBuilderOptions {
target_block_size: 4096,
checksum_type: ChecksumType::Crc32,
is_rle: false,
record_first_key: false,
}
}

Expand All @@ -123,6 +133,7 @@ impl ColumnBuilderOptions {
target_block_size: 128,
checksum_type: ChecksumType::None,
is_rle: false,
record_first_key: false,
}
}

Expand All @@ -132,6 +143,17 @@ impl ColumnBuilderOptions {
target_block_size: 128,
checksum_type: ChecksumType::None,
is_rle: true,
record_first_key: false,
}
}

/// Builder options for tests that exercise first-key recording.
///
/// Uses a target block size of 128, no checksum and no run-length encoding,
/// with `record_first_key` enabled. Only compiled for test builds.
#[cfg(test)]
pub fn record_first_key_test() -> Self {
Self {
target_block_size: 128,
checksum_type: ChecksumType::None,
is_rle: false,
record_first_key: true,
}
}
}

0 comments on commit 8bcc46e

Please sign in to comment.