From bb08bce65db6a97c00dd053607107d042d4ef302 Mon Sep 17 00:00:00 2001 From: kikkon Date: Sun, 16 Oct 2022 13:50:18 +0800 Subject: [PATCH 1/2] feat(storage): use dict encoding Signed-off-by: kikkon --- src/storage/secondary/block/dict_block_builder.rs | 9 +++++++-- src/storage/secondary/compactor.rs | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/storage/secondary/block/dict_block_builder.rs b/src/storage/secondary/block/dict_block_builder.rs index 3d473ca6..67fe6bdd 100644 --- a/src/storage/secondary/block/dict_block_builder.rs +++ b/src/storage/secondary/block/dict_block_builder.rs @@ -4,6 +4,7 @@ use std::collections::HashMap; use std::hash::Hash; use bytes::BufMut; +use risinglight_proto::rowset::block_statistics::BlockStatisticsType; use risinglight_proto::rowset::BlockStatistics; use super::PlainPrimitiveBlockBuilder; @@ -79,8 +80,12 @@ where } fn get_statistics(&self) -> Vec { - // Tracking issue: https://github.com/risinglightdb/risinglight/issues/674 - vec![] + let distinct_count = self.dict_map.len() as u64; + let distinct_stat = BlockStatistics { + block_stat_type: BlockStatisticsType::DistinctValue as i32, + body: distinct_count.to_le_bytes().to_vec(), + }; + vec![distinct_stat] } fn should_finish(&self, next_item: &Option<&A::Item>) -> bool { diff --git a/src/storage/secondary/compactor.rs b/src/storage/secondary/compactor.rs index d750dc68..e09f987e 100644 --- a/src/storage/secondary/compactor.rs +++ b/src/storage/secondary/compactor.rs @@ -130,7 +130,7 @@ impl Compactor { let mut builder = if distinct_value < row_count / 5 { let mut column_options = ColumnBuilderOptions::from_storage_options(&table.storage_options); - column_options.encode_type = EncodeType::RunLength; + column_options.encode_type = EncodeType::Dictionary; RowsetBuilder::new(table.columns.clone(), column_options) } else { RowsetBuilder::new( From 5295d1245105b163dfca9fc537244f70a2b8d530 Mon Sep 17 00:00:00 2001 From: kikkon Date: Fri, 21 Oct 2022 00:14:12 +0800 Subject: [PATCH 2/2] [WIP] feat(storage): use dict encoding --- .../column/primitive_column_factory.rs | 30 ++++++++++++++++++- src/storage/secondary/compactor.rs | 2 +- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/storage/secondary/column/primitive_column_factory.rs b/src/storage/secondary/column/primitive_column_factory.rs index a43954c9..d85d4f03 100644 --- a/src/storage/secondary/column/primitive_column_factory.rs +++ b/src/storage/secondary/column/primitive_column_factory.rs @@ -12,7 +12,8 @@ use super::super::{ }; use super::{BlockIteratorFactory, ConcreteColumnIterator}; use crate::array::Array; -use crate::storage::secondary::block::{decode_rle_block, FakeBlockIterator, RleBlockIterator}; +use crate::array::ArrayBuilder; +use crate::storage::secondary::block::{decode_rle_block, decode_dict_block,DictBlockIterator, FakeBlockIterator, RleBlockIterator}; use crate::types::{Date, Interval, F64}; /// All supported block iterators for primitive types. @@ -21,6 +22,8 @@ pub enum PrimitiveBlockIteratorImpl { PlainNullable(PlainPrimitiveNullableBlockIterator), RunLength(RleBlockIterator>), RleNullable(RleBlockIterator>), + Dictionary(DictBlockIterator>), + DictNullable(DictBlockIterator>), Fake(FakeBlockIterator), } @@ -35,6 +38,8 @@ impl BlockIterator for PrimitiveBloc Self::PlainNullable(it) => it.next_batch(expected_size, builder), Self::RunLength(it) => it.next_batch(expected_size, builder), Self::RleNullable(it) => it.next_batch(expected_size, builder), + Self::Dictionary(it) => it.next_batch(expected_size, builder), + Self::DictNullable(it) => it.next_batch(expected_size, builder), Self::Fake(it) => it.next_batch(expected_size, builder), } } @@ -45,6 +50,8 @@ impl BlockIterator for PrimitiveBloc Self::PlainNullable(it) => it.skip(cnt), Self::RunLength(it) => it.skip(cnt), Self::RleNullable(it) => it.skip(cnt), + Self::Dictionary(it) => it.skip(cnt), + Self::DictNullable(it) => it.skip(cnt), Self::Fake(it) => it.skip(cnt), } } @@ -55,6 +62,8 @@ impl BlockIterator for PrimitiveBloc Self::PlainNullable(it) => it.remaining_items(), Self::RunLength(it) => it.remaining_items(), Self::RleNullable(it) => it.remaining_items(), + Self::Dictionary(it) => it.remaining_items(), + Self::DictNullable(it) => it.remaining_items(), Self::Fake(it) => it.remaining_items(), } } @@ -124,6 +133,25 @@ impl BlockIteratorFactory ); PrimitiveBlockIteratorImpl::RleNullable(it) } + BlockType::Dictionary => { + let (rle_num, block_data, rle_data) = decode_dict_block(block); + let mut block_builder = <::ArrayType as Array>::Builder::new(); + let mut block_iter = PlainPrimitiveBlockIterator::::new(block_data, rle_num); + let it = DictBlockIterator::>::new( + &mut block_builder, &mut block_iter, rle_data, rle_num, + ); + PrimitiveBlockIteratorImpl::Dictionary(it) + } + BlockType::DictNullable => { + let (rle_num, block_data, rle_data) = decode_dict_block(block); + let mut block_builder = <::ArrayType as Array>::Builder::new(); + let mut block_iter = PlainPrimitiveNullableBlockIterator::::new(block_data, rle_num); + let it = + DictBlockIterator::>::new( + &mut block_builder, &mut block_iter, rle_data, rle_num, + ); + PrimitiveBlockIteratorImpl::DictNullable(it) + } _ => todo!(), }; it.skip(start_pos - index.first_rowid as usize); diff --git a/src/storage/secondary/compactor.rs b/src/storage/secondary/compactor.rs index e09f987e..64af031c 100644 --- a/src/storage/secondary/compactor.rs +++ b/src/storage/secondary/compactor.rs @@ -223,7 +223,7 @@ impl Compactor { .txn_mgr .try_lock_for_compaction(table.table_id()) { - if let Err(err) = self.compact_table(&pin_version.snapshot, table).await { + if let Err(err) = self.compact_table(&pin_version.snapshot, table).await { warn!("failed to compact: {:?}", err); } }