feat(storage): support range-filter scan by sort key #644

Merged · 4 commits · May 7, 2022 · Changes from 3 commits
4 changes: 2 additions & 2 deletions src/executor/table_scan.rs
@@ -89,8 +89,8 @@ impl<S: Storage> TableScanExecutor<S> {
let mut it = match unified_select_with_token(
&token,
txn.scan(
None,
None,
&[],
&[],
&col_idx,
self.plan.logical().is_sorted(),
false,
12 changes: 7 additions & 5 deletions src/storage/memory/transaction.rs
@@ -13,6 +13,7 @@ use crate::array::{ArrayBuilderImpl, ArrayImplBuilderPickExt, ArrayImplSortExt,
use crate::binder::BoundExpr;
use crate::catalog::{find_sort_key_id, ColumnCatalog};
use crate::storage::{StorageColumnRef, StorageResult, Transaction};
use crate::types::DataValue;

/// A transaction running on `InMemoryStorage`.
pub struct InMemoryTransaction {
@@ -116,24 +117,25 @@ impl Transaction for InMemoryTransaction {
// TODO: remove this unused variable
fn scan<'a>(
&'a self,
begin_sort_key: Option<&'a [u8]>,
end_sort_key: Option<&'a [u8]>,
begin_sort_key: &'a [DataValue],
end_sort_key: &'a [DataValue],
col_idx: &'a [StorageColumnRef],
is_sorted: bool,
reversed: bool,
expr: Option<BoundExpr>,
) -> Self::ScanResultFuture<'a> {
async move {
assert!(expr.is_none(), "MemTxn doesn't support filter scan");
assert!(!reversed, "reverse iterator is not supported for now");

assert!(
begin_sort_key.is_none(),
begin_sort_key.is_empty(),
"sort_key is not supported in InMemoryEngine for now"
);
assert!(
end_sort_key.is_none(),
end_sort_key.is_empty(),
"sort_key is not supported in InMemoryEngine for now"
);
assert!(!reversed, "reverse iterator is not supported for now");

let snapshot = if is_sorted {
sort_datachunk_by_pk(&self.snapshot, &self.column_infos)
6 changes: 3 additions & 3 deletions src/storage/mod.rs
@@ -21,7 +21,7 @@ use enum_dispatch::enum_dispatch;
use crate::array::{ArrayImpl, DataChunk};
use crate::binder::BoundExpr;
use crate::catalog::{ColumnCatalog, TableRefId};
use crate::types::{ColumnId, DatabaseId, SchemaId};
use crate::types::{ColumnId, DataValue, DatabaseId, SchemaId};

#[enum_dispatch(StorageDispatch)]
#[derive(Clone)]
@@ -158,8 +158,8 @@ pub trait Transaction: Sync + Send + 'static {
/// Scan one or multiple columns.
fn scan<'a>(
&'a self,
begin_sort_key: Option<&'a [u8]>,
end_sort_key: Option<&'a [u8]>,
begin_sort_key: &'a [DataValue],
end_sort_key: &'a [DataValue],
col_idx: &'a [StorageColumnRef],
is_sorted: bool,
reversed: bool,
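
For orientation, a hedged caller sketch of the updated trait method. The transaction value, column list, and bound values below are illustrative and not taken from this PR: empty slices keep the previous full-scan behavior, while non-empty `DataValue` slices request a range-filtered scan over the sort key.

```rust
// Sketch under assumptions: a concrete `Transaction` implementation and the
// column list come from the caller (as in table_scan.rs); only the argument
// shape of the new `scan` signature is illustrated here.
async fn scan_examples<T: Transaction>(txn: &T, col_idx: &[StorageColumnRef]) {
    // Full scan: empty slices mean no range bounds on the sort key.
    let _full = txn
        .scan(&[], &[], col_idx, /* is_sorted */ false, /* reversed */ false, None)
        .await;

    // Range-filtered scan over an Int32 sort key, from value 100 up to 200.
    let begin = [DataValue::Int32(100)];
    let end = [DataValue::Int32(200)];
    let _range = txn
        .scan(&begin, &end, col_idx, /* is_sorted */ true, /* reversed */ false, None)
        .await;
}
```

Note that `InMemoryTransaction` still asserts both slices are empty, so in this PR the range bounds only take effect on the secondary engine.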
9 changes: 8 additions & 1 deletion src/storage/secondary/compactor.rs
@@ -74,7 +74,14 @@ impl Compactor {

iters.push(
rowset
.iter(column_refs.clone(), dvs, ColumnSeekPosition::start(), None)
.iter(
column_refs.clone(),
dvs,
ColumnSeekPosition::start(),
None,
&[],
&[],
)
.await?,
);
}
145 changes: 144 additions & 1 deletion src/storage/secondary/rowset/disk_rowset.rs
@@ -1,5 +1,6 @@
// Copyright 2022 RisingLight Project Authors. Licensed under Apache-2.0.

use std::borrow::Borrow;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

@@ -14,8 +15,10 @@ use super::{path_of_data_column, path_of_index_column, RowSetIterator};
use crate::binder::BoundExpr;
use crate::catalog::ColumnCatalog;
use crate::storage::secondary::column::ColumnReadableFile;
use crate::storage::secondary::encode::PrimitiveFixedWidthEncode;
use crate::storage::secondary::DeleteVector;
use crate::storage::{StorageColumnRef, StorageResult};
use crate::types::DataValue;

/// Represents a column in Secondary.
///
@@ -120,8 +123,19 @@ impl DiskRowset {
dvs: Vec<Arc<DeleteVector>>,
seek_pos: ColumnSeekPosition,
expr: Option<BoundExpr>,
begin_keys: &[DataValue],
end_keys: &[DataValue],
) -> StorageResult<RowSetIterator> {
RowSetIterator::new(self.clone(), column_refs, dvs, seek_pos, expr).await
RowSetIterator::new(
self.clone(),
column_refs,
dvs,
seek_pos,
expr,
begin_keys,
end_keys,
)
.await
}

pub fn on_disk_size(&self) -> u64 {
@@ -131,10 +145,52 @@
.sum1()
.unwrap_or(0)
}

/// Get the row id at which a later table scan should start.
/// If `begin_keys` is empty, return `ColumnSeekPosition::RowId(0)` to scan from the
/// beginning. Otherwise, walk the block indexes of the rowset's first column, find the
/// last block whose first key is less than or equal to `begin_key`, and return that
/// block's first row id. Currently only the first column of the rowset can be used to
/// determine the start row id, and that column must be the primary key.
/// If `begin_key` is greater than every block's first key, the first row id of the last
/// block is returned.
/// TODO: support range filters over multiple sort keys.
pub async fn start_rowid(&self, begin_keys: &[DataValue]) -> ColumnSeekPosition {
Member: So we are assuming that the first column of the RowSet is pk, and pk is non-nullable. Should enforce this constraint in binder in later PRs.
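
A minimal, self-contained sketch of the kind of check that comment asks for; the types and names below are hypothetical placeholders, not RisingLight's actual binder or catalog API.

```rust
// Hypothetical sketch only; not RisingLight's real binder or catalog types.
// The invariant range scans rely on: the first column is a non-nullable primary key.
struct ColumnDef {
    name: String,
    is_primary_key: bool,
    nullable: bool,
}

fn check_range_scan_layout(columns: &[ColumnDef]) -> Result<(), String> {
    match columns.first() {
        Some(c) if c.is_primary_key && !c.nullable => Ok(()),
        Some(c) => Err(format!(
            "first column `{}` must be a non-nullable primary key for range-filter scans",
            c.name
        )),
        None => Err("table has no columns".to_string()),
    }
}
```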

if begin_keys.is_empty() {
return ColumnSeekPosition::RowId(0);
}

// for now, we only use the first column to get the start row id, which means the length
// of `begin_keys` can only be 0 or 1.
let begin_key = begin_keys[0].borrow();
let column = self.column(0);
let column_index = column.index();

let start_row_id = match *begin_key {
DataValue::Int32(begin_val) => {
let mut pre_block_first_key = 0;
for index in column_index.indexes() {
let mut first_key: &[u8] = &index.first_key;
let first_val: i32 = PrimitiveFixedWidthEncode::decode(&mut first_key);

if first_val > begin_val {
break;
}
pre_block_first_key = index.first_rowid;
}
Member: One optimization can be done in future PRs: use the partition_point function to do a binary search, which could be a lot faster.
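
A hedged sketch of that suggestion, decoupled from the on-disk encoding: `first_keys` is assumed to hold each block's already-decoded first key (in the real code these come from decoding `index.first_key`, as in the loop above) and `first_rowids` the matching first row ids. `slice::partition_point` finds the first block whose first key exceeds `begin_val`, and the block just before it is the one to seek to.

```rust
// Sketch only: inputs are assumed to be pre-decoded block-index data.
fn start_row_id_by_binary_search(first_keys: &[i32], first_rowids: &[u32], begin_val: i32) -> u32 {
    // Index of the first block whose first key is strictly greater than `begin_val`.
    let pos = first_keys.partition_point(|&k| k <= begin_val);
    if pos == 0 {
        0 // every block starts after `begin_val`: scan from the beginning
    } else {
        first_rowids[pos - 1]
    }
}
// Example mirroring the test below: first keys 0, 28, ..., 252 and begin_val 222
// resolve to row id 196.
```

This matches the linear walk above: both return the first row id of the last block whose first key is less than or equal to `begin_val`, defaulting to row id 0.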

pre_block_first_key
}
// TODO: support other types
_ => panic!("range-filter scan by sort key only supports Int32 for now"),
};
ColumnSeekPosition::RowId(start_row_id)
skyzh marked this conversation as resolved.
}
}

#[cfg(test)]
pub mod tests {
use std::borrow::Borrow;

use tempfile::TempDir;

use super::*;
@@ -270,11 +326,98 @@ pub mod tests {
.unwrap()
}

pub async fn helper_build_rowset_with_first_key_recorded(tempdir: &TempDir) -> DiskRowset {
let columns = vec![
ColumnCatalog::new(
0,
DataTypeKind::Int(None)
.not_null()
.to_column_primary_key("v1".to_string()),
),
ColumnCatalog::new(
1,
DataTypeKind::Int(None)
.not_null()
.to_column("v2".to_string()),
),
ColumnCatalog::new(
2,
DataTypeKind::Int(None)
.not_null()
.to_column("v3".to_string()),
),
];

let mut builder = RowsetBuilder::new(
columns.clone().into(),
ColumnBuilderOptions::record_first_key_test(),
);
let mut key = 0;
for _ in 0..10 {
let mut array0 = vec![];
let mut array1 = vec![];
let mut array2 = vec![];
for _ in 0..28 {
array0.push(key);
array1.push(key + 1);
array2.push(key + 2);
key += 1;
}
builder.append(
[
ArrayImpl::new_int32(array0.clone().into_iter().collect()),
ArrayImpl::new_int32(array1.clone().into_iter().collect()),
ArrayImpl::new_int32(array2.clone().into_iter().collect()),
]
.into_iter()
.collect(),
);
}

let backend = IOBackend::in_memory();

let writer = RowsetWriter::new(tempdir.path(), backend.clone());
writer.flush(builder.finish()).await.unwrap();

DiskRowset::open(
tempdir.path().to_path_buf(),
columns.into(),
Cache::new(2333),
0,
backend,
)
.await
.unwrap()
}

#[tokio::test]
async fn test_get_block() {
let tempdir = tempfile::tempdir().unwrap();
let rowset = helper_build_rowset(&tempdir, true, 1000).await;
let column = rowset.column(0);
column.get_block(0).await.unwrap();
}

#[tokio::test]
async fn test_get_start_id() {
let tempdir = tempfile::tempdir().unwrap();
let rowset = helper_build_rowset_with_first_key_recorded(&tempdir).await;
let start_keys = vec![DataValue::Int32(222)];

{
let start_rid = match rowset.start_rowid(start_keys.borrow()).await {
ColumnSeekPosition::RowId(x) => x,
_ => panic!("Unable to reach the branch"),
};
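// Blocks start at row ids 0, 28, 56, ...; begin key 222 falls in the block starting
// at row 196, so that block's first row id is returned.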
assert_eq!(start_rid, 196_u32);
}
{
let start_keys = vec![DataValue::Int32(10000)];
let start_rid = match rowset.start_rowid(start_keys.borrow()).await {
ColumnSeekPosition::RowId(x) => x,
_ => panic!("Unable to reach the branch"),
};
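// 10000 exceeds every block's first key, so the first row id of the last block (252) is returned.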
assert_eq!(start_rid, 252_u32);
}
}
}
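
Finally, a hedged sketch (not code from this PR) of how a caller in the secondary engine might combine `start_rowid` with the extended `iter`; the `column_refs` parameter type and the surrounding imports are assumptions based on what disk_rowset.rs and the compactor already use.

```rust
// Hedged sketch: resolve the seek position from the begin bound, then open the
// iterator with the same bounds so it can also honor the end key.
async fn range_scan_rowset(
    rowset: &DiskRowset,
    column_refs: Arc<[StorageColumnRef]>, // assumed to be the type `iter` expects
    dvs: Vec<Arc<DeleteVector>>,
    begin_keys: &[DataValue],
    end_keys: &[DataValue],
) -> StorageResult<RowSetIterator> {
    let seek_pos = rowset.start_rowid(begin_keys).await;
    rowset
        .iter(column_refs, dvs, seek_pos, None, begin_keys, end_keys)
        .await
}
```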