diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index 418fc6db9c..399f05b88c 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -19,6 +19,7 @@ pub mod residual; pub mod sq; pub mod transform; pub mod utils; +pub mod v3; // TODO: Make these crate private once the migration from lance to lance-index is done. pub const PQ_CODE_COLUMN: &str = "__pq_code"; diff --git a/rust/lance-index/src/vector/v3.rs b/rust/lance-index/src/vector/v3.rs new file mode 100644 index 0000000000..ff6d05ed1a --- /dev/null +++ b/rust/lance-index/src/vector/v3.rs @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +pub mod shuffler; diff --git a/rust/lance-index/src/vector/v3/shuffler.rs b/rust/lance-index/src/vector/v3/shuffler.rs new file mode 100644 index 0000000000..3a119999b5 --- /dev/null +++ b/rust/lance-index/src/vector/v3/shuffler.rs @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Shuffler is a component that takes a stream of record batches and shuffles them into +//! the corresponding IVF partitions. + +use lance_core::Result; +use lance_io::stream::RecordBatchStream; + +#[async_trait::async_trait] +/// A reader that can read the shuffled partitions. +pub trait IvfShuffleReader: Send + Sync { + /// Read a partition by partition_id + /// will return Ok(None) if partition_size is 0 + /// check reader.partiton_size(partition_id) before calling this function + async fn read_partition( + &self, + partition_id: usize, + ) -> Result>>; + + /// Get the size of the partition by partition_id + fn partiton_size(&self, partition_id: usize) -> Result; +} + +#[async_trait::async_trait] +/// A shuffler that can shuffle the incoming stream of record batches into IVF partitions. +/// Returns a IvfShuffleReader that can be used to read the shuffled partitions. +pub trait IvfShuffler { + /// Shuffle the incoming stream of record batches into IVF partitions. + /// Returns a IvfShuffleReader that can be used to read the shuffled partitions. + async fn shuffle( + mut self, + data: Box, + ) -> Result>; +}