Skip to content

Commit

Permalink
Index cache settable in bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Nov 19, 2023
1 parent 336140f commit af7b448
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 18 deletions.
4 changes: 2 additions & 2 deletions python/python/tests/test_vector_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def query_index(ds, ntimes):
index_type="IVF_PQ",
num_partitions=128,
num_sub_vectors=2,
index_cache_size=10,
index_cache_size=584,
)

assert (
Expand All @@ -508,7 +508,7 @@ def query_index(ds, ntimes):
indexed_dataset.stats.index_stats("vector_idx")["index_cache_entry_count"] == 10
)

indexed_dataset = lance.LanceDataset(indexed_dataset.uri, index_cache_size=5)
indexed_dataset = lance.LanceDataset(indexed_dataset.uri, index_cache_size=288)
query_index(indexed_dataset, 128)
assert (
indexed_dataset.stats.index_stats("vector_idx")["index_cache_entry_count"] == 5
Expand Down
2 changes: 1 addition & 1 deletion rust/lance/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ pub use write::{write_fragments, WriteMode, WriteParams};
const INDICES_DIR: &str = "_indices";

const DATA_DIR: &str = "data";
pub(crate) const DEFAULT_INDEX_CACHE_SIZE: usize = 256;
pub(crate) const DEFAULT_INDEX_CACHE_SIZE: usize = 256 * 20;
pub(crate) const DEFAULT_METADATA_CACHE_SIZE: usize = 256;

/// Lance Dataset
Expand Down
26 changes: 13 additions & 13 deletions rust/lance/src/index/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::mem;
use std::sync::Arc;

use lance_index::scalar::ScalarIndex;
use moka::sync::{Cache, CacheBuilder, ConcurrentCacheExt};
use moka::sync::{Cache, ConcurrentCacheExt};

use super::vector::VectorIndex;

Expand Down Expand Up @@ -46,20 +47,18 @@ pub struct IndexCache {

impl IndexCache {
pub(crate) fn new(capacity: usize) -> Self {
// Element size is the size of the u32 key (4 bytes) + the size of the uuid (16 bytes)
let element_size : u32 = 20;


let vector_cache = CacheBuilder::new(capacity as u64)
.weigher(move |&_,&_| -> u32 { element_size })
let vector_cache = Cache::builder()
.weigher(|k: &String, v: &Arc<dyn VectorIndex>| -> u32 { (k.len() + mem::size_of_val(v)) as u32 })
.max_capacity(capacity as u64)
.build();

let scalar_cache = Arc::new(Cache::new(capacity as u64));
// let vector_cahce = Arc::new(Cache::new(capacity as u64));
let scalar_cache = Cache::builder()
.weigher(|k: &String, v: &Arc<dyn ScalarIndex>| -> u32 { (k.len() + mem::size_of_val(v)) as u32 })
.max_capacity(capacity as u64)
.build();

Self {
scalar_cache: scalar_cache,
scalar_cache: Arc::new(scalar_cache),
vector_cache: Arc::new(vector_cache),
cache_stats: Arc::new(CacheStats::default()),
}
Expand All @@ -74,13 +73,14 @@ impl IndexCache {
pub(crate) fn get_size(&self) -> usize {
self.scalar_cache.sync();
self.vector_cache.sync();
self.scalar_cache.entry_count() as usize + self.vector_cache.entry_count() as usize
(self.scalar_cache.entry_count() + self.vector_cache.entry_count()) as usize
}

pub(crate) fn get_byte_size(&self) {
#[allow(dead_code)]
pub(crate) fn get_byte_size(&self) -> u64 {
self.scalar_cache.sync();
self.vector_cache.sync();
self.vector_cache.weighted_size();
self.scalar_cache.weighted_size() + self.vector_cache.weighted_size()
}

/// Get an Index if present. Otherwise returns [None].
Expand Down
6 changes: 4 additions & 2 deletions rust/lance/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ mod tests {

#[test]
fn test_basic() {
let session = Session::new(10, 1);
let session = Session::new(172, 1);
let session = Arc::new(session);

let pq = Arc::new(ProductQuantizerImpl::<Float32Type>::new(
Expand All @@ -114,6 +114,7 @@ mod tests {
assert!(session.index_cache.get_vector("abc").is_some());
assert_eq!(session.index_cache.len_vector(), 1);
assert_eq!(session.index_cache.get_size(), 1);
assert_eq!(session.index_cache.get_byte_size(), 19);

for iter_idx in 0..100 {
let pq_other = Arc::new(ProductQuantizerImpl::<Float32Type>::new(
Expand All @@ -129,7 +130,8 @@ mod tests {
.insert_vector(format!("{iter_idx}").as_str(), idx_other.clone());
}

// Capacity is 10 so there should be at most 10 items
// The configured capacity allows for 10 entries, so there should be at most 10 items
assert_eq!(session.index_cache.len_vector(), 10);
assert_eq!(session.index_cache.get_byte_size(), 172);
}
}

0 comments on commit af7b448

Please sign in to comment.