Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions diskann-benchmark/src/backend/disk_index/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use diskann::{
use diskann_benchmark_runner::utils::MicroSeconds;
use diskann_disk::{
build::builder::build::DiskIndexBuilder,
data_model::AdHoc,
disk_index_build_parameter::{
DiskIndexBuildParameters, MemoryBudget, NumPQChunks, DISK_SECTOR_LEN,
},
Expand All @@ -27,8 +28,7 @@ use opentelemetry_sdk::trace::SdkTracerProvider;
use scopeguard::defer;

use crate::{
backend::disk_index::{graph_data_type::GraphData, json_spancollector::JsonSpanCollector},
inputs::disk::DiskIndexBuild,
backend::disk_index::json_spancollector::JsonSpanCollector, inputs::disk::DiskIndexBuild,
};

#[derive(Serialize, Debug)]
Expand Down Expand Up @@ -114,7 +114,7 @@ where
DISK_SECTOR_LEN,
)?;

let mut disk_index = DiskIndexBuilder::<GraphData<T>, StorageProviderType>::new(
let mut disk_index = DiskIndexBuilder::<AdHoc<T>, StorageProviderType>::new(
storage_provider,
build_parameters,
index_configuration,
Expand Down
20 changes: 0 additions & 20 deletions diskann-benchmark/src/backend/disk_index/graph_data_type.rs

This file was deleted.

1 change: 0 additions & 1 deletion diskann-benchmark/src/backend/disk_index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ cfg_if::cfg_if! {
if #[cfg(feature = "disk-index")] {
mod benchmarks;
mod build;
mod graph_data_type;
mod search;
mod json_spancollector;

Expand Down
6 changes: 3 additions & 3 deletions diskann-benchmark/src/backend/disk_index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use opentelemetry_sdk::trace::SdkTracerProvider;
use diskann::utils::VectorRepr;
use diskann_benchmark_runner::{files::InputFile, utils::MicroSeconds};
use diskann_disk::{
data_model::CachingStrategy,
data_model::{AdHoc, CachingStrategy},
search::provider::{
disk_provider::DiskIndexSearcher, disk_vertex_provider_factory::DiskVertexProviderFactory,
},
Expand All @@ -31,7 +31,7 @@ use diskann_utils::views::Matrix;
use serde::Serialize;

use crate::{
backend::disk_index::{graph_data_type::GraphData, json_spancollector::JsonSpanCollector},
backend::disk_index::json_spancollector::JsonSpanCollector,
inputs::disk::{DiskIndexLoad, DiskSearchPhase},
utils::{datafiles, SimilarityMeasure},
};
Expand Down Expand Up @@ -217,7 +217,7 @@ where
let reader_factory = AlignedFileReaderFactory::new(disk_index_path);
let vertex_provider_factory = DiskVertexProviderFactory::new(reader_factory, caching_strategy)?;

let searcher = &DiskIndexSearcher::<GraphData<T>, _>::new(
let searcher = &DiskIndexSearcher::<AdHoc<T>, _>::new(
search_params.num_threads,
if let Some(lim) = search_params.search_io_limit {
lim
Expand Down
11 changes: 4 additions & 7 deletions diskann-disk/src/build/builder/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,16 @@ use std::{
sync::{Arc, Mutex},
};

use crate::data_model::GraphDataType;
use diskann::{
utils::{async_tools, vecid_from_usize, TryIntoVectorId, VectorRepr, ONE},
ANNError, ANNErrorKind, ANNResult,
};
use diskann_providers::storage::{StorageReadProvider, StorageWriteProvider};
use diskann_providers::{
model::{
graph::{
provider::async_::inmem::DefaultProviderParameters,
traits::{AdHoc, GraphDataType},
},
IndexConfiguration, MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ, NUM_PQ_CENTROIDS,
graph::provider::async_::inmem::DefaultProviderParameters, IndexConfiguration,
MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ, NUM_PQ_CENTROIDS,
},
storage::{AsyncIndexMetadata, DiskGraphOnly, PQStorage},
utils::{
Expand Down Expand Up @@ -510,8 +508,7 @@ where

// Associated data will only be used in the write_disk_layout function, which only requires the non-partitioned associated data stream.
let dataset_iter = Arc::new(Mutex::new({
let iter =
VectorDataIterator::<_, AdHoc<T>>::new(data_path, Option::None, storage_provider)?;
let iter = VectorDataIterator::<_, T>::new(data_path, Option::None, storage_provider)?;
iter.enumerate().skip(offset)
}));

Expand Down
10 changes: 3 additions & 7 deletions diskann-disk/src/build/builder/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
*/
use std::mem::{self, size_of};

use crate::data_model::GraphDataType;
use diskann::ANNResult;
use diskann_providers::storage::{StorageReadProvider, StorageWriteProvider};
use diskann_providers::{
model::{
graph::traits::GraphDataType, IndexConfiguration, GRAPH_SLACK_FACTOR,
MAX_PQ_TRAINING_SET_SIZE,
},
model::{IndexConfiguration, GRAPH_SLACK_FACTOR, MAX_PQ_TRAINING_SET_SIZE},
storage::PQStorage,
utils::{
load_metadata_from_file, RayonThreadPool, SampleVectorReader, SamplingDensity,
Expand Down Expand Up @@ -628,6 +626,7 @@ impl<'a> MergedVamanaIndexWorkflow<'a> {
pub(crate) mod disk_index_builder_tests {
use std::{io::Read, sync::Arc};

use crate::test_utils::{GraphDataF32VectorU32Data, GraphDataF32VectorUnitData};
use diskann::{
graph::config,
utils::{IntoUsize, VectorRepr, ONE},
Expand All @@ -637,9 +636,6 @@ pub(crate) mod disk_index_builder_tests {
use diskann_providers::{
common::AlignedBoxWithSlice,
storage::{get_compressed_pq_file, get_disk_index_file, get_pq_pivot_file},
test_utils::graph_data_type_utils::{
GraphDataF32VectorU32Data, GraphDataF32VectorUnitData,
},
utils::Timer,
};
use diskann_utils::test_data_root;
Expand Down
6 changes: 2 additions & 4 deletions diskann-disk/src/build/builder/quantizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,13 @@
* Licensed under the MIT license.
*/
//! Disk index quantizer implementation.
use crate::data_model::GraphDataType;
use diskann::{ANNError, ANNResult};
use diskann_providers::storage::{StorageReadProvider, StorageWriteProvider};
use diskann_providers::{
index::diskann_async::train_pq,
model::{
graph::{
provider::async_::{common::NoStore, inmem::WithBits},
traits::GraphDataType,
},
graph::provider::async_::{common::NoStore, inmem::WithBits},
FixedChunkPQTable, IndexConfiguration, MAX_PQ_TRAINING_SET_SIZE,
},
storage::{PQStorage, SQStorage},
Expand Down
4 changes: 1 addition & 3 deletions diskann-disk/src/build/builder/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ mod chunkable_disk_index_build_tests {
time::Duration,
};

use diskann_providers::test_utils::{
graph_data_type_utils::GraphDataF32VectorUnitData, GraphDataMinMaxVectorUnitData,
};
use crate::test_utils::{GraphDataF32VectorUnitData, GraphDataMinMaxVectorUnitData};
use diskann_utils::test_data_root;
use rstest::rstest;

Expand Down
4 changes: 2 additions & 2 deletions diskann-disk/src/build/configuration/filter_parameter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT license.
*/

use diskann_providers::model::graph::traits::GraphDataType;
use crate::data_model::GraphDataType;

pub type AssociatedDataFilter<Data> =
Box<dyn Fn(&<Data as GraphDataType>::AssociatedDataType) -> bool>;
Expand All @@ -22,7 +22,7 @@ pub fn default_vector_filter<Data: GraphDataType>() -> VectorFilter<'static, Dat
#[cfg(test)]
mod tests {
use super::*;
use diskann_providers::test_utils::graph_data_type_utils::GraphDataF32VectorUnitData;
use crate::test_utils::GraphDataF32VectorUnitData;

type TestGraphData = GraphDataF32VectorUnitData;

Expand Down
5 changes: 3 additions & 2 deletions diskann-disk/src/data_model/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
* Licensed under the MIT license.
*/

use crate::data_model::GraphDataType;
use diskann::{graph::AdjacencyList, ANNError, ANNResult};
use diskann_providers::{common::AlignedBoxWithSlice, model::graph::traits::GraphDataType};
use diskann_providers::common::AlignedBoxWithSlice;
use hashbrown::{hash_map::Entry::Occupied, HashMap};

use super::FP_VECTOR_MEM_ALIGN;
Expand Down Expand Up @@ -147,8 +148,8 @@ pub enum CachingStrategy {

#[cfg(test)]
mod tests {
use crate::test_utils::GraphDataF32VectorUnitData;
use diskann::graph::AdjacencyList;
use diskann_providers::test_utils::graph_data_type_utils::GraphDataF32VectorUnitData;
use rstest::rstest;

use crate::data_model::Cache;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,6 @@ use serde::{Deserialize, Serialize};
/// namely to allow instances of this type to be saved and loaded.
///
/// * `Send` & `Sync`: These traits are again necessary for multithreaded access.
///
/// * `AdjacencyListType`: This type represents the adjacency list used for the graph.
///
/// * `AdjacencyListTrait`: The behavior required for the adjacency list type.
///
/// * `Send` & `Sync`: These traits have the same purposes as explained above.
///
/// * `TryFrom<&'a [u8]>`: This trait allows the adjacency list to be constructed from a slice of bytes. (Not using Serde here because we want to precisely control the format of the bytes)
///
/// * `TryInto<Vec<u8>>`: This trait allows the adjacency list to be converted into a vector of bytes. (Not using Serde here because we want to precisely control the format of the bytes)
///
/// * `for<'a> AdjacencyListIterator<'a>' expresses that the a reference to the AdjacencyList should be able to convert into a Iterator of &'a u32.
pub trait GraphDataType: Send + Sync + 'static {
type VectorDataType: VectorRepr;

Expand All @@ -51,7 +39,7 @@ pub trait GraphDataType: Send + Sync + 'static {
type VectorIdType: VectorId;
}

/// An adhoc `GraphDataType` for implementations that only need an the `VectorDataType`
/// An adhoc `GraphDataType` for implementations that only need the `VectorDataType`
/// and `VectorIdType`.
///
/// This type defaults to using `u32` for the ID type for extra convenience.
Expand Down
3 changes: 3 additions & 0 deletions diskann-disk/src/data_model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ pub use graph_header::GraphHeader;
mod cache;
pub use cache::{Cache, CachingStrategy};

pub mod graph_data_types;
pub use graph_data_types::{AdHoc, GraphDataType};

pub const FP_VECTOR_MEM_ALIGN: usize = 32;
3 changes: 3 additions & 0 deletions diskann-disk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
//! This crate provides disk-based indexing capabilities for DiskANN,
//! including builders, providers, and utilities specific to disk storage.

#[cfg(test)]
pub(crate) mod test_utils;

pub mod build;
pub use build::{
disk_index_build_parameter, filter_parameter, DiskIndexBuildParameters, QuantizationType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

use std::sync::Arc;

use crate::data_model::GraphDataType;
use diskann::{ANNError, ANNResult};
use diskann_providers::model::graph::traits::GraphDataType;

use crate::utils::aligned_file_reader::traits::AlignedFileReader;
use hashbrown::HashMap;
Expand Down
9 changes: 3 additions & 6 deletions diskann-disk/src/search/provider/disk_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use std::{
time::Instant,
};

use crate::data_model::GraphDataType;
use diskann::{
graph::{
self,
Expand All @@ -38,9 +39,7 @@ use diskann::{
};
use diskann_providers::storage::StorageReadProvider;
use diskann_providers::{
model::{
compute_pq_distance, compute_pq_distance_for_pq_coordinates, graph::traits::GraphDataType,
},
model::{compute_pq_distance, compute_pq_distance_for_pq_coordinates},
storage::{get_compressed_pq_file, get_disk_index_file, get_pq_pivot_file, LoadWith},
};

Expand Down Expand Up @@ -1053,6 +1052,7 @@ fn ensure_vertex_loaded<Data: GraphDataType, V: VertexProvider<Data>>(

#[cfg(test)]
mod disk_provider_tests {
use crate::test_utils::{GraphDataF32VectorU32Data, GraphDataF32VectorUnitData};
use diskann::{
graph::{
search::{record::VisitedSearchRecord, Knn},
Expand All @@ -1066,9 +1066,6 @@ mod disk_provider_tests {
};
use diskann_providers::{
common::AlignedBoxWithSlice,
test_utils::graph_data_type_utils::{
GraphDataF32VectorU32Data, GraphDataF32VectorUnitData,
},
utils::{create_thread_pool, load_aligned_bin, PQPathNames, ParallelIteratorInPool},
};
use diskann_utils::{io::read_bin, test_data_root};
Expand Down
9 changes: 4 additions & 5 deletions diskann-disk/src/search/provider/disk_vertex_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

use std::ptr;

use crate::data_model::GraphDataType;
use byteorder::{ByteOrder, LittleEndian};
use diskann::{ANNError, ANNResult};
use diskann_providers::{common::AlignedBoxWithSlice, model::graph::traits::GraphDataType};
use diskann_providers::common::AlignedBoxWithSlice;
use hashbrown::HashMap;

use crate::{
Expand Down Expand Up @@ -276,15 +277,13 @@ where
mod disk_vertex_provider_tests {
use std::sync::Arc;

use crate::{data_model::GraphDataType, test_utils::GraphDataF32VectorU32Data};
use diskann::{graph::config, utils::ONE};
use diskann_providers::storage::{
StorageReadProvider, StorageWriteProvider, VirtualStorageProvider,
};
use diskann_providers::{
model::{graph::traits::GraphDataType, IndexConfiguration},
storage::get_disk_index_file,
test_utils::graph_data_type_utils::GraphDataF32VectorU32Data,
utils::load_metadata_from_file,
model::IndexConfiguration, storage::get_disk_index_file, utils::load_metadata_from_file,
};
use diskann_utils::test_data_root;
use vfs::OverlayFS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
*/
use std::{cmp::min, collections::VecDeque, sync::Arc, time::Instant};

use crate::data_model::GraphDataType;
use diskann::{graph::AdjacencyList, utils::TryIntoVectorId, ANNError, ANNResult};
use diskann_providers::{common::AlignedBoxWithSlice, model::graph::traits::GraphDataType};
use diskann_providers::common::AlignedBoxWithSlice;
use hashbrown::HashSet;
use tracing::info;

Expand Down Expand Up @@ -231,11 +232,9 @@ impl<Data: GraphDataType<VectorIdType = u32>, ReaderFactory: AlignedReaderFactor
#[cfg(test)]
pub(crate) mod tests {
use super::*;
use crate::test_utils::GraphDataF32VectorUnitData;
use crate::utils::VirtualAlignedReaderFactory;
use diskann_providers::{
storage::VirtualStorageProvider,
test_utils::graph_data_type_utils::GraphDataF32VectorUnitData,
};
use diskann_providers::storage::VirtualStorageProvider;
use diskann_utils::test_data_root;
use vfs::OverlayFS;

Expand Down
2 changes: 1 addition & 1 deletion diskann-disk/src/search/traits/vertex_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
* Licensed under the MIT license.
*/

use crate::data_model::GraphDataType;
use diskann::ANNResult;
use diskann_providers::model::graph::traits::GraphDataType;

/// `VertexProvider` is a trait that abstracts the access to Vertex data.
///
Expand Down
2 changes: 1 addition & 1 deletion diskann-disk/src/search/traits/vertex_provider_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
* Licensed under the MIT license.
*/

use crate::data_model::GraphDataType;
use diskann::ANNResult;
use diskann_providers::model::graph::traits::GraphDataType;

use super::VertexProvider;
use crate::data_model::GraphHeader;
Expand Down
Loading
Loading