Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2068,7 +2068,13 @@ def create_index(
kwargs["metric_type"] = metric

index_type = index_type.upper()
valid_index_types = ["IVF_FLAT", "IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ"]
valid_index_types = [
"IVF_FLAT",
"IVF_PQ",
"IVF_HNSW_FLAT",
"IVF_HNSW_PQ",
"IVF_HNSW_SQ",
]
if index_type not in valid_index_types:
raise NotImplementedError(
f"Only {valid_index_types} index types supported. Got {index_type}"
Expand Down
12 changes: 12 additions & 0 deletions python/python/tests/test_vector_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,18 @@ def test_create_ivf_hnsw_sq_index(dataset, tmp_path):
assert ann_ds.list_indices()[0]["fields"] == ["vector"]


def test_create_ivf_hnsw_flat_index(dataset, tmp_path):
    """Check that an ``IVF_HNSW_FLAT`` index can be created on ``vector``.

    Writes the table of the un-indexed ``dataset`` to ``indexed.lance``
    under ``tmp_path``, builds the index on the written dataset, and
    asserts that the first listed index covers exactly the ``vector`` column.
    """
    # The input dataset must not carry an index yet.
    assert not dataset.has_index

    table = dataset.to_table()
    target = tmp_path / "indexed.lance"
    written = lance.write_dataset(table, target)

    # NOTE(review): num_sub_vectors is presumably irrelevant for a flat
    # (non-quantized) index -- confirm against create_index before removing.
    indexed = written.create_index(
        "vector",
        index_type="IVF_HNSW_FLAT",
        num_partitions=4,
        num_sub_vectors=16,
    )

    fields = indexed.list_indices()[0]["fields"]
    assert fields == ["vector"]


def test_multivec_ann(indexed_multivec_dataset: lance.LanceDataset):
query = np.random.rand(5, 128)
results = indexed_multivec_dataset.scanner(
Expand Down
10 changes: 9 additions & 1 deletion python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,9 @@ impl Dataset {
"NGRAM" => IndexType::NGram,
"LABEL_LIST" => IndexType::LabelList,
"INVERTED" | "FTS" => IndexType::Inverted,
"IVF_FLAT" | "IVF_PQ" | "IVF_HNSW_PQ" | "IVF_HNSW_SQ" => IndexType::Vector,
"IVF_FLAT" | "IVF_PQ" | "IVF_HNSW_FLAT" | "IVF_HNSW_PQ" | "IVF_HNSW_SQ" => {
IndexType::Vector
}
_ => {
return Err(PyValueError::new_err(format!(
"Index type '{index_type}' is not supported."
Expand Down Expand Up @@ -2153,6 +2155,12 @@ fn prepare_vector_index_params(
m_type, ivf_params, pq_params,
))),

"IVF_HNSW_FLAT" => Ok(Box::new(VectorIndexParams::ivf_hnsw(
m_type,
ivf_params,
hnsw_params,
))),

"IVF_HNSW_PQ" => Ok(Box::new(VectorIndexParams::with_ivf_hnsw_pq_params(
m_type,
ivf_params,
Expand Down
7 changes: 6 additions & 1 deletion rust/lance-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pub enum IndexType {
IvfPq = 103,
IvfHnswSq = 104,
IvfHnswPq = 105,
IvfHnswFlat = 106,
}

impl std::fmt::Display for IndexType {
Expand All @@ -115,6 +116,7 @@ impl std::fmt::Display for IndexType {
Self::IvfSq => write!(f, "IVF_SQ"),
Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
}
}
}
Expand All @@ -136,6 +138,7 @@ impl TryFrom<i32> for IndexType {
v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
_ => Err(Error::InvalidInput {
source: format!("the input value {} is not a valid IndexType", value).into(),
location: location!(),
Expand Down Expand Up @@ -164,6 +167,7 @@ impl IndexType {
| Self::IvfPq
| Self::IvfHnswSq
| Self::IvfHnswPq
| Self::IvfHnswFlat
| Self::IvfFlat
| Self::IvfSq
)
Expand Down Expand Up @@ -191,7 +195,8 @@ impl IndexType {
| Self::IvfSq
| Self::IvfPq
| Self::IvfHnswSq
| Self::IvfHnswPq => 1,
| Self::IvfHnswPq
| Self::IvfHnswFlat => 1,
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions rust/lance/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,18 @@ impl DatasetIndexInternalExt for Dataset {
Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
}

"IVF_HNSW_FLAT" => {
let ivf = IVFIndex::<HNSW, FlatQuantizer>::try_new(
self.object_store.clone(),
self.indices_dir(),
uuid.to_owned(),
Arc::downgrade(&self.session),
fri,
)
.await?;
Ok(Arc::new(ivf) as Arc<dyn VectorIndex>)
}

"IVF_HNSW_SQ" => {
let ivf = IVFIndex::<HNSW, ScalarQuantizer>::try_new(
self.object_store.clone(),
Expand Down
28 changes: 28 additions & 0 deletions rust/lance/src/index/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,19 @@ impl VectorIndexParams {
}
}

/// Create index parameters with `IVF` and `HNSW` parameters, respectively.
/// This is used for `IVF_HNSW_FLAT` index.
///
/// The stages are stored in the order `Ivf`, then `Hnsw`; no quantizer stage
/// is added. The index file version is set to `IndexFileVersion::V3`.
pub fn ivf_hnsw(
    distance_type: DistanceType,
    ivf: IvfBuildParams,
    hnsw: HnswBuildParams,
) -> Self {
    Self {
        stages: vec![StageParams::Ivf(ivf), StageParams::Hnsw(hnsw)],
        metric_type: distance_type,
        version: IndexFileVersion::V3,
    }
}

/// Create index parameters with `IVF`, `PQ` and `HNSW` parameters, respectively.
/// This is used for `IVF_HNSW_PQ` index.
pub fn with_ivf_hnsw_pq_params(
Expand Down Expand Up @@ -392,6 +405,21 @@ pub(crate) async fn build_vector_index(
});
}
}
} else {
// without quantization
IvfIndexBuilder::<HNSW, FlatQuantizer>::new(
dataset.clone(),
column.to_owned(),
dataset.indices_dir().child(uuid),
params.metric_type,
Box::new(shuffler),
Some(ivf_params.clone()),
Some(()),
hnsw_params.clone(),
fri,
)?
.build()
.await?;
}
} else {
return Err(Error::Index {
Expand Down
23 changes: 23 additions & 0 deletions rust/lance/src/index/vector/ivf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,29 @@ pub(crate) async fn optimize_vector_indices_v2(
.build()
.await?;
}
// IVF_HNSW_FLAT
(SubIndexType::Hnsw, QuantizationType::Flat) => {
IvfIndexBuilder::<HNSW, FlatQuantizer>::new(
dataset.clone(),
vector_column.to_owned(),
index_dir,
distance_type,
shuffler,
None,
None,
// TODO: get the HNSW parameters from the existing indices
HnswBuildParams::default(),
fri,
)?
.with_ivf(ivf_model.clone())
.with_quantizer(quantizer.try_into()?)
.with_existing_indices(indices_to_merge)
.retrain(options.retrain)
.shuffle_data(unindexed)
.await?
.build()
.await?;
}
// IVF_HNSW_SQ
(SubIndexType::Hnsw, QuantizationType::Scalar) => {
IvfIndexBuilder::<HNSW, ScalarQuantizer>::new(
Expand Down
22 changes: 21 additions & 1 deletion rust/lance/src/index/vector/ivf/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> Index for IVFIndex<S,
(SubIndexType::Flat, QuantizationType::Scalar) => IndexType::IvfSq,
(SubIndexType::Hnsw, QuantizationType::Product) => IndexType::IvfHnswPq,
(SubIndexType::Hnsw, QuantizationType::Scalar) => IndexType::IvfHnswSq,
_ => IndexType::Vector,
(SubIndexType::Hnsw, QuantizationType::Flat) => IndexType::IvfHnswFlat,
}
}

Expand Down Expand Up @@ -1244,6 +1244,26 @@ mod tests {
test_optimize_strategy(params).await;
}

/// Exercises index parameters built by `VectorIndexParams::ivf_hnsw` with
/// default HNSW settings, once per distance type case.
#[rstest]
#[case(4, DistanceType::L2, 0.9)]
#[case(4, DistanceType::Cosine, 0.9)]
#[case(4, DistanceType::Dot, 0.85)]
#[tokio::test]
async fn test_create_ivf_hnsw_flat(
    #[case] nlist: usize,
    #[case] distance_type: DistanceType,
    #[case] recall_requirement: f32,
) {
    let index_params = VectorIndexParams::ivf_hnsw(
        distance_type,
        IvfBuildParams::new(nlist),
        HnswBuildParams::default(),
    );

    test_index(index_params.clone(), nlist, recall_requirement, None).await;

    // The multi-vector check only runs for the cosine case.
    if matches!(distance_type, DistanceType::Cosine) {
        test_index_multivec(index_params.clone(), nlist, recall_requirement).await;
    }

    // Last use: hand over ownership of the parameters.
    test_optimize_strategy(index_params).await;
}

#[rstest]
#[case(4, DistanceType::L2, 0.9)]
#[case(4, DistanceType::Cosine, 0.9)]
Expand Down
Loading