Skip to content

Commit

Permalink
feat: define Flat index as a scan over VectorStorage
Browse files Browse the repository at this point in the history
ghstack-source-id: 5d8adb1c67e07d251cc9c617dd144b7f9660242d
Pull Request resolved: #2380
  • Loading branch information
chebbyChefNEQ committed May 22, 2024
1 parent 965b330 commit 43d4632
Showing 1 changed file with 80 additions and 3 deletions.
83 changes: 80 additions & 3 deletions rust/lance-index/src/vector/v3/subindex.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

use arrow_array::{RecordBatch, StructArray};
use arrow_schema::SchemaRef;
use lance_core::Result;
use std::sync::Arc;

use arrow_array::{Float32Array, RecordBatch, StructArray, UInt64Array};
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use itertools::Itertools;
use lance_core::{Result, ROW_ID_FIELD};
use num_traits::Num;
use roaring::RoaringBitmap;

use crate::vector::v3::storage::DistCalculator;
use crate::vector::{
graph::{OrderedFloat, OrderedNode},
DIST_COL,
};

use super::storage::VectorStore;

/// A sub index for IVF index
Expand Down Expand Up @@ -60,3 +69,71 @@ pub trait IvfSubIndex<T: Num, Store: VectorStore>: Send + Sync + Sized {
self.to_array()
}
}

/// A stateless "flat" sub index.
///
/// The type carries no fields; its `IvfSubIndex` implementation answers
/// queries by scanning the vector storage it is handed at search time, so
/// there is nothing to build, persist or restore.
#[derive(Debug, Clone, Default)]
struct FlatIndex {}

// Lazily-initialised, shared Arrow schema of the record batch returned by
// `FlatIndex::search`.
//
// Column order matters: the batch built in `search` supplies its arrays in
// exactly this order (distance first, row id second).
lazy_static::lazy_static! {
static ref ANN_SEARCH_SCHEMA: SchemaRef = Arc::new(Schema::new(vec![
// Distance column: Float32, declared nullable.
Field::new(DIST_COL, DataType::Float32, true),
// Row id column, reusing the field definition exported by `lance_core`.
ROW_ID_FIELD.clone(),
]));
}

/// `IvfSubIndex` implementation that performs an exhaustive scan over every
/// vector held by the `VectorStore`.
///
/// The index keeps no state of its own: `index` is a no-op, `load` ignores its
/// input and `to_array` serialises to a struct array with no fields.
impl<T: Num, Store: VectorStore> IvfSubIndex<T, Store> for FlatIndex {
    /// A flat scan has no tunable query parameters.
    type QueryParams = ();

    /// Returns the name of this sub index type, `"FLAT"`.
    fn index_name(&self) -> &str {
        "FLAT"
    }

    /// Scans `storage` and returns the `k` best matches for `query`.
    ///
    /// # Arguments
    /// * `query` - the query vector, handed to the storage's distance calculator.
    /// * `k` - maximum number of results to return.
    /// * `_params` - unused; a flat scan has no parameters.
    /// * `storage` - the vectors to scan together with their row ids.
    /// * `pre_filter_bitmap` - when `Some`, every storage id contained in the
    ///   bitmap is skipped; when `None`, all vectors are considered.
    ///
    /// # Returns
    /// A `RecordBatch` following `ANN_SEARCH_SCHEMA` (distance column, then
    /// row id column) holding at most `k` rows, ordered by `OrderedNode`'s
    /// `Ord` implementation (presumably ascending distance - confirm against
    /// `OrderedNode`).
    ///
    /// # Errors
    /// Propagates any error raised while assembling the `RecordBatch`.
    fn search(
        &self,
        query: &[T],
        k: usize,
        _params: Self::QueryParams,
        storage: &Store,
        pre_filter_bitmap: Option<RoaringBitmap>,
    ) -> Result<RecordBatch> {
        let dist_calc = storage.dist_calculator_from_native(query);
        // Fetch the row id lookup once instead of once per returned result.
        let stored_row_ids = storage.row_ids();

        let (row_ids, dists): (Vec<u64>, Vec<f32>) = (0..storage.len())
            // Keep an id unless the pre-filter bitmap lists it.
            // NOTE(review): ids are narrowed to u32 here, as in the node ids
            // below; this assumes the storage never holds more than u32::MAX
            // vectors - confirm.
            .filter(|&id| {
                pre_filter_bitmap
                    .as_ref()
                    .map_or(true, |skip| !skip.contains(id as u32))
            })
            .map(|id| OrderedNode {
                id: id as u32,
                dist: OrderedFloat(dist_calc.distance(id as u32)),
            })
            // Select the k smallest nodes (returned in ascending order)
            // without fully sorting every candidate.
            .k_smallest(k)
            .map(
                |OrderedNode {
                     id,
                     dist: OrderedFloat(dist),
                 }| (stored_row_ids[id as usize], dist),
            )
            .unzip();

        let (row_ids, dists) = (UInt64Array::from(row_ids), Float32Array::from(dists));

        // Array order must match the field order of ANN_SEARCH_SCHEMA.
        Ok(RecordBatch::try_new(
            ANN_SEARCH_SCHEMA.clone(),
            vec![Arc::new(dists), Arc::new(row_ids)],
        )?)
    }

    /// Restores the index from its serialised form.
    ///
    /// The input is ignored because the flat index has no state.
    fn load(_: StructArray) -> Result<Self> {
        Ok(Self {})
    }

    /// Builds the index over the given storage; a no-op for the flat index.
    fn index(&self, _: &Store) -> Result<()> {
        Ok(())
    }

    /// Serialises the index; the flat index produces a struct array with no
    /// fields.
    fn to_array(&self) -> Result<StructArray> {
        Ok(StructArray::from(vec![]))
    }
}

0 comments on commit 43d4632

Please sign in to comment.