diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 34232348c20..46fd16cf3aa 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -28,8 +28,8 @@ from pyarrow import RecordBatch, Schema from pyarrow._compute import Expression -from .lance import __version__, _Dataset, _Scanner, _write_dataset from .fragment import LanceFragment +from .lance import __version__, _Dataset, _Scanner, _write_dataset class LanceDataset(pa.dataset.Dataset): @@ -193,7 +193,9 @@ def replace_schema(self, schema: Schema): """ raise NotImplementedError("not changing schemas yet") - def get_fragments(self, filter: Optional[Expression] = None) -> Iterator[pa.dataset.Fragment]: + def get_fragments( + self, filter: Optional[Expression] = None + ) -> Iterator[pa.dataset.Fragment]: """Get all fragments from the dataset. Note: filter is not supported yet. diff --git a/python/python/lance/fragment.py b/python/python/lance/fragment.py index 9fdeb703b8e..36bc31db362 100644 --- a/python/python/lance/fragment.py +++ b/python/python/lance/fragment.py @@ -16,7 +16,8 @@ """Dataset Fragment""" from __future__ import annotations -from typing import Union, Optional, Iterator + +from typing import Iterator, Optional, Union import pyarrow as pa diff --git a/python/python/lance/vector.py b/python/python/lance/vector.py index 7ffe3efd0b8..b3f700f7573 100644 --- a/python/python/lance/vector.py +++ b/python/python/lance/vector.py @@ -93,13 +93,6 @@ def vec_to_table( values = list(data.values()) if check_ndim: ndim = _validate_ndim(values, ndim) - if ndim % 8 != 0: - raise ValueError( - "Vector dimensions should be multiples of 8 " - "for SIMD performance. To continue creating " - f"vectors with {ndim}-dimensions, " - "set `check_ndim=False" - ) vectors = _normalize_vectors(values, ndim) ids = pa.array(data.keys()) arrays = [ids, vectors] @@ -112,13 +105,6 @@ def vec_to_table( raise ValueError(f"names cannot be more than 1 got {len(names)}") if check_ndim: ndim = _validate_ndim(data, ndim) - if ndim % 8 != 0: - raise ValueError( - "Vector dimensions should be multiples of 8 " - "for SIMD performance. To continue creating " - f"vectors with {ndim}-dimensions, " - "set `check_ndim=False" - ) vectors = _normalize_vectors(data, ndim) arrays = [vectors] else: diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py index 0cc62d73430..e0c3b11da6c 100644 --- a/python/python/tests/test_dataset.py +++ b/python/python/tests/test_dataset.py @@ -20,9 +20,8 @@ from pathlib import Path from unittest import mock -import pandas as pd - import lance +import pandas as pd import pandas.testing as tm import polars as pl import pyarrow as pa diff --git a/python/python/tests/test_lance.py b/python/python/tests/test_lance.py index 0c4fa84c01c..a9ba20ff34a 100644 --- a/python/python/tests/test_lance.py +++ b/python/python/tests/test_lance.py @@ -20,6 +20,7 @@ import pyarrow as pa import pyarrow.dataset import pytest +from lance.vector import vec_to_table def test_table_roundtrip(tmp_path): @@ -104,6 +105,24 @@ def l2sq(vec, mat): return np.sum((mat - vec) ** 2, axis=1) +def test_nearest_cosine(tmp_path): + tbl = vec_to_table([[2.5, 6], [7.4, 3.3]]) + lance.write_dataset(tbl, tmp_path) + + q = [483.5, 1384.5] + dataset = lance.dataset(tmp_path) + rs = dataset.to_table( + nearest={"column": "vector", "q": q, "k": 10, "metric": "cosine"} + ).to_pandas() + for i in range(len(rs)): + assert rs.score[i] == pytest.approx(cosine_distance(rs.vector[i], q), abs=1e-6) + assert 0 <= rs.score[i] <= 1 + + +def cosine_distance(vec1, vec2): + return 1 - np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)) + + def test_count_rows(tmp_path): df = pd.DataFrame({"values": range(100)}) tbl = pa.Table.from_pandas(df) diff --git a/rust/src/index/vector/pq.rs b/rust/src/index/vector/pq.rs index f19bd475a45..e1e699e1835 100644 --- a/rust/src/index/vector/pq.rs +++ b/rust/src/index/vector/pq.rs @@ -34,7 +34,7 @@ use crate::arrow::*; use crate::dataset::ROW_ID; use crate::index::{pb, vector::kmeans::train_kmeans, vector::SCORE_COL}; use crate::io::object_reader::{read_fixed_stride_array, ObjectReader}; -use crate::linalg::{l2::l2_distance_batch, normalize::normalize}; +use crate::linalg::{l2::l2_distance_batch, norm_l2::norm_l2}; use crate::{Error, Result}; /// Product Quantization Index. @@ -133,7 +133,7 @@ impl PQIndex { let mut xy_table: Vec = vec![]; let mut y_norm_table: Vec = vec![]; - let x_norm = normalize(key.values()).powi(2); + let x_norm = norm_l2(key.values()).powi(2); let sub_vector_length = self.dimension / self.num_sub_vectors; for i in 0..self.num_sub_vectors { diff --git a/rust/src/linalg.rs b/rust/src/linalg.rs index 8b9708a65de..f8460fb7e05 100644 --- a/rust/src/linalg.rs +++ b/rust/src/linalg.rs @@ -15,7 +15,7 @@ pub mod cosine; pub mod dot; pub mod l2; -pub mod normalize; +pub mod norm_l2; #[cfg(target_arch = "x86_64")] pub mod x86_64; diff --git a/rust/src/linalg/cosine.rs b/rust/src/linalg/cosine.rs index 9cf2bd649e0..f4be811712c 100644 --- a/rust/src/linalg/cosine.rs +++ b/rust/src/linalg/cosine.rs @@ -19,7 +19,7 @@ use arrow_array::Float32Array; use num_traits::real::Real; use super::dot::dot; -use super::normalize::normalize; +use super::norm_l2::norm_l2; /// Cosine Distance pub trait Cosine { @@ -37,7 +37,7 @@ impl Cosine for [f32] { #[inline] fn cosine(&self, other: &[f32]) -> f32 { - let x_norm = normalize(self); + let x_norm = norm_l2(self); self.cosine_fast(x_norm, other) } @@ -79,7 +79,7 @@ pub fn cosine_distance(from: &[f32], to: &[f32]) -> f32 { /// /// pub fn cosine_distance_batch(from: &[f32], to: &[f32], dimension: usize) -> Arc { - let x_norm = normalize(from); + let x_norm = norm_l2(from); let dists = unsafe { Float32Array::from_trusted_len_iter( @@ -94,6 +94,9 @@ pub fn cosine_distance_batch(from: &[f32], to: &[f32], dimension: usize) -> Arc< mod x86_64 { use std::arch::x86_64::*; + use super::dot; + use super::norm_l2; + pub(crate) mod avx { use super::*; @@ -102,7 +105,7 @@ mod x86_64 { unsafe { use crate::linalg::x86_64::avx::add_f32_register; - let len = x_vector.len(); + let len = x_vector.len() / 8 * 8; let mut xy = _mm256_setzero_ps(); let mut y_sq = _mm256_setzero_ps(); for i in (0..len).step_by(8) { @@ -111,7 +114,12 @@ mod x86_64 { xy = _mm256_fmadd_ps(x, y, xy); y_sq = _mm256_fmadd_ps(y, y, y_sq); } - 1.0 - add_f32_register(xy) / (x_norm * add_f32_register(y_sq).sqrt()) + // handle remaining elements + let mut dotprod = add_f32_register(xy); + dotprod += dot(&x_vector[len..], &y_vector[len..]); + let mut y_sq_sum = add_f32_register(y_sq); + y_sq_sum += norm_l2(&y_vector[len..]).powi(2); + 1.0 - dotprod / (x_norm * y_sq_sum.sqrt()) } } } @@ -121,13 +129,16 @@ mod x86_64 { mod aarch64 { use std::arch::aarch64::*; + use super::dot; + use super::norm_l2; + pub(crate) mod neon { use super::*; #[inline] pub(crate) fn cosine_f32(x: &[f32], y: &[f32], x_norm: f32) -> f32 { unsafe { - let len = x.len(); + let len = x.len() / 4 * 4; let buf = [0.0_f32; 4]; let mut xy = vld1q_f32(buf.as_ptr()); let mut y_sq = xy; @@ -137,7 +148,12 @@ mod aarch64 { xy = vfmaq_f32(xy, left, right); y_sq = vfmaq_f32(y_sq, right, right); } - 1.0 - vaddvq_f32(xy) / (x_norm * vaddvq_f32(y_sq).sqrt()) + // handle remaining elements + let mut dotprod = vaddvq_f32(xy); + dotprod += dot(&x[len..], &y[len..]); + let mut y_sq_sum = vaddvq_f32(y_sq); + y_sq_sum += norm_l2(&y[len..]).powi(2); + 1.0 - dotprod / (x_norm * y_sq_sum.sqrt()) } } } @@ -163,4 +179,13 @@ mod tests { // from sklearn.metrics.pairwise import cosine_similarity assert_relative_eq!(d.value(0), 1.0 - 0.8735806510613104); } + + #[test] + fn test_cosine_not_aligned() { + let x: Float32Array = vec![16 as f32, 32 as f32].into(); + let y: Float32Array = vec![1 as f32, 2 as f32, 4 as f32, 8 as f32].into(); + let d = cosine_distance_batch(x.values(), y.values(), 2); + assert_relative_eq!(d.value(0), 0.0); + assert_relative_eq!(d.value(1), 0.0); + } } diff --git a/rust/src/linalg/normalize.rs b/rust/src/linalg/norm_l2.rs similarity index 87% rename from rust/src/linalg/normalize.rs rename to rust/src/linalg/norm_l2.rs index 585198af38a..5b03ced9ae8 100644 --- a/rust/src/linalg/normalize.rs +++ b/rust/src/linalg/norm_l2.rs @@ -17,16 +17,16 @@ /// The parameters must be cache line aligned. For example, from /// Arrow Arrays, i.e., Float32Array #[inline] -pub fn normalize(vector: &[f32]) -> f32 { +pub fn norm_l2(vector: &[f32]) -> f32 { #[cfg(target_arch = "aarch64")] { - aarch64::neon::normalize_f32(vector) + aarch64::neon::norm_l2(vector) } #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("fma") { - return x86_64::avx::normalize_f32(vector); + return x86_64::avx::norm_l2_f32(vector); } } @@ -42,7 +42,7 @@ mod x86_64 { use std::arch::x86_64::*; #[inline] - pub(crate) fn normalize_f32(vector: &[f32]) -> f32 { + pub(crate) fn norm_l2_f32(vector: &[f32]) -> f32 { let len = vector.len() / 8 * 8; let mut sum = unsafe { let mut sums = _mm256_setzero_ps(); @@ -64,7 +64,7 @@ mod aarch64 { use std::arch::aarch64::*; #[inline] - pub(crate) fn normalize_f32(vector: &[f32]) -> f32 { + pub(crate) fn norm_l2(vector: &[f32]) -> f32 { let len = vector.len() / 4 * 4; let mut sum = unsafe { let buf = [0.0_f32; 4]; @@ -86,13 +86,13 @@ mod tests { use super::*; #[test] - fn test_normalize() { + fn test_norm_l2() { let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; - let result = normalize(&data); + let result = norm_l2(&data); assert_eq!(result, (1..=8).map(|v| (v * v) as f32).sum::().sqrt()); - let not_aligned = normalize(&data[2..]); + let not_aligned = norm_l2(&data[2..]); assert_eq!( not_aligned, (3..=8).map(|v| (v * v) as f32).sum::().sqrt()