Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ paste.workspace = true
pgrx = { version = "=0.16.0", default-features = false, features = ["cshim"] }
pgrx-catalog = "0.3.1"
rand.workspace = true
rusqlite = { version = "0.37.0", features = ["bundled"] }
seq-macro.workspace = true
serde.workspace = true
toml = "0.9.5"
Expand Down
2 changes: 1 addition & 1 deletion scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import numpy as np

DEFAULT_LISTS = 4096
N_ITER = 25
N_ITER = 10
CHUNKS = 10
SEED = 42
MAX_POINTS_PER_CLUSTER = 256
Expand Down
24 changes: 24 additions & 0 deletions src/index/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// Copyright (c) 2025 TensorChord Inc.

use crate::index::storage::PostgresRelation;
use crate::recorder::dump;
use pgrx::iter::SetOfIterator;
use pgrx::pg_sys::Oid;
use pgrx_catalog::{PgAm, PgClass, PgClassRelkind};

Expand Down Expand Up @@ -84,3 +86,25 @@ impl Drop for Index {
}
}
}

#[pgrx::pg_extern(sql = "")]
fn _vchordrq_sampled_vectors(indexrelid: Oid) -> SetOfIterator<'static, String> {
let pg_am = PgAm::search_amname(c"vchordrq").unwrap();
let Some(pg_am) = pg_am.get() else {
pgrx::error!("vchord is not installed");
};
let pg_class = PgClass::search_reloid(indexrelid).unwrap();
let Some(pg_class) = pg_class.get() else {
pgrx::error!("the relation does not exist");
};
if pg_class.relkind() != PgClassRelkind::Index {
pgrx::error!("the relation {:?} is not an index", pg_class.relname());
}
if pg_class.relam() != pg_am.oid() {
pgrx::error!("the index {:?} is not a vchordrq index", pg_class.relname());
}
// The user must have access to the index, if not, raise an error from Postgres.
let _relation = Index::open(indexrelid, pgrx::pg_sys::AccessShareLock as _);
let queries = dump(indexrelid.to_u32());
SetOfIterator::new(queries)
}
46 changes: 46 additions & 0 deletions src/index/gucs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ pub enum PostgresIo {
ReadStream,
}

/// Backing storage for the `vchordrq.query_sampling_enable` GUC; the default is `false`.
static VCHORDRQ_QUERY_SAMPLING_ENABLE: GucSetting<bool> = GucSetting::<bool>::new(false);

/// Backing storage for the `vchordrq.query_sampling_max_records` GUC; the default is `0`.
/// `init` registers it with the bounds 0 and 10000.
static VCHORDRQ_QUERY_SAMPLING_MAX_RECORDS: GucSetting<i32> = GucSetting::<i32>::new(0);

/// Backing storage for the `vchordrq.query_sampling_rate` GUC; the default is `0.0`.
/// `init` registers it with the bounds 0.0 and 1.0.
static VCHORDRQ_QUERY_SAMPLING_RATE: GucSetting<f64> = GucSetting::<f64>::new(0.0);

static VCHORDG_ENABLE_SCAN: GucSetting<bool> = GucSetting::<bool>::new(true);

static VCHORDG_EF_SEARCH: GucSetting<i32> = GucSetting::<i32>::new(64);
Expand Down Expand Up @@ -158,6 +164,34 @@ pub fn init() {
GucContext::Userset,
GucFlags::default(),
);
GucRegistry::define_bool_guc(
c"vchordrq.query_sampling_enable",
c"`query_sampling_enable` argument of vchordrq.",
c"`query_sampling_enable` argument of vchordrq.",
&VCHORDRQ_QUERY_SAMPLING_ENABLE,
GucContext::Userset,
GucFlags::default(),
);
GucRegistry::define_int_guc(
c"vchordrq.query_sampling_max_records",
c"`query_sampling_max_records` argument of vchordrq.",
c"`query_sampling_max_records` argument of vchordrq.",
&VCHORDRQ_QUERY_SAMPLING_MAX_RECORDS,
0,
10000,
GucContext::Userset,
GucFlags::default(),
);
GucRegistry::define_float_guc(
c"vchordrq.query_sampling_rate",
c"`query_sampling_rate` argument of vchordrq.",
c"`query_sampling_rate` argument of vchordrq.",
&VCHORDRQ_QUERY_SAMPLING_RATE,
0.0,
1.0,
GucContext::Userset,
GucFlags::default(),
);
unsafe {
#[cfg(any(feature = "pg13", feature = "pg14"))]
pgrx::pg_sys::EmitWarningsOnPlaceholders(c"vchordrq".as_ptr());
Expand Down Expand Up @@ -331,3 +365,15 @@ pub fn vchordrq_io_rerank() -> Io {
PostgresIo::ReadStream => Io::Stream,
}
}

/// Returns the current value of the `vchordrq.query_sampling_enable` setting.
pub fn vchordrq_query_sampling_enable() -> bool {
VCHORDRQ_QUERY_SAMPLING_ENABLE.get()
}

/// Returns the current value of the `vchordrq.query_sampling_max_records` setting.
///
/// The setting is stored as an `i32` and is registered with a lower bound of 0, so the
/// conversion is not expected to fail. It is still done with a checked conversion, so
/// that a negative value would yield 0 instead of wrapping around to a very large limit
/// as an `as u32` cast would.
pub fn vchordrq_query_sampling_max_records() -> u32 {
    let configured = VCHORDRQ_QUERY_SAMPLING_MAX_RECORDS.get();
    u32::try_from(configured).unwrap_or(0)
}

/// Returns the current value of the `vchordrq.query_sampling_rate` setting.
pub fn vchordrq_query_sampling_rate() -> f64 {
VCHORDRQ_QUERY_SAMPLING_RATE.get()
}
2 changes: 2 additions & 0 deletions src/index/scanners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// Copyright (c) 2025 TensorChord Inc.

use crate::index::fetcher::Fetcher;
use crate::recorder::Recorder;
use algo::{Bump, Page, RelationPrefetch, RelationRead, RelationReadStream};
use pgrx::pg_sys::Datum;

Expand Down Expand Up @@ -44,6 +45,7 @@ pub trait SearchBuilder: 'static {
options: Self::Options,
fetcher: impl Fetcher + 'b,
bump: &'b impl Bump,
recorder: impl Recorder,
) -> Box<dyn Iterator<Item = (f32, [u16; 3], bool)> + 'b>
where
R: RelationRead + RelationPrefetch + RelationReadStream,
Expand Down
10 changes: 9 additions & 1 deletion src/index/vchordg/am/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::index::scanners::SearchBuilder;
use crate::index::storage::PostgresRelation;
use crate::index::vchordg::opclass::opfamily;
use crate::index::vchordg::scanners::*;
use crate::recorder::DefaultRecorder;
use pgrx::datum::Internal;
use pgrx::pg_sys::Datum;
use std::cell::LazyCell;
Expand Down Expand Up @@ -372,6 +373,13 @@ pub unsafe extern "C-unwind" fn amrescan(
)
})
};
// Query recording is disabled for vchordg indexes for now.
let recorder = DefaultRecorder {
enable: false,
rate: None,
max_records: 0,
index: (*(*scan).indexRelation).rd_id.to_u32(),
};
// PAY ATTENTION: `scanning` references `bump`, so `scanning` must be dropped before `bump`.
let bump = scanner.bump.as_ref();
scanner.scanning = match opfamily {
Expand All @@ -397,7 +405,7 @@ pub unsafe extern "C-unwind" fn amrescan(
LazyCell::new(Box::new(move || {
// only do this since `PostgresRelation` has no destructor
let index = bump.alloc(index.clone());
builder.build(index, options, fetcher, bump)
builder.build(index, options, fetcher, bump, recorder)
}))
}
};
Expand Down
21 changes: 17 additions & 4 deletions src/index/vchordg/scanners/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use crate::index::scanners::{Io, SearchBuilder};
use crate::index::vchordg::algo::*;
use crate::index::vchordg::opclass::Opfamily;
use crate::index::vchordg::scanners::SearchOptions;
use crate::recorder::{Recorder, halfvec_out, vector_out};
use algo::accessor::{Dot, L2S};
use algo::*;
use distance::Distance;
Expand All @@ -26,6 +27,7 @@ use std::num::NonZero;
use vchordg::operator::{self};
use vchordg::types::{DistanceKind, OwnedVector, VectorKind};
use vchordg::*;
use vector::VectorOwned;
use vector::vect::{VectBorrowed, VectOwned};

pub struct DefaultBuilder {
Expand Down Expand Up @@ -78,6 +80,7 @@ impl SearchBuilder for DefaultBuilder {
options: SearchOptions,
_fetcher: impl Fetcher + 'b,
bump: &'b impl Bump,
recorder: impl Recorder,
) -> Box<dyn Iterator<Item = (f32, [u16; 3], bool)> + 'b>
where
R: RelationRead + RelationPrefetch + RelationReadStream,
Expand Down Expand Up @@ -120,7 +123,7 @@ impl SearchBuilder for DefaultBuilder {
match (opfamily.vector_kind(), opfamily.distance_kind()) {
(VectorKind::Vecf32, DistanceKind::L2S) => {
type Op = operator::Op<VectOwned<f32>, L2S>;
let unprojected = if let OwnedVector::Vecf32(vector) = vector {
let unprojected = if let OwnedVector::Vecf32(vector) = vector.clone() {
VectBorrowed::new(bump.alloc_slice(vector.slice()))
} else {
unreachable!()
Expand Down Expand Up @@ -215,7 +218,7 @@ impl SearchBuilder for DefaultBuilder {
}
(VectorKind::Vecf16, DistanceKind::L2S) => {
type Op = operator::Op<VectOwned<f16>, L2S>;
let unprojected = if let OwnedVector::Vecf16(vector) = vector {
let unprojected = if let OwnedVector::Vecf16(vector) = vector.clone() {
VectBorrowed::new(bump.alloc_slice(vector.slice()))
} else {
unreachable!()
Expand Down Expand Up @@ -310,7 +313,7 @@ impl SearchBuilder for DefaultBuilder {
}
(VectorKind::Vecf32, DistanceKind::Dot) => {
type Op = operator::Op<VectOwned<f32>, Dot>;
let unprojected = if let OwnedVector::Vecf32(vector) = vector {
let unprojected = if let OwnedVector::Vecf32(vector) = vector.clone() {
VectBorrowed::new(bump.alloc_slice(vector.slice()))
} else {
unreachable!()
Expand Down Expand Up @@ -405,7 +408,7 @@ impl SearchBuilder for DefaultBuilder {
}
(VectorKind::Vecf16, DistanceKind::Dot) => {
type Op = operator::Op<VectOwned<f16>, Dot>;
let unprojected = if let OwnedVector::Vecf16(vector) = vector {
let unprojected = if let OwnedVector::Vecf16(vector) = vector.clone() {
VectBorrowed::new(bump.alloc_slice(vector.slice()))
} else {
unreachable!()
Expand Down Expand Up @@ -509,6 +512,16 @@ impl SearchBuilder for DefaultBuilder {
} else {
iter
};
if recorder.is_enabled() {
match &vector {
OwnedVector::Vecf32(v) => {
recorder.send(&vector_out(v.as_borrowed()));
}
OwnedVector::Vecf16(v) => {
recorder.send(&halfvec_out(v.as_borrowed()));
}
}
}
Box::new(iter.map(move |(distance, pointer)| {
let (key, _) = pointer_to_kv(pointer);
(opfamily.output(distance), key, recheck)
Expand Down
15 changes: 13 additions & 2 deletions src/index/vchordrq/am/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::index::scanners::SearchBuilder;
use crate::index::storage::PostgresRelation;
use crate::index::vchordrq::opclass::{Opfamily, opfamily};
use crate::index::vchordrq::scanners::*;
use crate::recorder::DefaultRecorder;
use pgrx::datum::Internal;
use pgrx::pg_sys::Datum;
use std::cell::LazyCell;
Expand Down Expand Up @@ -455,6 +456,16 @@ pub unsafe extern "C-unwind" fn amrescan(
)
})
};
let rate = match gucs::vchordrq_query_sampling_rate() {
0.0 => None,
rate => Some(rate),
};
let recorder = DefaultRecorder {
enable: gucs::vchordrq_query_sampling_enable(),
rate,
max_records: gucs::vchordrq_query_sampling_max_records(),
index: (*(*scan).indexRelation).rd_id.to_u32(),
};
// PAY ATTENTION: `scanning` references `bump`, so `scanning` must be dropped before `bump`.
let bump = scanner.bump.as_ref();
scanner.scanning = match opfamily {
Expand All @@ -480,7 +491,7 @@ pub unsafe extern "C-unwind" fn amrescan(
LazyCell::new(Box::new(move || {
// only do this since `PostgresRelation` has no destructor
let index = bump.alloc(index.clone());
builder.build(index, options, fetcher, bump)
builder.build(index, options, fetcher, bump, recorder)
}))
}
Opfamily::VectorMaxsim | Opfamily::HalfvecMaxsim => {
Expand All @@ -500,7 +511,7 @@ pub unsafe extern "C-unwind" fn amrescan(
LazyCell::new(Box::new(move || {
// only do this since `PostgresRelation` has no destructor
let index = bump.alloc(index.clone());
builder.build(index, options, fetcher, bump)
builder.build(index, options, fetcher, bump, recorder)
}))
}
};
Expand Down
Loading
Loading