From de13995e3dd81964ffa86349d9c44385acef61b6 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 12 Mar 2024 11:10:59 -0700 Subject: [PATCH 1/2] Add Rust API docs This adds the rust API docs to the generated sphinx documentation. The rust docs are generated by `cargo doc` and then copied over to the sphinx `_static` html directory, and included in the generated docs site with an iframe. --- build.sh | 4 ++ dependencies.yaml | 2 + docs/source/rust_api.rst | 13 ++++-- rust/cuvs/src/cagra/index.rs | 18 +++++++- rust/cuvs/src/cagra/index_params.rs | 1 + rust/cuvs/src/cagra/mod.rs | 63 ++++++++++++++++++++++++++++ rust/cuvs/src/cagra/search_params.rs | 1 + rust/cuvs/src/dlpack.rs | 6 +++ rust/cuvs/src/lib.rs | 5 +++ rust/cuvs/src/resources.rs | 4 ++ 10 files changed, 113 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 8571a574c..37aadad88 100755 --- a/build.sh +++ b/build.sh @@ -54,6 +54,7 @@ LIBCUVS_BUILD_DIR=${LIBCUVS_BUILD_DIR:=${REPODIR}/cpp/build} SPHINX_BUILD_DIR=${REPODIR}/docs DOXYGEN_BUILD_DIR=${REPODIR}/cpp/doxygen PYTHON_BUILD_DIR=${REPODIR}/python/cuvs/_skbuild +RUST_BUILD_DIR=${REPODIR}/rust BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR}" # Set defaults for vars modified by flags to this script @@ -395,6 +396,9 @@ if hasArg docs; then set -x cd ${DOXYGEN_BUILD_DIR} doxygen Doxyfile + cd ${RUST_BUILD_DIR} + cargo doc -p cuvs --no-deps + rsync -av ${RUST_BUILD_DIR}/target/doc/ ${SPHINX_BUILD_DIR}/source/_static/rust cd ${SPHINX_BUILD_DIR} sphinx-build -b html source _html fi diff --git a/dependencies.yaml b/dependencies.yaml index 7fff7811a..cd6cb0cf0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -44,6 +44,8 @@ files: - cupy - docs - py_version + - rust + - build rust: output: none includes: diff --git a/docs/source/rust_api.rst b/docs/source/rust_api.rst index 254136e3a..8297dbef9 100644 --- a/docs/source/rust_api.rst +++ b/docs/source/rust_api.rst @@ -2,7 +2,14 @@ Rust API Documentation ~~~~~~~~~~~~~~~~~~~~~~ 
-.. _api: +.. raw:: html -.. toctree:: - :maxdepth: 4 + + + + diff --git a/rust/cuvs/src/cagra/index.rs b/rust/cuvs/src/cagra/index.rs index 3c45efafd..3394889aa 100644 --- a/rust/cuvs/src/cagra/index.rs +++ b/rust/cuvs/src/cagra/index.rs @@ -21,11 +21,18 @@ use crate::dlpack::ManagedTensor; use crate::error::{check_cuvs, Result}; use crate::resources::Resources; +/// CAGRA ANN Index #[derive(Debug)] pub struct Index(ffi::cuvsCagraIndex_t); impl Index { - /// Builds a new index + /// Builds a new Index from the dataset for efficient search. + /// + /// # Arguments + /// + /// * `res` - Resources to use + /// * `params` - Parameters for building the index + /// * `dataset` - A row-major matrix on either the host or device to index pub fn build>( res: &Resources, params: &IndexParams, @@ -53,6 +60,15 @@ impl Index { } } + /// Perform an Approximate Nearest Neighbors search on the Index + /// + /// # Arguments + /// + /// * `res` - Resources to use + /// * `params` - Parameters to use in searching the index + /// * `queries` - A matrix in device memory to query for + /// * `neighbors` - Matrix in device memory that receives the indices of the nearest neighbors + /// * `distances` - Matrix in device memory that receives the distances of the nearest neighbors pub fn search( self, res: &Resources, diff --git a/rust/cuvs/src/cagra/index_params.rs b/rust/cuvs/src/cagra/index_params.rs index 7ff68d3f3..ecc660531 100644 --- a/rust/cuvs/src/cagra/index_params.rs +++ b/rust/cuvs/src/cagra/index_params.rs @@ -24,6 +24,7 @@ pub type BuildAlgo = ffi::cuvsCagraGraphBuildAlgo; pub struct IndexParams(pub ffi::cuvsCagraIndexParams_t); impl IndexParams { + /// Returns a new IndexParams pub fn new() -> Result { unsafe { let mut params = core::mem::MaybeUninit::::uninit(); diff --git a/rust/cuvs/src/cagra/mod.rs b/rust/cuvs/src/cagra/mod.rs index 55705c27a..417ed9b0d 100644 --- a/rust/cuvs/src/cagra/mod.rs +++ b/rust/cuvs/src/cagra/mod.rs @@ -14,6 +14,69 @@ * limitations under the 
License. */ +//! CAGRA is a graph-based nearest neighbors implementation with state-of-the-art +//! query performance for both small- and large-batch sized search. +//! +//! Example: +//! ``` +//! +//! use cuvs::cagra::{Index, IndexParams, SearchParams}; +//! use cuvs::{ManagedTensor, Resources, Result}; +//! +//! use ndarray::s; +//! use ndarray_rand::rand_distr::Uniform; +//! use ndarray_rand::RandomExt; +//! +//! fn cagra_example() -> Result<()> { +//! let res = Resources::new()?; +//! +//! // Create a new random dataset to index +//! let n_datapoints = 65536; +//! let n_features = 512; +//! let dataset = +//! ndarray::Array::::random((n_datapoints, n_features), Uniform::new(0., 1.0)); +//! +//! // build the cagra index +//! let build_params = IndexParams::new()?; +//! let index = Index::build(&res, &build_params, &dataset)?; +//! println!( +//! "Indexed {}x{} datapoints into cagra index", +//! n_datapoints, n_features +//! ); +//! +//! // use the first 4 points from the dataset as queries: this will test that we get them back +//! // as their own nearest neighbor +//! let n_queries = 4; +//! let queries = dataset.slice(s![0..n_queries, ..]); +//! +//! let k = 10; +//! +//! // CAGRA search API requires queries and outputs to be on device memory +//! // copy query data over, and allocate new device memory for the distances/neighbors +//! // outputs +//! let queries = ManagedTensor::from(&queries).to_device(&res)?; +//! let mut neighbors_host = ndarray::Array::::zeros((n_queries, k)); +//! let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res)?; +//! +//! let mut distances_host = ndarray::Array::::zeros((n_queries, k)); +//! let distances = ManagedTensor::from(&distances_host).to_device(&res)?; +//! +//! let search_params = SearchParams::new()?; +//! +//! index.search(&res, &search_params, &queries, &neighbors, &distances)?; +//! +//! // Copy back to host memory +//! distances.to_host(&res, &mut distances_host)?; +//! 
neighbors.to_host(&res, &mut neighbors_host)?; +//! +//! // nearest neighbors should be themselves, since queries are from the +//! // dataset +//! println!("Neighbors {:?}", neighbors_host); +//! println!("Distances {:?}", distances_host); +//! Ok(()) +//! } +//! ``` + mod index; mod index_params; mod search_params; diff --git a/rust/cuvs/src/cagra/search_params.rs b/rust/cuvs/src/cagra/search_params.rs index 11ac92bdd..14956966e 100644 --- a/rust/cuvs/src/cagra/search_params.rs +++ b/rust/cuvs/src/cagra/search_params.rs @@ -25,6 +25,7 @@ pub type HashMode = ffi::cuvsCagraHashMode; pub struct SearchParams(pub ffi::cuvsCagraSearchParams_t); impl SearchParams { + /// Returns a new SearchParams object pub fn new() -> Result { unsafe { let mut params = core::mem::MaybeUninit::::uninit(); diff --git a/rust/cuvs/src/dlpack.rs b/rust/cuvs/src/dlpack.rs index b86959db1..a1d4e41c6 100644 --- a/rust/cuvs/src/dlpack.rs +++ b/rust/cuvs/src/dlpack.rs @@ -19,6 +19,8 @@ use std::convert::From; use crate::error::{check_cuda, Result}; use crate::resources::Resources; +/// ManagedTensor is a wrapper around a dlpack DLManagedTensor object. +/// This lets you pass matrices in device or host memory into cuvs. #[derive(Debug)] pub struct ManagedTensor(ffi::DLManagedTensor); @@ -41,6 +43,8 @@ impl ManagedTensor { bytes } + /// Creates a new ManagedTensor on the current GPU device, and copies + /// the data into it. pub fn to_device(&self, _res: &Resources) -> Result { unsafe { let bytes = self.bytes(); @@ -64,6 +68,8 @@ impl ManagedTensor { Ok(ManagedTensor(ret)) } } + + /// Copies data from device memory into host memory pub fn to_host< T: IntoDtype, S: ndarray::RawData + ndarray::RawDataMut, diff --git a/rust/cuvs/src/lib.rs b/rust/cuvs/src/lib.rs index 7a6f847f5..273f04723 100644 --- a/rust/cuvs/src/lib.rs +++ b/rust/cuvs/src/lib.rs @@ -14,6 +14,11 @@ * limitations under the License. */ +//! cuVS: Rust bindings for Vector Search on the GPU +//! +//! 
This crate provides Rust bindings for cuVS, allowing you to run +//! approximate nearest neighbors search on the GPU. + pub mod cagra; mod dlpack; mod error; diff --git a/rust/cuvs/src/resources.rs b/rust/cuvs/src/resources.rs index ad7113e6b..a5c503dc5 100644 --- a/rust/cuvs/src/resources.rs +++ b/rust/cuvs/src/resources.rs @@ -17,10 +17,14 @@ use crate::error::{check_cuvs, Result}; use std::io::{stderr, Write}; +/// Resources are objects that are shared between function calls, +/// and include things like CUDA streams, cuBLAS handles and other +/// resources that are expensive to create. #[derive(Debug)] pub struct Resources(pub ffi::cuvsResources_t); impl Resources { + /// Returns a new Resources object pub fn new() -> Result { let mut res: ffi::cuvsResources_t = 0; unsafe { From 7cb3bea4296e4a13848c1bb3fd32d110fbb8e509 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 12 Mar 2024 12:01:49 -0700 Subject: [PATCH 2/2] try to fix docs build --- ci/build_docs.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 983e97385..fbb004da4 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -12,7 +12,12 @@ rapids-dependency-file-generator \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --force -f env.yaml -n docs + +# seeing failures on activating the environment here on unbound locals +# apply workaround from https://github.com/conda/conda/issues/8186#issuecomment-532874667 +set +eu conda activate docs +set -eu rapids-print-env