Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accept host_mdspan for IVF-PQ build and extend #148

Merged
merged 6 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
311 changes: 311 additions & 0 deletions cpp/include/cuvs/neighbors/ivf_pq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,146 @@ void build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::device_matrix_view<const uint8_t, int64_t, raft::row_major> dataset,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);
/**
* @brief Build the index from the dataset for efficient search.
*
* This overload accepts the dataset as a `raft::host_matrix_view` with `float` elements
* and returns the index by value.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* ivf_pq::index_params index_params;
* // create and fill the index from a [N, D] dataset
* auto index = ivf_pq::build(handle, index_params, dataset);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] index_params configure the index building
* @param[in] dataset a host_matrix_view to a row-major matrix [n_rows, dim]
*
* @return the constructed ivf-pq index
*/
auto build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::host_matrix_view<const float, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::ivf_pq::index<int64_t>;

/**
* @brief Build the index from the dataset for efficient search.
*
* This overload accepts the dataset as a `raft::host_matrix_view` with `float` elements.
* It returns nothing; the result is delivered through the `idx` pointer.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* ivf_pq::index_params index_params;
* // `index` is an existing ivf_pq::index<int64_t> object (its construction is not shown
* // here); the index built from the [N, D] dataset is written through the pointer
* ivf_pq::build(handle, index_params, dataset, &index);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] index_params configure the index building
* @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim]
* @param[out] idx pointer to the ivf_pq::index that receives the result
*
*/
void build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::host_matrix_view<const float, int64_t, raft::row_major> dataset,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);

/**
* @brief Build the index from the dataset for efficient search.
*
* This overload accepts the dataset as a `raft::host_matrix_view` with `int8_t` elements
* and returns the index by value.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* ivf_pq::index_params index_params;
* // create and fill the index from a [N, D] dataset
* auto index = ivf_pq::build(handle, index_params, dataset);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] index_params configure the index building
* @param[in] dataset a host_matrix_view to a row-major matrix [n_rows, dim]
*
* @return the constructed ivf-pq index
*/
auto build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::host_matrix_view<const int8_t, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::ivf_pq::index<int64_t>;

/**
* @brief Build the index from the dataset for efficient search.
*
* This overload accepts the dataset as a `raft::host_matrix_view` with `int8_t` elements.
* It returns nothing; the result is delivered through the `idx` pointer.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* ivf_pq::index_params index_params;
* // `index` is an existing ivf_pq::index<int64_t> object (its construction is not shown
* // here); the index built from the [N, D] dataset is written through the pointer
* ivf_pq::build(handle, index_params, dataset, &index);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] index_params configure the index building
* @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim]
* @param[out] idx pointer to the ivf_pq::index that receives the result
*
*/
void build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::host_matrix_view<const int8_t, int64_t, raft::row_major> dataset,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);

/**
* @brief Build the index from the dataset for efficient search.
*
* This overload accepts the dataset as a `raft::host_matrix_view` with `uint8_t` elements
* and returns the index by value.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* ivf_pq::index_params index_params;
* // create and fill the index from a [N, D] dataset
* auto index = ivf_pq::build(handle, index_params, dataset);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] index_params configure the index building
* @param[in] dataset a host_matrix_view to a row-major matrix [n_rows, dim]
*
* @return the constructed ivf-pq index
*/
auto build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::ivf_pq::index<int64_t>;

/**
* @brief Build the index from the dataset for efficient search.
*
* This overload accepts the dataset as a `raft::host_matrix_view` with `uint8_t` elements.
* It returns nothing; the result is delivered through the `idx` pointer.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* ivf_pq::index_params index_params;
* // `index` is an existing ivf_pq::index<int64_t> object (its construction is not shown
* // here); the index built from the [N, D] dataset is written through the pointer
* ivf_pq::build(handle, index_params, dataset, &index);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] index_params configure the index building
* @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim]
* @param[out] idx pointer to the ivf_pq::index that receives the result
*
*/
void build(raft::resources const& handle,
const cuvs::neighbors::ivf_pq::index_params& index_params,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> dataset,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);
/**
* @}
*/
Expand Down Expand Up @@ -772,6 +912,177 @@ void extend(raft::resources const& handle,
raft::device_matrix_view<const uint8_t, int64_t, raft::row_major> new_vectors,
std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);

/**
* @brief Extend the index with the new data.
*
* This overload accepts the new vectors (`float` elements) and the optional indices as
* host views. The original index is taken by const reference and an index is returned
* by value.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* ivf_pq::index_params index_params;
* index_params.add_data_on_build = false; // don't populate index on build
* index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
* // train the index from a [N, D] dataset
* auto index_empty = ivf_pq::build(handle, index_params, dataset);
* // fill the index with the data
* std::optional<raft::host_vector_view<const int64_t, int64_t>> no_op = std::nullopt;
* auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()]
* @param[in] new_indices a host vector view to a vector of indices [n_rows].
* If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
* here to imply a continuous range `[0...n_rows)`.
* @param[in] idx the original index, taken by const reference
*
* @return the resulting ivf-pq index (the filled index in the usage example above)
*/
auto extend(raft::resources const& handle,
raft::host_matrix_view<const float, int64_t, raft::row_major> new_vectors,
std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
const cuvs::neighbors::ivf_pq::index<int64_t>& idx)
-> cuvs::neighbors::ivf_pq::index<int64_t>;

/**
* @brief Extend the index with the new data.
*
* This overload accepts the new vectors (`float` elements) and the optional indices as
* host views. It returns nothing; the index to extend is passed by pointer.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* ivf_pq::index_params index_params;
* index_params.add_data_on_build = false; // don't populate index on build
* index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
* // train the index from a [N, D] dataset
* auto index_empty = ivf_pq::build(handle, index_params, dataset);
* // fill the index with the data
* std::optional<raft::host_vector_view<const int64_t, int64_t>> no_op = std::nullopt;
* ivf_pq::extend(handle, new_vectors, no_op, &index_empty);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()]
* @param[in] new_indices a host vector view to a vector of indices [n_rows].
* If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
* here to imply a continuous range `[0...n_rows)`.
* @param[inout] idx pointer to the index to extend
*/
void extend(raft::resources const& handle,
raft::host_matrix_view<const float, int64_t, raft::row_major> new_vectors,
std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);

/**
* @brief Extend the index with the new data.
*
* This overload accepts the new vectors (`int8_t` elements) and the optional indices as
* host views. The original index is taken by const reference and an index is returned
* by value.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* ivf_pq::index_params index_params;
* index_params.add_data_on_build = false; // don't populate index on build
* index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
* // train the index from a [N, D] dataset
* auto index_empty = ivf_pq::build(handle, index_params, dataset);
* // fill the index with the data
* std::optional<raft::host_vector_view<const int64_t, int64_t>> no_op = std::nullopt;
* auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()]
* @param[in] new_indices a host vector view to a vector of indices [n_rows].
* If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
* here to imply a continuous range `[0...n_rows)`.
* @param[in] idx the original index, taken by const reference
*
* @return the resulting ivf-pq index (the filled index in the usage example above)
*/
auto extend(raft::resources const& handle,
raft::host_matrix_view<const int8_t, int64_t, raft::row_major> new_vectors,
std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
const cuvs::neighbors::ivf_pq::index<int64_t>& idx)
-> cuvs::neighbors::ivf_pq::index<int64_t>;

/**
* @brief Extend the index with the new data.
*
* This overload accepts the new vectors (`int8_t` elements) and the optional indices as
* host views. It returns nothing; the index to extend is passed by pointer.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* ivf_pq::index_params index_params;
* index_params.add_data_on_build = false; // don't populate index on build
* index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
* // train the index from a [N, D] dataset
* auto index_empty = ivf_pq::build(handle, index_params, dataset);
* // fill the index with the data
* std::optional<raft::host_vector_view<const int64_t, int64_t>> no_op = std::nullopt;
* ivf_pq::extend(handle, new_vectors, no_op, &index_empty);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()]
* @param[in] new_indices a host vector view to a vector of indices [n_rows].
* If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
* here to imply a continuous range `[0...n_rows)`.
* @param[inout] idx pointer to the index to extend
*/
void extend(raft::resources const& handle,
raft::host_matrix_view<const int8_t, int64_t, raft::row_major> new_vectors,
std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);

/**
* @brief Extend the index with the new data.
*
* This overload accepts the new vectors (`uint8_t` elements) and the optional indices as
* host views. The original index is taken by const reference and an index is returned
* by value.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* ivf_pq::index_params index_params;
* index_params.add_data_on_build = false; // don't populate index on build
* index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
* // train the index from a [N, D] dataset
* auto index_empty = ivf_pq::build(handle, index_params, dataset);
* // fill the index with the data
* std::optional<raft::host_vector_view<const int64_t, int64_t>> no_op = std::nullopt;
* auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()]
* @param[in] new_indices a host vector view to a vector of indices [n_rows].
* If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
* here to imply a continuous range `[0...n_rows)`.
* @param[in] idx the original index, taken by const reference
*
* @return the resulting ivf-pq index (the filled index in the usage example above)
*/
auto extend(raft::resources const& handle,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> new_vectors,
std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
const cuvs::neighbors::ivf_pq::index<int64_t>& idx)
-> cuvs::neighbors::ivf_pq::index<int64_t>;

/**
* @brief Extend the index with the new data.
*
* This overload accepts the new vectors (`uint8_t` elements) and the optional indices as
* host views. It returns nothing; the index to extend is passed by pointer.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* ivf_pq::index_params index_params;
* index_params.add_data_on_build = false; // don't populate index on build
* index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
* // train the index from a [N, D] dataset
* auto index_empty = ivf_pq::build(handle, index_params, dataset);
* // fill the index with the data
* std::optional<raft::host_vector_view<const int64_t, int64_t>> no_op = std::nullopt;
* ivf_pq::extend(handle, new_vectors, no_op, &index_empty);
* @endcode
*
* @param[in] handle the `raft::resources` object used for the call
* @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()]
* @param[in] new_indices a host vector view to a vector of indices [n_rows].
* If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
* here to imply a continuous range `[0...n_rows)`.
* @param[inout] idx pointer to the index to extend
*/
void extend(raft::resources const& handle,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> new_vectors,
std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
cuvs::neighbors::ivf_pq::index<int64_t>* idx);
/**
* @}
*/
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ void build_knn_graph(
}();

RAFT_LOG_DEBUG("# Building IVF-PQ index %s", model_name.c_str());
auto index = cuvs::neighbors::ivf_pq::detail::build<DataT, int64_t>(
res, *build_params, dataset.data_handle(), dataset.extent(0), dataset.extent(1));
auto index = cuvs::neighbors::ivf_pq::detail::build<DataT, int64_t>(res, *build_params, dataset);

//
// search top (k + 1) neighbors
Expand Down
39 changes: 2 additions & 37 deletions cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"""

build_include_macro = """
#include "../ivf_pq_build.cuh"
#include "ivf_pq_build_extend_inst.cuh"
"""
search_include_macro = """
#include "../ivf_pq_search.cuh"
Expand All @@ -61,42 +61,7 @@
uint8_t_int64_t=("uint8_t", "int64_t"),
)

build_extend_macro = """
#define CUVS_INST_IVF_PQ_BUILD_EXTEND(T, IdxT) \\
auto build(raft::resources const& handle, \\
const cuvs::neighbors::ivf_pq::index_params& params, \\
raft::device_matrix_view<const T, IdxT, raft::row_major> dataset) \\
->cuvs::neighbors::ivf_pq::index<IdxT> \\
{ \\
return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \\
} \\
\\
void build(raft::resources const& handle, \\
const cuvs::neighbors::ivf_pq::index_params& params, \\
raft::device_matrix_view<const T, IdxT, raft::row_major> dataset, \\
cuvs::neighbors::ivf_pq::index<IdxT>* idx) \\
{ \\
cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \\
} \\
auto extend(raft::resources const& handle, \\
raft::device_matrix_view<const T, IdxT, raft::row_major> new_vectors, \\
std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \\
const cuvs::neighbors::ivf_pq::index<IdxT>& orig_index) \\
->cuvs::neighbors::ivf_pq::index<IdxT> \\
{ \\
return cuvs::neighbors::ivf_pq::detail::extend( \\
handle, new_vectors, new_indices, orig_index); \\
} \\
\\
void extend(raft::resources const& handle, \\
raft::device_matrix_view<const T, IdxT, raft::row_major> new_vectors, \\
std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \\
cuvs::neighbors::ivf_pq::index<IdxT>* idx) \\
{ \\
cuvs::neighbors::ivf_pq::detail::extend( \\
handle, new_vectors, new_indices, idx); \\
}
"""
build_extend_macro = "" # moved to header ivf_pq_build_extend_inst.cuh

search_macro = """
#define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \\
Expand Down