-
Notifications
You must be signed in to change notification settings - Fork 184
[FEA] View Type PQ Preprocessor #1764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2accbba
4c6182c
f18e00c
fa70a01
bf763e3
ac85ece
2728273
a0f6c76
1620486
b0aaa05
b479c34
04be0a0
f80280e
51e8209
c558964
ebfc7d2
b949c2c
a8b3ce4
4e9565f
f8432b5
a15e054
12a872a
34ce8ff
be681d3
65437ec
fc30857
faa46f9
2c1aa71
d6a8364
963f16e
cbb5d75
68f016d
0070cda
819eef8
19fe976
77bd557
1c85f16
4708434
55db0a4
6408cbb
c4250f5
8c1f792
f5ac6ba
4b7015f
cc2a900
70b5d04
7247688
be4cb4a
12cd162
ee0170e
04888f9
fd1eb38
075c1ba
740b78e
901c8bb
c5e231c
f74a663
f8a5415
216a512
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -393,100 +393,6 @@ auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_b | |
| raft::round_up_safe<size_t>(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; | ||
| return make_strided_dataset(res, std::forward<SrcT>(src), required_stride); | ||
| } | ||
| /** | ||
| * @brief VPQ compressed dataset. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks an awful lot like what @HowardHuang1 and @divyegala have been working on. I think we should consolidate the two; we only need one way to represent PQ-compressed datasets. Ideally it would be designed with composition rather than inheritance in mind, and we could use templates like "container policy", as we've done in other APIs (like the sparse APIs), to make the APIs extensible/pluggable. |
||
| * | ||
| * The dataset is compressed using two level quantization | ||
| * | ||
| * 1. Vector Quantization | ||
| * 2. Product Quantization of residuals | ||
| * | ||
| * @tparam MathT the type of elements in the codebooks | ||
| * @tparam IdxT type of the vector indices (represent dataset.extent(0)) | ||
| * | ||
| */ | ||
| template <typename MathT, typename IdxT> | ||
| struct vpq_dataset : public dataset<IdxT> { | ||
|
tarang-jain marked this conversation as resolved.
|
||
| using index_type = IdxT; | ||
| using math_type = MathT; | ||
| /** Vector Quantization codebook - "coarse cluster centers". */ | ||
| raft::device_matrix<math_type, uint32_t, raft::row_major> vq_code_book; | ||
| /** Product Quantization codebook - "fine cluster centers". */ | ||
| raft::device_matrix<math_type, uint32_t, raft::row_major> pq_code_book; | ||
| /** Compressed dataset. */ | ||
| raft::device_matrix<uint8_t, index_type, raft::row_major> data; | ||
|
|
||
| vpq_dataset(raft::device_matrix<math_type, uint32_t, raft::row_major>&& vq_code_book, | ||
| raft::device_matrix<math_type, uint32_t, raft::row_major>&& pq_code_book, | ||
| raft::device_matrix<uint8_t, index_type, raft::row_major>&& data) | ||
| : vq_code_book{std::move(vq_code_book)}, | ||
| pq_code_book{std::move(pq_code_book)}, | ||
| data{std::move(data)} | ||
| { | ||
| } | ||
|
|
||
| [[nodiscard]] auto n_rows() const noexcept -> index_type final { return data.extent(0); } | ||
| [[nodiscard]] auto dim() const noexcept -> uint32_t final { return vq_code_book.extent(1); } | ||
| [[nodiscard]] auto is_owning() const noexcept -> bool final { return true; } | ||
|
|
||
| /** Row length of the encoded data in bytes. */ | ||
| [[nodiscard]] constexpr inline auto encoded_row_length() const noexcept -> uint32_t | ||
| { | ||
| return data.extent(1); | ||
| } | ||
| /** The number of "coarse cluster centers" */ | ||
| [[nodiscard]] constexpr inline auto vq_n_centers() const noexcept -> uint32_t | ||
| { | ||
| return vq_code_book.extent(0); | ||
| } | ||
| /** The bit length of an encoded vector element after compression by PQ. */ | ||
| [[nodiscard]] constexpr inline auto pq_bits() const noexcept -> uint32_t | ||
| { | ||
| /* | ||
| NOTE: pq_bits and the book size | ||
|
|
||
| Normally, we'd store `pq_bits` as a part of the index. | ||
| However, we know there's an invariant `pq_n_centers = 1 << pq_bits`, i.e. the codebook size is | ||
| the same as the number of possible code values. Hence, we don't store the pq_bits and derive it | ||
| from the array dimensions instead. | ||
| */ | ||
| auto pq_width = pq_n_centers(); | ||
| #ifdef __cpp_lib_bitops | ||
| return std::countr_zero(pq_width); | ||
| #else | ||
| uint32_t pq_bits = 0; | ||
| while (pq_width > 1) { | ||
| pq_bits++; | ||
| pq_width >>= 1; | ||
| } | ||
| return pq_bits; | ||
| #endif | ||
| } | ||
| /** The dimensionality of an encoded vector after compression by PQ. */ | ||
| [[nodiscard]] constexpr inline auto pq_dim() const noexcept -> uint32_t | ||
| { | ||
| return raft::div_rounding_up_unsafe(dim(), pq_len()); | ||
| } | ||
| /** Dimensionality of a subspace, i.e. the number of vector components mapped to a subspace */ | ||
| [[nodiscard]] constexpr inline auto pq_len() const noexcept -> uint32_t | ||
| { | ||
| return pq_code_book.extent(1); | ||
| } | ||
| /** The number of vectors in a PQ codebook (`1 << pq_bits`). */ | ||
| [[nodiscard]] constexpr inline auto pq_n_centers() const noexcept -> uint32_t | ||
| { | ||
| return pq_code_book.extent(0); | ||
| } | ||
| }; | ||
|
|
||
| template <typename DatasetT> | ||
| struct is_vpq_dataset : std::false_type {}; | ||
|
|
||
| template <typename MathT, typename IdxT> | ||
| struct is_vpq_dataset<vpq_dataset<MathT, IdxT>> : std::true_type {}; | ||
|
|
||
| template <typename DatasetT> | ||
| inline constexpr bool is_vpq_dataset_v = is_vpq_dataset<DatasetT>::value; | ||
|
|
||
| namespace filtering { | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
|
|
||
| #include <cuvs/cluster/kmeans.hpp> | ||
| #include <cuvs/neighbors/common.hpp> | ||
| #include <cuvs/preprocessing/quantize/vpq_dataset.hpp> | ||
| #include <raft/core/device_mdspan.hpp> | ||
| #include <raft/core/handle.hpp> | ||
| #include <raft/core/host_mdspan.hpp> | ||
|
|
@@ -139,19 +140,21 @@ struct params { | |
| /** | ||
| * @brief Defines and stores VPQ codebooks upon training | ||
| * | ||
| * @tparam T data element type | ||
| * The quantizer holds a vpq_dataset, which can either own the codebooks | ||
| * or be non-owning (referencing external codebooks). | ||
|
Comment on lines
141
to
+144
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix the doc comment: it no longer matches the type below, so the generated public API docs now describe the wrong ownership model. As per coding guidelines, public C++ API headers must include complete Doxygen documentation for public functions/classes. 🤖 Prompt for AI Agents |
||
| * | ||
| * @tparam T data element type | ||
| */ | ||
| template <typename T> | ||
| struct quantizer { | ||
| /** Parameters used to build this quantizer. */ | ||
| params params_quantizer; | ||
| /** VPQ codebooks produced during training. */ | ||
| cuvs::neighbors::vpq_dataset<T, int64_t> vpq_codebooks; | ||
| /** VPQ codebooks (owning or view). */ | ||
| cuvs::preprocessing::quantize::pq::vpq_codebooks<T> codebooks; | ||
| }; | ||
|
Comment on lines
149
to
154
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This public member rename needs a deprecation or migration path.
As per coding guidelines, breaking changes require deprecation warnings and migration guide updates. 🤖 Prompt for AI Agents |
||
|
|
||
| /** | ||
| * @brief Initializes a product quantizer to be used later for quantizing the dataset. | ||
| * @brief Initializes a product quantizer by training on the dataset (owning). | ||
| * | ||
| * The use of a pool memory resource is recommended for more consistent training performance. | ||
| * | ||
|
|
@@ -165,7 +168,7 @@ struct quantizer { | |
| * @endcode | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] params configure product quantizer, e.g. quantile | ||
| * @param[in] params configure product quantizer, e.g. pq_bits, pq_dim | ||
| * @param[in] dataset a row-major matrix view on device or host | ||
| * | ||
| * @return quantizer | ||
|
|
@@ -179,6 +182,48 @@ quantizer<float> build(raft::resources const& res, | |
| const params params, | ||
| raft::host_matrix_view<const float, int64_t> dataset); | ||
|
|
||
| /** | ||
| * @brief Creates a product quantizer from pre-computed codebooks. | ||
| * | ||
| * This function creates a non-owning quantizer that references the provided codebooks. | ||
| * | ||
| * Usage example: | ||
| * @code{.cpp} | ||
| * raft::handle_t handle; | ||
| * // Assume pq_centers and vq_centers are pre-computed on device | ||
| * cuvs::preprocessing::quantize::pq::params params; | ||
| * params.pq_bits = 8; | ||
| * params.pq_dim = 32; | ||
| * params.use_vq = true; | ||
| * params.use_subspaces = true; | ||
| * // With VQ centers: | ||
| * auto quant_view = cuvs::preprocessing::quantize::pq::build(handle, params, | ||
| * pq_centers_view, | ||
| * std::make_optional<raft::device_matrix_view<const | ||
| * float, uint32_t, raft::row_major>>(vq_centers_view)); | ||
| * // Without VQ (PQ only): | ||
| * auto quant_pq_only = cuvs::preprocessing::quantize::pq::build(handle, params, pq_centers_view); | ||
| * @endcode | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] params configure product quantizer parameters. Must be fully specified | ||
| * (pq_bits, pq_dim must be set; use_subspaces and use_vq must match the codebook shapes). | ||
| * @param[in] pq_centers PQ codebook on device memory: | ||
| * - For use_subspaces=true: [pq_dim * pq_n_centers, pq_len] | ||
| * - For use_subspaces=false: [pq_n_centers, pq_len] | ||
| * where pq_n_centers = (1 << pq_bits), pq_len = dim / pq_dim | ||
| * @param[in] vq_centers Optional VQ codebook on device memory [vq_n_centers, dim]. | ||
| * Required when use_vq=true. Defaults to std::nullopt (no VQ). | ||
| * | ||
| * @return A view-type quantizer that references the provided data | ||
| */ | ||
| quantizer<float> build( | ||
| raft::resources const& res, | ||
| const params params, | ||
| raft::device_matrix_view<const float, uint32_t, raft::row_major> pq_centers, | ||
| std::optional<raft::device_matrix_view<const float, uint32_t, raft::row_major>> vq_centers = | ||
| std::nullopt); | ||
|
|
||
| /** | ||
| * @brief Applies quantization transform to given dataset | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't RAFT and the library isn't header-only. Items declared in the headers are much more lightweight than in RAFT. I'd like to avoid having to define a header per class/abstraction. It gets confusing to users when they have to explicitly include a thousand headers to use common APIs.
What's the main intention of the "dataset" here? Is this meant to be used internally or directly by users? Are users interacting with it directly? If header sizes are what is getting out of control, I'd much rather break the implementation into the compiled source files and keep the prototype/definition in the header files and then instantiate for the types we support, rather than break each one into separate headers that become nebulous to the users.