Merge branch 'branch-24.06' into upgrade-sklearn-1.4

rapidsai · May 24, 2024 · 04ac30c · 04ac30c
2 parents 9ac5425 + 47416d7
commit 04ac30c
Show file tree

Hide file tree

Showing 9 changed files with 348 additions and 136 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -33,7 +33,7 @@ jobs:
     with:
       enable_check_generated_files: false
       ignored_pr_jobs: >-
-        conda-python-tests-cudf-pandas-integration
+        optional-job-conda-python-tests-cudf-pandas-integration
   clang-tidy:
     needs: checks
     secrets: inherit
@@ -77,11 +77,12 @@ jobs:
     with:
       build_type: pull-request
       script: "ci/test_python_singlegpu.sh"
-  conda-python-tests-cudf-pandas-integration:
+  optional-job-conda-python-tests-cudf-pandas-integration:
     needs: conda-python-build
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06
     with:
+      matrix_filter: map(select(.ARCH == "amd64"))
       build_type: pull-request
       script: "ci/test_python_integration.sh"
   conda-python-tests-dask:

diff --git a/cpp/include/cuml/linear_model/qn_mg.hpp b/cpp/include/cuml/linear_model/qn_mg.hpp
@@ -37,9 +37,10 @@ namespace opg {
  * @param[in] labels: labels data
  * @returns host vector that stores the distinct labels
  */
-std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
-                                     Matrix::PartDescriptor& input_desc,
-                                     std::vector<Matrix::Data<float>*>& labels);
+template <typename T>
+std::vector<T> getUniquelabelsMG(const raft::handle_t& handle,
+                                 Matrix::PartDescriptor& input_desc,
+                                 std::vector<Matrix::Data<T>*>& labels);
 
 /**
  * @brief performs MNMG fit operation for the logistic regression using quasi newton methods
@@ -55,16 +56,17 @@ std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
  * @param[out] f: host pointer holding the final objective value
  * @param[out] num_iters: host pointer holding the actual number of iterations taken
  */
+template <typename T>
 void qnFit(raft::handle_t& handle,
-           std::vector<Matrix::Data<float>*>& input_data,
+           std::vector<Matrix::Data<T>*>& input_data,
            Matrix::PartDescriptor& input_desc,
-           std::vector<Matrix::Data<float>*>& labels,
-           float* coef,
+           std::vector<Matrix::Data<T>*>& labels,
+           T* coef,
            const qn_params& pams,
            bool X_col_major,
            bool standardization,
            int n_classes,
-           float* f,
+           T* f,
            int* num_iters);
 
 /**
@@ -86,18 +88,19 @@ void qnFit(raft::handle_t& handle,
  * @param[out] f: host pointer holding the final objective value
  * @param[out] num_iters: host pointer holding the actual number of iterations taken
  */
+template <typename T>
 void qnFitSparse(raft::handle_t& handle,
-                 std::vector<Matrix::Data<float>*>& input_values,
+                 std::vector<Matrix::Data<T>*>& input_values,
                  int* input_cols,
                  int* input_row_ids,
                  int X_nnz,
                  Matrix::PartDescriptor& input_desc,
-                 std::vector<Matrix::Data<float>*>& labels,
-                 float* coef,
+                 std::vector<Matrix::Data<T>*>& labels,
+                 T* coef,
                  const qn_params& pams,
                  bool standardization,
                  int n_classes,
-                 float* f,
+                 T* f,
                  int* num_iters);
 
 };  // namespace opg

diff --git a/cpp/src/glm/qn_mg.cu b/cpp/src/glm/qn_mg.cu
@@ -183,42 +183,76 @@ void qnFit_impl(raft::handle_t& handle,
                 input_desc.uniqueRanks().size());
 }
 
-std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
-                                     Matrix::PartDescriptor& input_desc,
-                                     std::vector<Matrix::Data<float>*>& labels)
+template <typename T>
+std::vector<T> getUniquelabelsMG(const raft::handle_t& handle,
+                                 Matrix::PartDescriptor& input_desc,
+                                 std::vector<Matrix::Data<T>*>& labels)
 {
   RAFT_EXPECTS(labels.size() == 1,
                "getUniqueLabelsMG currently does not accept more than one data chunk");
-  Matrix::Data<float>* data_y = labels[0];
-  int n_rows                  = input_desc.totalElementsOwnedBy(input_desc.rank);
-  return distinct_mg<float>(handle, data_y->ptr, n_rows);
+  Matrix::Data<T>* data_y = labels[0];
+  size_t n_rows           = input_desc.totalElementsOwnedBy(input_desc.rank);
+  return distinct_mg<T>(handle, data_y->ptr, n_rows);
 }
 
+template std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
+                                              Matrix::PartDescriptor& input_desc,
+                                              std::vector<Matrix::Data<float>*>& labels);
+
+template std::vector<double> getUniquelabelsMG(const raft::handle_t& handle,
+                                               Matrix::PartDescriptor& input_desc,
+                                               std::vector<Matrix::Data<double>*>& labels);
+
+template <typename T>
 void qnFit(raft::handle_t& handle,
-           std::vector<Matrix::Data<float>*>& input_data,
+           std::vector<Matrix::Data<T>*>& input_data,
            Matrix::PartDescriptor& input_desc,
-           std::vector<Matrix::Data<float>*>& labels,
-           float* coef,
+           std::vector<Matrix::Data<T>*>& labels,
+           T* coef,
            const qn_params& pams,
            bool X_col_major,
            bool standardization,
            int n_classes,
-           float* f,
+           T* f,
            int* num_iters)
 {
-  qnFit_impl<float>(handle,
-                    input_data,
-                    input_desc,
-                    labels,
-                    coef,
-                    pams,
-                    X_col_major,
-                    standardization,
-                    n_classes,
-                    f,
-                    num_iters);
+  qnFit_impl<T>(handle,
+                input_data,
+                input_desc,
+                labels,
+                coef,
+                pams,
+                X_col_major,
+                standardization,
+                n_classes,
+                f,
+                num_iters);
 }
 
+template void qnFit(raft::handle_t& handle,
+                    std::vector<Matrix::Data<float>*>& input_data,
+                    Matrix::PartDescriptor& input_desc,
+                    std::vector<Matrix::Data<float>*>& labels,
+                    float* coef,
+                    const qn_params& pams,
+                    bool X_col_major,
+                    bool standardization,
+                    int n_classes,
+                    float* f,
+                    int* num_iters);
+
+template void qnFit(raft::handle_t& handle,
+                    std::vector<Matrix::Data<double>*>& input_data,
+                    Matrix::PartDescriptor& input_desc,
+                    std::vector<Matrix::Data<double>*>& labels,
+                    double* coef,
+                    const qn_params& pams,
+                    bool X_col_major,
+                    bool standardization,
+                    int n_classes,
+                    double* f,
+                    int* num_iters);
+
 template <typename T, typename I>
 void qnFitSparse_impl(const raft::handle_t& handle,
                       const qn_params& pams,
@@ -269,18 +303,19 @@ void qnFitSparse_impl(const raft::handle_t& handle,
   return;
 }
 
+template <typename T>
 void qnFitSparse(raft::handle_t& handle,
-                 std::vector<Matrix::Data<float>*>& input_values,
+                 std::vector<Matrix::Data<T>*>& input_values,
                  int* input_cols,
                  int* input_row_ids,
                  int X_nnz,
                  Matrix::PartDescriptor& input_desc,
-                 std::vector<Matrix::Data<float>*>& labels,
-                 float* coef,
+                 std::vector<Matrix::Data<T>*>& labels,
+                 T* coef,
                  const qn_params& pams,
                  bool standardization,
                  int n_classes,
-                 float* f,
+                 T* f,
                  int* num_iters)
 {
   RAFT_EXPECTS(input_values.size() == 1,
@@ -289,25 +324,53 @@ void qnFitSparse(raft::handle_t& handle,
   auto data_input_values = input_values[0];
   auto data_y            = labels[0];
 
-  qnFitSparse_impl<float, int>(handle,
-                               pams,
-                               data_input_values->ptr,
-                               input_cols,
-                               input_row_ids,
-                               X_nnz,
-                               standardization,
-                               data_y->ptr,
-                               input_desc.totalElementsOwnedBy(input_desc.rank),
-                               input_desc.N,
-                               n_classes,
-                               coef,
-                               f,
-                               num_iters,
-                               input_desc.M,
-                               input_desc.rank,
-                               input_desc.uniqueRanks().size());
+  qnFitSparse_impl<T, int>(handle,
+                           pams,
+                           data_input_values->ptr,
+                           input_cols,
+                           input_row_ids,
+                           X_nnz,
+                           standardization,
+                           data_y->ptr,
+                           input_desc.totalElementsOwnedBy(input_desc.rank),
+                           input_desc.N,
+                           n_classes,
+                           coef,
+                           f,
+                           num_iters,
+                           input_desc.M,
+                           input_desc.rank,
+                           input_desc.uniqueRanks().size());
 }
 
+template void qnFitSparse(raft::handle_t& handle,
+                          std::vector<Matrix::Data<float>*>& input_values,
+                          int* input_cols,
+                          int* input_row_ids,
+                          int X_nnz,
+                          Matrix::PartDescriptor& input_desc,
+                          std::vector<Matrix::Data<float>*>& labels,
+                          float* coef,
+                          const qn_params& pams,
+                          bool standardization,
+                          int n_classes,
+                          float* f,
+                          int* num_iters);
+
+template void qnFitSparse(raft::handle_t& handle,
+                          std::vector<Matrix::Data<double>*>& input_values,
+                          int* input_cols,
+                          int* input_row_ids,
+                          int X_nnz,
+                          Matrix::PartDescriptor& input_desc,
+                          std::vector<Matrix::Data<double>*>& labels,
+                          double* coef,
+                          const qn_params& pams,
+                          bool standardization,
+                          int n_classes,
+                          double* f,
+                          int* num_iters);
+
 };  // namespace opg
 };  // namespace GLM
 };  // namespace ML
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -553,6 +553,7 @@ class RandomForestClassifier(BaseRandomForestModel,
         domain="cuml_python")
     @insert_into_docstring(parameters=[('dense', '(n_samples, n_features)')],
                            return_values=[('dense', '(n_samples, 1)')])
+    @cuml.internals.api_base_return_array(get_output_dtype=True)
     def predict(self, X, predict_model="GPU", threshold=0.5,
                 algo='auto', convert_dtype=True,
                 fil_sparse_format='auto') -> CumlArray: