Skip to content

Commit

Permalink
Merge branch 'branch-24.06' into upgrade-sklearn-1.4
Browse files Browse the repository at this point in the history
  • Loading branch information
betatim committed May 24, 2024
2 parents 9ac5425 + 47416d7 commit 04ac30c
Show file tree
Hide file tree
Showing 9 changed files with 348 additions and 136 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
with:
enable_check_generated_files: false
ignored_pr_jobs: >-
conda-python-tests-cudf-pandas-integration
optional-job-conda-python-tests-cudf-pandas-integration
clang-tidy:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -77,11 +77,12 @@ jobs:
with:
build_type: pull-request
script: "ci/test_python_singlegpu.sh"
conda-python-tests-cudf-pandas-integration:
optional-job-conda-python-tests-cudf-pandas-integration:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06
with:
matrix_filter: map(select(.ARCH == "amd64"))
build_type: pull-request
script: "ci/test_python_integration.sh"
conda-python-tests-dask:
Expand Down
25 changes: 14 additions & 11 deletions cpp/include/cuml/linear_model/qn_mg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ namespace opg {
* @param[in] labels: labels data
* @returns host vector that stores the distinct labels
*/
std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels);
template <typename T>
std::vector<T> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<T>*>& labels);

/**
* @brief Performs the MNMG fit operation for logistic regression using quasi-Newton methods
Expand All @@ -55,16 +56,17 @@ std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
* @param[out] f: host pointer holding the final objective value
* @param[out] num_iters: host pointer holding the actual number of iterations taken
*/
template <typename T>
void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_data,
std::vector<Matrix::Data<T>*>& input_data,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
std::vector<Matrix::Data<T>*>& labels,
T* coef,
const qn_params& pams,
bool X_col_major,
bool standardization,
int n_classes,
float* f,
T* f,
int* num_iters);

/**
Expand All @@ -86,18 +88,19 @@ void qnFit(raft::handle_t& handle,
* @param[out] f: host pointer holding the final objective value
* @param[out] num_iters: host pointer holding the actual number of iterations taken
*/
template <typename T>
void qnFitSparse(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_values,
std::vector<Matrix::Data<T>*>& input_values,
int* input_cols,
int* input_row_ids,
int X_nnz,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
std::vector<Matrix::Data<T>*>& labels,
T* coef,
const qn_params& pams,
bool standardization,
int n_classes,
float* f,
T* f,
int* num_iters);

}; // namespace opg
Expand Down
147 changes: 105 additions & 42 deletions cpp/src/glm/qn_mg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -183,42 +183,76 @@ void qnFit_impl(raft::handle_t& handle,
input_desc.uniqueRanks().size());
}

std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels)
template <typename T>
std::vector<T> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<T>*>& labels)
{
RAFT_EXPECTS(labels.size() == 1,
"getUniqueLabelsMG currently does not accept more than one data chunk");
Matrix::Data<float>* data_y = labels[0];
int n_rows = input_desc.totalElementsOwnedBy(input_desc.rank);
return distinct_mg<float>(handle, data_y->ptr, n_rows);
Matrix::Data<T>* data_y = labels[0];
size_t n_rows = input_desc.totalElementsOwnedBy(input_desc.rank);
return distinct_mg<T>(handle, data_y->ptr, n_rows);
}

template std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels);

template std::vector<double> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<double>*>& labels);

template <typename T>
void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_data,
std::vector<Matrix::Data<T>*>& input_data,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
std::vector<Matrix::Data<T>*>& labels,
T* coef,
const qn_params& pams,
bool X_col_major,
bool standardization,
int n_classes,
float* f,
T* f,
int* num_iters)
{
qnFit_impl<float>(handle,
input_data,
input_desc,
labels,
coef,
pams,
X_col_major,
standardization,
n_classes,
f,
num_iters);
qnFit_impl<T>(handle,
input_data,
input_desc,
labels,
coef,
pams,
X_col_major,
standardization,
n_classes,
f,
num_iters);
}

template void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_data,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
const qn_params& pams,
bool X_col_major,
bool standardization,
int n_classes,
float* f,
int* num_iters);

template void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<double>*>& input_data,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<double>*>& labels,
double* coef,
const qn_params& pams,
bool X_col_major,
bool standardization,
int n_classes,
double* f,
int* num_iters);

template <typename T, typename I>
void qnFitSparse_impl(const raft::handle_t& handle,
const qn_params& pams,
Expand Down Expand Up @@ -269,18 +303,19 @@ void qnFitSparse_impl(const raft::handle_t& handle,
return;
}

template <typename T>
void qnFitSparse(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_values,
std::vector<Matrix::Data<T>*>& input_values,
int* input_cols,
int* input_row_ids,
int X_nnz,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
std::vector<Matrix::Data<T>*>& labels,
T* coef,
const qn_params& pams,
bool standardization,
int n_classes,
float* f,
T* f,
int* num_iters)
{
RAFT_EXPECTS(input_values.size() == 1,
Expand All @@ -289,25 +324,53 @@ void qnFitSparse(raft::handle_t& handle,
auto data_input_values = input_values[0];
auto data_y = labels[0];

qnFitSparse_impl<float, int>(handle,
pams,
data_input_values->ptr,
input_cols,
input_row_ids,
X_nnz,
standardization,
data_y->ptr,
input_desc.totalElementsOwnedBy(input_desc.rank),
input_desc.N,
n_classes,
coef,
f,
num_iters,
input_desc.M,
input_desc.rank,
input_desc.uniqueRanks().size());
qnFitSparse_impl<T, int>(handle,
pams,
data_input_values->ptr,
input_cols,
input_row_ids,
X_nnz,
standardization,
data_y->ptr,
input_desc.totalElementsOwnedBy(input_desc.rank),
input_desc.N,
n_classes,
coef,
f,
num_iters,
input_desc.M,
input_desc.rank,
input_desc.uniqueRanks().size());
}

template void qnFitSparse(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_values,
int* input_cols,
int* input_row_ids,
int X_nnz,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
const qn_params& pams,
bool standardization,
int n_classes,
float* f,
int* num_iters);

template void qnFitSparse(raft::handle_t& handle,
std::vector<Matrix::Data<double>*>& input_values,
int* input_cols,
int* input_row_ids,
int X_nnz,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<double>*>& labels,
double* coef,
const qn_params& pams,
bool standardization,
int n_classes,
double* f,
int* num_iters);

}; // namespace opg
}; // namespace GLM
}; // namespace ML
1 change: 1 addition & 0 deletions python/cuml/ensemble/randomforestclassifier.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ class RandomForestClassifier(BaseRandomForestModel,
domain="cuml_python")
@insert_into_docstring(parameters=[('dense', '(n_samples, n_features)')],
return_values=[('dense', '(n_samples, 1)')])
@cuml.internals.api_base_return_array(get_output_dtype=True)
def predict(self, X, predict_model="GPU", threshold=0.5,
algo='auto', convert_dtype=True,
fil_sparse_format='auto') -> CumlArray:
Expand Down
Loading

0 comments on commit 04ac30c

Please sign in to comment.