From 67271ebe3cbcf172c8c018d9ac44dc95742b0081 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 12 Jan 2022 18:26:14 -0500 Subject: [PATCH 01/16] Removing deprecated (and unused) functions from cusparse_wrappers --- cpp/include/raft/sparse/cusparse_wrappers.h | 807 -------------------- 1 file changed, 807 deletions(-) diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index e2306686ce..0e93acf8d0 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -240,77 +240,6 @@ inline void cusparsecoosortByRow( // NOLINT } /** @} */ -/** - * @defgroup Gemmi cusparse gemmi operations - * @{ - */ -template -cusparseStatus_t cusparsegemmi( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const T* alpha, - const T* A, - int lda, - const T* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const T* beta, - T* C, - int ldc, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const float* alpha, - const float* A, - int lda, - const float* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const float* beta, - float* C, - int ldc, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseSgemmi( - handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); -#pragma GCC diagnostic pop -} -template <> -inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const double* alpha, - const double* A, - int lda, - const double* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const double* beta, - double* C, - int ldc, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDgemmi( - handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); -#pragma GCC diagnostic pop -} -/** @} */ - #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP /** * @defgroup cusparse Create CSR operations @@ -618,202 +547,6 @@ inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, /** @} */ #endif -#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP -/** - * @defgroup Csrmm cusparse csrmm operations - * @{ - */ -template -cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseOperation_t opB, - const T* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, - const T* beta, - cusparseDnMatDescr_t matC, - cusparseSpMMAlg_t alg, - size_t* bufferSize, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseOperation_t opB, - const float* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, - const float* beta, - cusparseDnMatDescr_t matC, - cusparseSpMMAlg_t alg, - size_t* bufferSize, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSpMM_bufferSize( - handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, bufferSize); -} -template <> -inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseOperation_t opB, - const double* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, - const double* beta, - cusparseDnMatDescr_t matC, - cusparseSpMMAlg_t alg, - size_t* bufferSize, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSpMM_bufferSize( - handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, bufferSize); -} -template -inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseOperation_t opB, - const T* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, - const T* beta, - cusparseDnMatDescr_t matC, - cusparseSpMMAlg_t alg, - T* externalBuffer, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseOperation_t opB, - const float* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, - const float* beta, - cusparseDnMatDescr_t matC, - cusparseSpMMAlg_t alg, - float* externalBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSpMM( - handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, externalBuffer); -} -template <> -inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseOperation_t opB, - const double* alpha, - const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, - const double* beta, - cusparseDnMatDescr_t matC, - cusparseSpMMAlg_t alg, - double* externalBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSpMM( - handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, externalBuffer); -} -/** @} */ -#else -/** - * @defgroup Csrmm cusparse csrmm operations - * @{ - */ -template -cusparseStatus_t cusparsecsrmm( // NOLINT - cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int k, - int nnz, - const T* alpha, - const cusparseMatDescr_t descr, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const T* x, - const int ldx, - const T* beta, - T* y, - const int ldy, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int k, - int nnz, - const float* alpha, - const cusparseMatDescr_t descr, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const float* x, - const int ldx, - const float* beta, - float* y, - const int ldy, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseScsrmm( - handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); -} -template <> -inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int k, - int nnz, - const double* alpha, - const cusparseMatDescr_t descr, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const double* x, - const int ldx, - const double* beta, - double* y, - const int ldy, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseDcsrmm( - handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); -} -/** @} */ -#endif - -/** - * @defgroup csr2coo cusparse CSR to COO converter methods - * @{ - */ -template -void cusparsecsr2coo( // NOLINT - cusparseHandle_t handle, - const int n, - const int nnz, - const T* csrRowPtr, - T* cooRowInd, - cudaStream_t stream); -template <> -inline void cusparsecsr2coo(cusparseHandle_t handle, - const int n, - const int nnz, - const int* csrRowPtr, - int* cooRowInd, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO)); -} -/** @} */ - /** * @defgroup setpointermode cusparse set pointer mode method * @{ @@ -835,212 +568,6 @@ inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, } /** @} */ -/** - * @defgroup CsrmvEx cusparse csrmvex operations - * @{ - */ -template -cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const T* x, - const T* beta, - T* y, - size_t* bufferSizeInBytes, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const float* x, - const float* beta, - float* y, - size_t* bufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsrmvEx_bufferSize(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_32F, - descrA, - csrValA, - CUDA_R_32F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_32F, - beta, - CUDA_R_32F, - y, - CUDA_R_32F, - CUDA_R_32F, - bufferSizeInBytes); -} -template <> -inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const double* x, - const double* beta, - double* y, - size_t* bufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsrmvEx_bufferSize(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_64F, - descrA, - csrValA, - CUDA_R_64F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_64F, - beta, - CUDA_R_64F, - y, - CUDA_R_64F, - CUDA_R_64F, - bufferSizeInBytes); -} - -template -cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const T* x, - const T* beta, - T* y, - T* buffer, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const float* x, - const float* beta, - float* y, - float* buffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsrmvEx(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_32F, - descrA, - csrValA, - CUDA_R_32F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_32F, - beta, - CUDA_R_32F, - y, - CUDA_R_32F, - CUDA_R_32F, - buffer); -} -template <> -inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const double* x, - const double* beta, - double* y, - double* buffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsrmvEx(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_64F, - descrA, - csrValA, - CUDA_R_64F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_64F, - beta, - CUDA_R_64F, - y, - CUDA_R_64F, - CUDA_R_64F, - buffer); -} - -/** @} */ - /** * @defgroup Csr2cscEx2 cusparse csr->csc conversion * @{ @@ -1225,340 +752,6 @@ inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, /** @} */ -/** - * @defgroup csrgemm2 cusparse sparse gemm operations - * @{ - */ - -template -cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const T* alpha, - const T* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream); - -template <> -inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const float* alpha, - const float* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseScsrgemm2_bufferSizeExt(handle, - m, - n, - k, - alpha, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - beta, - matD, - nnzD, - rowindD, - indicesD, - info, - pBufferSizeInBytes); -#pragma GCC diagnostic pop -} - -template <> -inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const double* alpha, - const double* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDcsrgemm2_bufferSizeExt(handle, - m, - n, - k, - alpha, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - beta, - matD, - nnzD, - rowindD, - indicesD, - info, - pBufferSizeInBytes); -#pragma GCC diagnostic pop -} - -inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle, - int m, - int n, - int k, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - const cusparseMatDescr_t matC, - int* rowindC, - int* nnzC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseXcsrgemm2Nnz(handle, - m, - n, - k, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - matD, - nnzD, - rowindD, - indicesD, - matC, - rowindC, - nnzC, - info, - pBuffer); -#pragma GCC diagnostic pop -} - -template -cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const T* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const T* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const T* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const T* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - T* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream); - -template <> -inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const float* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const float* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const float* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const float* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - float* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseScsrgemm2(handle, - m, - n, - k, - alpha, - descrA, - nnzA, - csrValA, - csrRowPtrA, - csrColIndA, - descrB, - nnzB, - csrValB, - csrRowPtrB, - csrColIndB, - beta, - descrD, - nnzD, - csrValD, - csrRowPtrD, - csrColIndD, - descrC, - csrValC, - csrRowPtrC, - csrColIndC, - info, - pBuffer); -#pragma GCC diagnostic pop -} - -template <> -inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const double* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const double* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const double* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const double* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - double* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDcsrgemm2(handle, - m, - n, - k, - alpha, - descrA, - nnzA, - csrValA, - csrRowPtrA, - csrColIndA, - descrB, - nnzB, - csrValB, - csrRowPtrB, - csrColIndB, - beta, - descrD, - nnzD, - csrValD, - csrRowPtrD, - csrColIndD, - descrC, - csrValC, - csrRowPtrC, - csrColIndC, - info, - pBuffer); -#pragma GCC diagnostic pop -} - -/** @} */ - /** * @defgroup csrgemm2 cusparse sparse gemm operations * @{ From 41676d513f5bc272960bc073b3a0d4a64aaf1ee2 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 15:35:29 -0500 Subject: [PATCH 02/16] Updating cusparse wrappers to remove deprecation warnings --- .../raft/sparse/convert/detail/dense.cuh | 9 +- cpp/include/raft/sparse/cusparse_wrappers.h | 1672 +++++++++++++---- 2 files changed, 1306 insertions(+), 375 deletions(-) diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index 9f48fd2172..d826d5ea00 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -79,6 +79,7 @@ template void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols, + value_idx nnz, const value_idx* csr_indptr, const value_idx* csr_indices, const value_t* csr_data, @@ -96,8 +97,14 @@ void csr_to_dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(out_mat, CUSPARSE_INDEX_BASE_ZERO)); RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + size_t buffer_size; + RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize( + handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, &buffer_size, stream)); + + rmm::device_uvector buffer(buffer_size, stream); + RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense( - handle, nrows, ncols, out_mat, csr_data, csr_indptr, csr_indices, out, lda, stream)); + handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, buffer.data(), stream)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat)); diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index 0e93acf8d0..ba9f40fd7e 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -41,35 +41,35 @@ namespace raft { /** * @brief Exception thrown when a cuSparse error is encountered. */ -struct cusparse_error : public raft::exception { - explicit cusparse_error(char const* const message) : raft::exception(message) {} - explicit cusparse_error(std::string const& message) : raft::exception(message) {} -}; + struct cusparse_error : public raft::exception { + explicit cusparse_error(char const* const message) : raft::exception(message) {} + explicit cusparse_error(std::string const& message) : raft::exception(message) {} + }; -namespace sparse { -namespace detail { + namespace sparse { + namespace detail { -inline const char* cusparse_error_to_string(cusparseStatus_t err) -{ + inline const char* cusparse_error_to_string(cusparseStatus_t err) + { #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 - return cusparseGetErrorString(err); + return cusparseGetErrorString(err); #else // CUDART_VERSION - switch (err) { - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); - default: return "CUSPARSE_STATUS_UNKNOWN"; - }; + switch (err) { + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + default: return "CUSPARSE_STATUS_UNKNOWN"; + }; #endif // CUDART_VERSION -} + } -} // namespace detail -} // namespace sparse + } // namespace detail + } // namespace sparse } // namespace raft #undef _CUSPARSE_ERR_TO_STR @@ -124,124 +124,195 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) #endif namespace raft { -namespace sparse { + namespace sparse { /** * @defgroup gthr cusparse gather methods * @{ */ -template -cusparseStatus_t cusparsegthr( - cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, - int nnz, - const double* vals, - double* vals_sorted, - int* d_P, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + template + cusparseStatus_t cusparsegthr( + cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream); + template <> + inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, + int nnz, + const double* vals, + double* vals_sorted, + int* d_P, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); + return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); #pragma GCC diagnostic pop -} -template <> -inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, - int nnz, - const float* vals, - float* vals_sorted, - int* d_P, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + } + template <> + inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, + int nnz, + const float* vals, + float* vals_sorted, + int* d_P, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); + return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); #pragma GCC diagnostic pop -} + } /** @} */ /** * @defgroup coo2csr cusparse COO to CSR converter methods * @{ */ -template -void cusparsecoo2csr( - cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream); -template <> -inline void cusparsecoo2csr(cusparseHandle_t handle, - const int* cooRowInd, - int nnz, - int m, - int* csrRowPtr, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO)); -} + template + void cusparsecoo2csr( + cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream); + template <> + inline void cusparsecoo2csr(cusparseHandle_t handle, + const int* cooRowInd, + int nnz, + int m, + int* csrRowPtr, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO)); + } /** @} */ /** * @defgroup coosort cusparse coo sort methods * @{ */ -template -size_t cusparsecoosort_bufferSizeExt( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - const T* cooRows, - const T* cooCols, - cudaStream_t stream); -template <> -inline size_t cusparsecoosort_bufferSizeExt( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - const int* cooRows, - const int* cooCols, - cudaStream_t stream) -{ - size_t val; - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val)); - return val; -} + template + size_t cusparsecoosort_bufferSizeExt( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + const T* cooRows, + const T* cooCols, + cudaStream_t stream); + template <> + inline size_t cusparsecoosort_bufferSizeExt( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + const int* cooRows, + const int* cooCols, + cudaStream_t stream) + { + size_t val; + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val)); + return val; + } -template -void cusparsecoosortByRow( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - T* cooRows, - T* cooCols, - T* P, - void* pBuffer, - cudaStream_t stream); -template <> -inline void cusparsecoosortByRow( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - int* cooRows, - int* cooCols, - int* P, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer)); -} + template + void cusparsecoosortByRow( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + T* cooRows, + T* cooCols, + T* P, + void* pBuffer, + cudaStream_t stream); + template <> + inline void cusparsecoosortByRow( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + int* cooRows, + int* cooCols, + int* P, + void* pBuffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer)); + } /** @} */ -#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP /** + * @defgroup Gemmi cusparse gemmi operations + * @{ + */ + template + cusparseStatus_t cusparsegemmi( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const T* alpha, + const T* A, + int lda, + const T* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const T* beta, + T* C, + int ldc, + cudaStream_t stream); + template <> + inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const float* alpha, + const float* A, + int lda, + const float* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const float* beta, + float* C, + int ldc, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseSgemmi( + handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); +#pragma GCC diagnostic pop + } + template <> + inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const double* alpha, + const double* A, + int lda, + const double* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const double* beta, + double* C, + int ldc, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseDgemmi( + handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); +#pragma GCC diagnostic pop + } +/** @} */ + +#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP + /** * @defgroup cusparse Create CSR operations * @{ */ @@ -488,65 +559,261 @@ inline cusparseStatus_t cusparsespmv(cusparseHandle_t handle, * @defgroup Csrmv cusparse csrmv operations * @{ */ + template + cusparseStatus_t cusparsecsrmv( // NOLINT + cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int nnz, + const T* alpha, + const cusparseMatDescr_t descr, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const T* x, + const T* beta, + T* y, + cudaStream_t stream); + template <> + inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int nnz, + const float* alpha, + const cusparseMatDescr_t descr, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const float* x, + const float* beta, + float* y, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseScsrmv( + handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); + } + template <> + inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int nnz, + const double* alpha, + const cusparseMatDescr_t descr, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const double* x, + const double* beta, + double* y, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseDcsrmv( + handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); + } +/** @} */ +#endif + +#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP + /** + * @defgroup Csrmm cusparse csrmm operations + * @{ + */ +template +cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseOperation_t opB, + const T* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnMatDescr_t matB, + const T* beta, + cusparseDnMatDescr_t matC, + cusparseSpMMAlg_t alg, + size_t* bufferSize, + cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseOperation_t opB, + const float* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnMatDescr_t matB, + const float* beta, + cusparseDnMatDescr_t matC, + cusparseSpMMAlg_t alg, + size_t* bufferSize, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseSpMM_bufferSize( + handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, bufferSize); +} +template <> +inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseOperation_t opB, + const double* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnMatDescr_t matB, + const double* beta, + cusparseDnMatDescr_t matC, + cusparseSpMMAlg_t alg, + size_t* bufferSize, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseSpMM_bufferSize( + handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, bufferSize); +} template -cusparseStatus_t cusparsecsrmv( // NOLINT - cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descr, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const T* x, - const T* beta, - T* y, - cudaStream_t stream); +inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseOperation_t opB, + const T* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnMatDescr_t matB, + const T* beta, + cusparseDnMatDescr_t matC, + cusparseSpMMAlg_t alg, + T* externalBuffer, + cudaStream_t stream); template <> -inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descr, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const float* x, - const float* beta, - float* y, - cudaStream_t stream) +inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseOperation_t opB, + const float* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnMatDescr_t matB, + const float* beta, + cusparseDnMatDescr_t matC, + cusparseSpMMAlg_t alg, + float* externalBuffer, + cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseScsrmv( - handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); + return cusparseSpMM( + handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, externalBuffer); } template <> -inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descr, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const double* x, - const double* beta, - double* y, - cudaStream_t stream) +inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseOperation_t opB, + const double* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnMatDescr_t matB, + const double* beta, + cusparseDnMatDescr_t matC, + cusparseSpMMAlg_t alg, + double* externalBuffer, + cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseDcsrmv( - handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); + return cusparseSpMM( + handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, externalBuffer); } /** @} */ +#else +/** + * @defgroup Csrmm cusparse csrmm operations + * @{ + */ + template + cusparseStatus_t cusparsecsrmm( // NOLINT + cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int k, + int nnz, + const T* alpha, + const cusparseMatDescr_t descr, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const T* x, + const int ldx, + const T* beta, + T* y, + const int ldy, + cudaStream_t stream); + template <> + inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int k, + int nnz, + const float* alpha, + const cusparseMatDescr_t descr, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const float* x, + const int ldx, + const float* beta, + float* y, + const int ldy, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseScsrmm( + handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); + } + template <> + inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int k, + int nnz, + const double* alpha, + const cusparseMatDescr_t descr, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const double* x, + const int ldx, + const double* beta, + double* y, + const int ldy, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseDcsrmm( + handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); + } +/** @} */ #endif +/** + * @defgroup csr2coo cusparse CSR to COO converter methods + * @{ + */ + template + void cusparsecsr2coo( // NOLINT + cusparseHandle_t handle, + const int n, + const int nnz, + const T* csrRowPtr, + T* cooRowInd, + cudaStream_t stream); + template <> + inline void cusparsecsr2coo(cusparseHandle_t handle, + const int n, + const int nnz, + const int* csrRowPtr, + int* cooRowInd, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO)); + } +/** @} */ + /** * @defgroup setpointermode cusparse set pointer mode method * @{ @@ -559,13 +826,215 @@ inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, // cudaStream_t stream); // template<> -inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, - cusparsePointerMode_t mode, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSetPointerMode(handle, mode); -} + inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, + cusparsePointerMode_t mode, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseSetPointerMode(handle, mode); + } +/** @} */ + +/** + * @defgroup CsrmvEx cusparse csrmvex operations + * @{ + */ + template + cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const T* alpha, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const T* x, + const T* beta, + T* y, + size_t* bufferSizeInBytes, + cudaStream_t stream); + template <> + inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const float* alpha, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const float* x, + const float* beta, + float* y, + size_t* bufferSizeInBytes, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); + + cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY, + CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; + + } + template <> + inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const double* alpha, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const double* x, + const double* beta, + double* y, + size_t* bufferSizeInBytes, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); + + cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY, + CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; + + } + + template + cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const T* alpha, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const T* x, + const T* beta, + T* y, + T* buffer, + cudaStream_t stream); + template <> + inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const float* alpha, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const float* x, + const float* beta, + float* y, + float* buffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); + + cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY, + CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; + } + template <> + inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const double* alpha, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const double* x, + const double* beta, + double* y, + double* buffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); + + cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY, + CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; + } + /** @} */ /** @@ -573,182 +1042,182 @@ inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, * @{ */ -template -cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - size_t* bufferSize, - cudaStream_t stream); + template + cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + size_t* bufferSize, + cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - size_t* bufferSize, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + template <> + inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + size_t* bufferSize, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsr2cscEx2_bufferSize(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_32F, - copyValues, - idxBase, - alg, - bufferSize); -} -template <> -inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - size_t* bufferSize, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseCsr2cscEx2_bufferSize(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_32F, + copyValues, + idxBase, + alg, + bufferSize); + } + template <> + inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + size_t* bufferSize, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsr2cscEx2_bufferSize(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_64F, - copyValues, - idxBase, - alg, - bufferSize); -} + return cusparseCsr2cscEx2_bufferSize(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_64F, + copyValues, + idxBase, + alg, + bufferSize); + } -template -cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, - int m, - int n, - int nnz, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - void* buffer, - cudaStream_t stream); + template + cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, + int m, + int n, + int nnz, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + void* buffer, + cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, - int m, - int n, - int nnz, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - void* buffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + template <> + inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, + int m, + int n, + int nnz, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + void* buffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsr2cscEx2(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_32F, - copyValues, - idxBase, - alg, - buffer); -} + return cusparseCsr2cscEx2(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_32F, + copyValues, + idxBase, + alg, + buffer); + } -template <> -inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, - int m, - int n, - int nnz, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - void* buffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + template <> + inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, + int m, + int n, + int nnz, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + void* buffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseCsr2cscEx2(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_64F, - copyValues, - idxBase, - alg, - buffer); -} + return cusparseCsr2cscEx2(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_64F, + copyValues, + idxBase, + alg, + buffer); + } /** @} */ @@ -757,50 +1226,505 @@ inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, * @{ */ -template -cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, - int m, - int n, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - T* A, - int lda, - cudaStream_t stream); + template + cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, + int m, + int n, + int k, + const T* alpha, + const T* beta, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + csrgemm2Info_t info, + size_t* pBufferSizeInBytes, + cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, - int m, - int n, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - float* A, - int lda, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); -} -template <> -inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + template <> + inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, + int m, + int n, + int k, + const float* alpha, + const float* beta, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + csrgemm2Info_t info, + size_t* pBufferSizeInBytes, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseScsrgemm2_bufferSizeExt(handle, + m, + n, + k, + alpha, + matA, + nnzA, + rowindA, + indicesA, + matB, + nnzB, + rowindB, + indicesB, + beta, + matD, + nnzD, + rowindD, + indicesD, + info, + pBufferSizeInBytes); +#pragma GCC diagnostic pop + } + + template <> + inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, + int m, + int n, + int k, + const double* alpha, + const double* beta, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + csrgemm2Info_t info, + size_t* pBufferSizeInBytes, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseDcsrgemm2_bufferSizeExt(handle, + m, + n, + k, + alpha, + matA, + nnzA, + rowindA, + indicesA, + matB, + nnzB, + rowindB, + indicesB, + beta, + matD, + nnzD, + rowindD, + indicesD, + info, + pBufferSizeInBytes); +#pragma GCC diagnostic pop + } + + inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle, + int m, + int n, + int k, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + const cusparseMatDescr_t matC, + int* rowindC, + int* nnzC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseXcsrgemm2Nnz(handle, + m, + n, + k, + matA, + nnzA, + rowindA, + indicesA, + matB, + nnzB, + rowindB, + indicesB, + matD, + nnzD, + rowindD, + indicesD, + matC, + rowindC, + nnzC, + info, + pBuffer); +#pragma GCC diagnostic pop + } + + template + cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, int m, int n, + int k, + const T* alpha, const cusparseMatDescr_t descrA, - const double* csrValA, + int nnzA, + const T* csrValA, const int* csrRowPtrA, const int* csrColIndA, - double* A, - int lda, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); -} + const cusparseMatDescr_t descrB, + int nnzB, + const T* csrValB, + const int* csrRowPtrB, + const int* csrColIndB, + const T* beta, + const cusparseMatDescr_t descrD, + int nnzD, + const T* csrValD, + const int* csrRowPtrD, + const int* csrColIndD, + const cusparseMatDescr_t descrC, + T* csrValC, + const int* csrRowPtrC, + int* csrColIndC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream); + + template <> + inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, + int m, + int n, + int k, + const float* alpha, + const cusparseMatDescr_t descrA, + int nnzA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const cusparseMatDescr_t descrB, + int nnzB, + const float* csrValB, + const int* csrRowPtrB, + const int* csrColIndB, + const float* beta, + const cusparseMatDescr_t descrD, + int nnzD, + const float* csrValD, + const int* csrRowPtrD, + const int* csrColIndD, + const cusparseMatDescr_t descrC, + float* csrValC, + const int* csrRowPtrC, + int* csrColIndC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseScsrgemm2(handle, + m, + n, + k, + alpha, + descrA, + nnzA, + csrValA, + csrRowPtrA, + csrColIndA, + descrB, + nnzB, + csrValB, + csrRowPtrB, + csrColIndB, + beta, + descrD, + nnzD, + csrValD, + csrRowPtrD, + csrColIndD, + descrC, + csrValC, + csrRowPtrC, + csrColIndC, + info, + pBuffer); +#pragma GCC diagnostic pop + } + + template <> + inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, + int m, + int n, + int k, + const double* alpha, + const cusparseMatDescr_t descrA, + int nnzA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const cusparseMatDescr_t descrB, + int nnzB, + const double* csrValB, + const int* csrRowPtrB, + const int* csrColIndB, + const double* beta, + const cusparseMatDescr_t descrD, + int nnzD, + const double* csrValD, + const int* csrRowPtrD, + const int* csrColIndD, + const cusparseMatDescr_t descrC, + double* csrValC, + const int* csrRowPtrC, + int* csrColIndC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return cusparseDcsrgemm2(handle, + m, + n, + k, + alpha, + descrA, + nnzA, + csrValA, + csrRowPtrA, + csrColIndA, + descrB, + nnzB, + csrValB, + csrRowPtrB, + csrColIndB, + beta, + descrD, + nnzD, + csrValD, + csrRowPtrD, + csrColIndD, + descrC, + csrValC, + csrRowPtrC, + csrColIndC, + info, + pBuffer); +#pragma GCC diagnostic pop + } /** @} */ -} // namespace sparse -} // namespace raft +/** + * @defgroup csrgemm2 cusparse sparse gemm operations + * @{ + */ + + template + cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + T* A, + int lda, + size_t *buffer_size, + bool row_major = false, + cudaStream_t stream); + + template <> + cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + float* A, + int lda, + size_t *buffer_size, + bool row_major = false, + cudaStream_t stream) { + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); + + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; + + } + + template <> + cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + double* A, + int lda, + size_t *buffer_size, + bool row_major = false, + cudaStream_t stream) { + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; + + } + + + + template + cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + T* A, + int lda, + void *buffer, + cudaStream_t stream, + bool row_major = false); + + template <> + inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + float* A, + int lda, + void *buffer, + cudaStream_t stream, + bool row_major = false) + { + + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; + } + template <> + inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + double* A, + int lda, + void *buffer, + cudaStream_t stream, + bool row_major = false) + { + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; + } + +/** @} */ + + } // namespace sparse +} // namespace raft \ No newline at end of file From 5e16bdc7fb04e485adbb4bd82b3c72fbfe86b6c8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 15:38:48 -0500 Subject: [PATCH 03/16] Separating cusparse macros so the cusparse wrappers don't need to be explicitly compiled w/ the handle --- cpp/include/raft/handle.hpp | 2 +- cpp/include/raft/sparse/cusparse_wrappers.h | 106 +----------------- .../raft/sparse/detail/cusparse_macros.h | 105 +++++++++++++++++ 3 files changed, 107 insertions(+), 106 deletions(-) create mode 100644 cpp/include/raft/sparse/detail/cusparse_macros.h diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp index 6421ba5344..8a55df114d 100644 --- a/cpp/include/raft/handle.hpp +++ b/cpp/include/raft/handle.hpp @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index ba9f40fd7e..edd26748fb 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -17,111 +17,7 @@ #pragma once #include - -#include -///@todo: enable this once logging is enabled -//#include - -#define _CUSPARSE_ERR_TO_STR(err) \ - case err: return #err; - -// Notes: -//(1.) CUDA_VER_10_1_UP aggregates all the CUDA version selection logic; -//(2.) to enforce a lower version, -// -//`#define CUDA_ENFORCE_LOWER -// #include ` -// -// (i.e., before including this header) -// -#define CUDA_VER_10_1_UP (CUDART_VERSION >= 10100) - -namespace raft { - -/** - * @brief Exception thrown when a cuSparse error is encountered. - */ - struct cusparse_error : public raft::exception { - explicit cusparse_error(char const* const message) : raft::exception(message) {} - explicit cusparse_error(std::string const& message) : raft::exception(message) {} - }; - - namespace sparse { - namespace detail { - - inline const char* cusparse_error_to_string(cusparseStatus_t err) - { -#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 - return cusparseGetErrorString(err); -#else // CUDART_VERSION - switch (err) { - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); - default: return "CUSPARSE_STATUS_UNKNOWN"; - }; -#endif // CUDART_VERSION - } - - } // namespace detail - } // namespace sparse -} // namespace raft - -#undef _CUSPARSE_ERR_TO_STR - -/** - * @brief Error checking macro for cuSparse runtime API functions. - * - * Invokes a cuSparse runtime API function call, if the call does not return - * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred - */ -#define RAFT_CUSPARSE_TRY(call) \ - do { \ - cusparseStatus_t const status = (call); \ - if (CUSPARSE_STATUS_SUCCESS != status) { \ - std::string msg{}; \ - SET_ERROR_MSG(msg, \ - "cuSparse error encountered at: ", \ - "call='%s', Reason=%d:%s", \ - #call, \ - status, \ - raft::sparse::detail::cusparse_error_to_string(status)); \ - throw raft::cusparse_error(msg); \ - } \ - } while (0) - -// FIXME: Remove after consumer rename -#ifndef CUSPARSE_TRY -#define CUSPARSE_TRY(call) RAFT_CUSPARSE_TRY(call) -#endif - -// FIXME: Remove after consumer rename -#ifndef CUSPARSE_CHECK -#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call) -#endif - -//@todo: use logger here once logging is enabled -/** check for cusparse runtime API errors but do not assert */ -#define RAFT_CUSPARSE_TRY_NO_THROW(call) \ - do { \ - cusparseStatus_t err = call; \ - if (err != CUSPARSE_STATUS_SUCCESS) { \ - printf("CUSPARSE call='%s' got errorcode=%d err=%s", \ - #call, \ - err, \ - raft::sparse::detail::cusparse_error_to_string(err)); \ - } \ - } while (0) - -// FIXME: Remove after consumer rename -#ifndef CUSPARSE_CHECK_NO_THROW -#define CUSPARSE_CHECK_NO_THROW(call) RAFT_CUSPARSE_TRY_NO_THROW(call) -#endif +#include namespace raft { namespace sparse { diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h new file mode 100644 index 0000000000..a604cb57f2 --- /dev/null +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -0,0 +1,105 @@ +#include +#include +///@todo: enable this once logging is enabled +//#include + +#define _CUSPARSE_ERR_TO_STR(err) \ + case err: return #err; + +// Notes: +//(1.) CUDA_VER_10_1_UP aggregates all the CUDA version selection logic; +//(2.) to enforce a lower version, +// +//`#define CUDA_ENFORCE_LOWER +// #include ` +// +// (i.e., before including this header) +// +#define CUDA_VER_10_1_UP (CUDART_VERSION >= 10100) + +namespace raft { + +/** + * @brief Exception thrown when a cuSparse error is encountered. + */ +struct cusparse_error : public raft::exception { + explicit cusparse_error(char const* const message) : raft::exception(message) {} + explicit cusparse_error(std::string const& message) : raft::exception(message) {} +}; + +namespace sparse { +namespace detail { + + inline const char* cusparse_error_to_string(cusparseStatus_t err) + { +#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 + return cusparseGetErrorString(err); +#else // CUDART_VERSION + switch (err) { + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + default: return "CUSPARSE_STATUS_UNKNOWN"; + }; +#endif // CUDART_VERSION + } + + } // namespace detail + } // namespace sparse +} // namespace raft + +#undef _CUSPARSE_ERR_TO_STR + +/** + * @brief Error checking macro for cuSparse runtime API functions. + * + * Invokes a cuSparse runtime API function call, if the call does not return + * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred + */ +#define RAFT_CUSPARSE_TRY(call) \ + do { \ + cusparseStatus_t const status = (call); \ + if (CUSPARSE_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "cuSparse error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + raft::sparse::detail::cusparse_error_to_string(status)); \ + throw raft::cusparse_error(msg); \ + } \ + } while (0) + +// FIXME: Remove after consumer rename +#ifndef CUSPARSE_TRY +#define CUSPARSE_TRY(call) RAFT_CUSPARSE_TRY(call) +#endif + +// FIXME: Remove after consumer rename +#ifndef CUSPARSE_CHECK +#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call) +#endif + +//@todo: use logger here once logging is enabled +/** check for cusparse runtime API errors but do not assert */ +#define RAFT_CUSPARSE_TRY_NO_THROW(call) \ + do { \ + cusparseStatus_t err = call; \ + if (err != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE call='%s' got errorcode=%d err=%s", \ + #call, \ + err, \ + raft::sparse::detail::cusparse_error_to_string(err)); \ + } \ + } while (0) + +// FIXME: Remove after consumer rename +#ifndef CUSPARSE_CHECK_NO_THROW +#define CUSPARSE_CHECK_NO_THROW(call) RAFT_CUSPARSE_TRY_NO_THROW(call) +#endif From 2b2d3632be031b981f2de9fccf7ef91cdebc6480 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 15:44:06 -0500 Subject: [PATCH 04/16] Fixing style --- .../raft/sparse/convert/detail/dense.cuh | 28 +- cpp/include/raft/sparse/cusparse_wrappers.h | 2358 +++++++++-------- .../raft/sparse/detail/cusparse_macros.h | 40 +- 3 files changed, 1262 insertions(+), 1164 deletions(-) diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index d826d5ea00..3c18a40526 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -98,13 +98,33 @@ void csr_to_dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL)); size_t buffer_size; - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize( - handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, &buffer_size, stream)); + RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize(handle, + nrows, + ncols, + nnz, + out_mat, + csr_data, + csr_indptr, + csr_indices, + out, + lda, + &buffer_size, + stream)); rmm::device_uvector buffer(buffer_size, stream); - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense( - handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, buffer.data(), stream)); + RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense(handle, + nrows, + ncols, + nnz, + out_mat, + csr_data, + csr_indptr, + csr_indices, + out, + lda, + buffer.data(), + stream)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat)); diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index edd26748fb..a28d218960 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -20,195 +20,195 @@ #include namespace raft { - namespace sparse { +namespace sparse { /** * @defgroup gthr cusparse gather methods * @{ */ - template - cusparseStatus_t cusparsegthr( - cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, - int nnz, - const double* vals, - double* vals_sorted, - int* d_P, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +template +cusparseStatus_t cusparsegthr( + cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, + int nnz, + const double* vals, + double* vals_sorted, + int* d_P, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); + return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); #pragma GCC diagnostic pop - } - template <> - inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, - int nnz, - const float* vals, - float* vals_sorted, - int* d_P, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +} +template <> +inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, + int nnz, + const float* vals, + float* vals_sorted, + int* d_P, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); + return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); #pragma GCC diagnostic pop - } +} /** @} */ /** * @defgroup coo2csr cusparse COO to CSR converter methods * @{ */ - template - void cusparsecoo2csr( - cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream); - template <> - inline void cusparsecoo2csr(cusparseHandle_t handle, - const int* cooRowInd, - int nnz, - int m, - int* csrRowPtr, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO)); - } +template +void cusparsecoo2csr( + cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream); +template <> +inline void cusparsecoo2csr(cusparseHandle_t handle, + const int* cooRowInd, + int nnz, + int m, + int* csrRowPtr, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO)); +} /** @} */ /** * @defgroup coosort cusparse coo sort methods * @{ */ - template - size_t cusparsecoosort_bufferSizeExt( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - const T* cooRows, - const T* cooCols, - cudaStream_t stream); - template <> - inline size_t cusparsecoosort_bufferSizeExt( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - const int* cooRows, - const int* cooCols, - cudaStream_t stream) - { - size_t val; - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val)); - return val; - } - - template - void cusparsecoosortByRow( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - T* cooRows, - T* cooCols, - T* P, - void* pBuffer, - cudaStream_t stream); - template <> - inline void cusparsecoosortByRow( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int nnz, - int* cooRows, - int* cooCols, - int* P, - void* pBuffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer)); - } +template +size_t cusparsecoosort_bufferSizeExt( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + const T* cooRows, + const T* cooCols, + cudaStream_t stream); +template <> +inline size_t cusparsecoosort_bufferSizeExt( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + const int* cooRows, + const int* cooCols, + cudaStream_t stream) +{ + size_t val; + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val)); + return val; +} + +template +void cusparsecoosortByRow( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + T* cooRows, + T* cooCols, + T* P, + void* pBuffer, + cudaStream_t stream); +template <> +inline void cusparsecoosortByRow( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int nnz, + int* cooRows, + int* cooCols, + int* P, + void* pBuffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer)); +} /** @} */ /** * @defgroup Gemmi cusparse gemmi operations * @{ */ - template - cusparseStatus_t cusparsegemmi( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const T* alpha, - const T* A, - int lda, - const T* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const T* beta, - T* C, - int ldc, - cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const float* alpha, - const float* A, - int lda, - const float* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const float* beta, - float* C, - int ldc, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +template +cusparseStatus_t cusparsegemmi( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const T* alpha, + const T* A, + int lda, + const T* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const T* beta, + T* C, + int ldc, + cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const float* alpha, + const float* A, + int lda, + const float* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const float* beta, + float* C, + int ldc, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseSgemmi( - handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); + return cusparseSgemmi( + handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); #pragma GCC diagnostic pop - } - template <> - inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const double* alpha, - const double* A, - int lda, - const double* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const double* beta, - double* C, - int ldc, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +} +template <> +inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const double* alpha, + const double* A, + int lda, + const double* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const double* beta, + double* C, + int ldc, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDgemmi( - handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); + return cusparseDgemmi( + handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); #pragma GCC diagnostic pop - } +} /** @} */ #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP - /** +/** * @defgroup cusparse Create CSR operations * @{ */ @@ -455,67 +455,67 @@ inline cusparseStatus_t cusparsespmv(cusparseHandle_t handle, * @defgroup Csrmv cusparse csrmv operations * @{ */ - template - cusparseStatus_t cusparsecsrmv( // NOLINT - cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descr, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const T* x, - const T* beta, - T* y, - cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descr, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const float* x, - const float* beta, - float* y, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseScsrmv( - handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); - } - template <> - inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descr, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const double* x, - const double* beta, - double* y, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseDcsrmv( - handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); - } +template +cusparseStatus_t cusparsecsrmv( // NOLINT + cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int nnz, + const T* alpha, + const cusparseMatDescr_t descr, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const T* x, + const T* beta, + T* y, + cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int nnz, + const float* alpha, + const cusparseMatDescr_t descr, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const float* x, + const float* beta, + float* y, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseScsrmv( + handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); +} +template <> +inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int nnz, + const double* alpha, + const cusparseMatDescr_t descr, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const double* x, + const double* beta, + double* y, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseDcsrmv( + handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y); +} /** @} */ #endif #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP - /** +/** * @defgroup Csrmm cusparse csrmm operations * @{ */ @@ -617,71 +617,71 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, * @defgroup Csrmm cusparse csrmm operations * @{ */ - template - cusparseStatus_t cusparsecsrmm( // NOLINT - cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int k, - int nnz, - const T* alpha, - const cusparseMatDescr_t descr, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const T* x, - const int ldx, - const T* beta, - T* y, - const int ldy, - cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int k, - int nnz, - const float* alpha, - const cusparseMatDescr_t descr, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const float* x, - const int ldx, - const float* beta, - float* y, - const int ldy, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseScsrmm( - handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); - } - template <> - inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, - cusparseOperation_t trans, - int m, - int n, - int k, - int nnz, - const double* alpha, - const cusparseMatDescr_t descr, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - const double* x, - const int ldx, - const double* beta, - double* y, - const int ldy, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseDcsrmm( - handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); - } +template +cusparseStatus_t cusparsecsrmm( // NOLINT + cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int k, + int nnz, + const T* alpha, + const cusparseMatDescr_t descr, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const T* x, + const int ldx, + const T* beta, + T* y, + const int ldy, + cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int k, + int nnz, + const float* alpha, + const cusparseMatDescr_t descr, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const float* x, + const int ldx, + const float* beta, + float* y, + const int ldy, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseScsrmm( + handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); +} +template <> +inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, + cusparseOperation_t trans, + int m, + int n, + int k, + int nnz, + const double* alpha, + const cusparseMatDescr_t descr, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + const double* x, + const int ldx, + const double* beta, + double* y, + const int ldy, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseDcsrmm( + handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy); +} /** @} */ #endif @@ -689,25 +689,25 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, * @defgroup csr2coo cusparse CSR to COO converter methods * @{ */ - template - void cusparsecsr2coo( // NOLINT - cusparseHandle_t handle, - const int n, - const int nnz, - const T* csrRowPtr, - T* cooRowInd, - cudaStream_t stream); - template <> - inline void cusparsecsr2coo(cusparseHandle_t handle, - const int n, - const int nnz, - const int* csrRowPtr, - int* cooRowInd, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO)); - } +template +void cusparsecsr2coo( // NOLINT + cusparseHandle_t handle, + const int n, + const int nnz, + const T* csrRowPtr, + T* cooRowInd, + cudaStream_t stream); +template <> +inline void cusparsecsr2coo(cusparseHandle_t handle, + const int n, + const int nnz, + const int* csrRowPtr, + int* cooRowInd, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO)); +} /** @} */ /** @@ -722,214 +722,260 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, // cudaStream_t stream); // template<> - inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, - cusparsePointerMode_t mode, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSetPointerMode(handle, mode); - } +inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, + cusparsePointerMode_t mode, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + return cusparseSetPointerMode(handle, mode); +} /** @} */ /** * @defgroup CsrmvEx cusparse csrmvex operations * @{ */ - template - cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const T* x, - const T* beta, - T* y, - size_t* bufferSizeInBytes, - cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const float* x, - const float* beta, - float* y, - size_t* bufferSizeInBytes, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); - - cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); - - cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); - - cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY, - CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - - return result; - - } - template <> - inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const double* x, - const double* beta, - double* y, - size_t* bufferSizeInBytes, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - - cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); - - cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); - - cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY, - CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - - return result; - - } - - template - cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const T* x, - const T* beta, - T* y, - T* buffer, - cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const float* x, - const float* beta, - float* y, - float* buffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); - - cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); - - cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); - - cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY, - CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - - return result; - } - template <> - inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const double* x, - const double* beta, - double* y, - double* buffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - - cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); - - cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); +template +cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const T* alpha, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const T* x, + const T* beta, + T* y, + size_t* bufferSizeInBytes, + cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const float* alpha, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const float* x, + const float* beta, + float* y, + size_t* bufferSizeInBytes, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY, - CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); + + cusparseStatus_t result = cusparseSpMV_bufferSize(handle, + transA, + alpha, + matA, + vecX, + beta, + vecY, + CUDA_R_32F, + CUSPARSE_SPMV_ALG_DEFAULT, + bufferSizeInBytes); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; +} +template <> +inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const double* alpha, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const double* x, + const double* beta, + double* y, + size_t* bufferSizeInBytes, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); + + cusparseStatus_t result = cusparseSpMV_bufferSize(handle, + transA, + alpha, + matA, + vecX, + beta, + vecY, + CUDA_R_64F, + CUSPARSE_SPMV_ALG_DEFAULT, + bufferSizeInBytes); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; +} - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); +template +cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const T* alpha, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const T* x, + const T* beta, + T* y, + T* buffer, + cudaStream_t stream); +template <> +inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const float* alpha, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const float* x, + const float* beta, + float* y, + float* buffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); + + cusparseStatus_t result = cusparseSpMV( + handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; +} +template <> +inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, + cusparseAlgMode_t alg, + cusparseOperation_t transA, + int m, + int n, + int nnz, + const double* alpha, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const double* x, + const double* beta, + double* y, + double* buffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return result; - } + cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnVecDescr_t vecX; + cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); + + cusparseDnVecDescr_t vecY; + cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); + + cusparseStatus_t result = cusparseSpMV( + handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + + return result; +} /** @} */ @@ -938,182 +984,182 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, * @{ */ - template - cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - size_t* bufferSize, - cudaStream_t stream); - - template <> - inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - size_t* bufferSize, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - return cusparseCsr2cscEx2_bufferSize(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_32F, - copyValues, - idxBase, - alg, - bufferSize); - } - template <> - inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - size_t* bufferSize, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - return cusparseCsr2cscEx2_bufferSize(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_64F, - copyValues, - idxBase, - alg, - bufferSize); - } - - template - cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, - int m, - int n, - int nnz, - const T* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - void* buffer, - cudaStream_t stream); +template +cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + size_t* bufferSize, + cudaStream_t stream); - template <> - inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, - int m, - int n, - int nnz, - const float* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - void* buffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - return cusparseCsr2cscEx2(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_32F, - copyValues, - idxBase, - alg, - buffer); - } - - template <> - inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, - int m, - int n, - int nnz, - const double* csrVal, - const int* csrRowPtr, - const int* csrColInd, - void* cscVal, - int* cscColPtr, - int* cscRowInd, - cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, - void* buffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - return cusparseCsr2cscEx2(handle, - m, - n, - nnz, - csrVal, - csrRowPtr, - csrColInd, - cscVal, - cscColPtr, - cscRowInd, - CUDA_R_64F, - copyValues, - idxBase, - alg, - buffer); - } +template <> +inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + size_t* bufferSize, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + return cusparseCsr2cscEx2_bufferSize(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_32F, + copyValues, + idxBase, + alg, + bufferSize); +} +template <> +inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + size_t* bufferSize, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + return cusparseCsr2cscEx2_bufferSize(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_64F, + copyValues, + idxBase, + alg, + bufferSize); +} + +template +cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, + int m, + int n, + int nnz, + const T* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + void* buffer, + cudaStream_t stream); + +template <> +inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, + int m, + int n, + int nnz, + const float* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + void* buffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + return cusparseCsr2cscEx2(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_32F, + copyValues, + idxBase, + alg, + buffer); +} + +template <> +inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, + int m, + int n, + int nnz, + const double* csrVal, + const int* csrRowPtr, + const int* csrColInd, + void* cscVal, + int* cscColPtr, + int* cscRowInd, + cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, + cusparseCsr2CscAlg_t alg, + void* buffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + return cusparseCsr2cscEx2(handle, + m, + n, + nnz, + csrVal, + csrRowPtr, + csrColInd, + cscVal, + cscColPtr, + cscRowInd, + CUDA_R_64F, + copyValues, + idxBase, + alg, + buffer); +} /** @} */ @@ -1122,332 +1168,332 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, * @{ */ - template - cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const T* alpha, - const T* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream); - - template <> - inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const float* alpha, - const float* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +template +cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, + int m, + int n, + int k, + const T* alpha, + const T* beta, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + csrgemm2Info_t info, + size_t* pBufferSizeInBytes, + cudaStream_t stream); + +template <> +inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, + int m, + int n, + int k, + const float* alpha, + const float* beta, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + csrgemm2Info_t info, + size_t* pBufferSizeInBytes, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseScsrgemm2_bufferSizeExt(handle, - m, - n, - k, - alpha, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - beta, - matD, - nnzD, - rowindD, - indicesD, - info, - pBufferSizeInBytes); + return cusparseScsrgemm2_bufferSizeExt(handle, + m, + n, + k, + alpha, + matA, + nnzA, + rowindA, + indicesA, + matB, + nnzB, + rowindB, + indicesB, + beta, + matD, + nnzD, + rowindD, + indicesD, + info, + pBufferSizeInBytes); #pragma GCC diagnostic pop - } - - template <> - inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const double* alpha, - const double* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +} + +template <> +inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, + int m, + int n, + int k, + const double* alpha, + const double* beta, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + csrgemm2Info_t info, + size_t* pBufferSizeInBytes, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDcsrgemm2_bufferSizeExt(handle, - m, - n, - k, - alpha, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - beta, - matD, - nnzD, - rowindD, - indicesD, - info, - pBufferSizeInBytes); + return cusparseDcsrgemm2_bufferSizeExt(handle, + m, + n, + k, + alpha, + matA, + nnzA, + rowindA, + indicesA, + matB, + nnzB, + rowindB, + indicesB, + beta, + matD, + nnzD, + rowindD, + indicesD, + info, + pBufferSizeInBytes); #pragma GCC diagnostic pop - } - - inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle, - int m, - int n, - int k, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - const cusparseMatDescr_t matC, - int* rowindC, - int* nnzC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +} + +inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle, + int m, + int n, + int k, + const cusparseMatDescr_t matA, + int nnzA, + const int* rowindA, + const int* indicesA, + const cusparseMatDescr_t matB, + int nnzB, + const int* rowindB, + const int* indicesB, + const cusparseMatDescr_t matD, + int nnzD, + const int* rowindD, + const int* indicesD, + const cusparseMatDescr_t matC, + int* rowindC, + int* nnzC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseXcsrgemm2Nnz(handle, - m, - n, - k, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - matD, - nnzD, - rowindD, - indicesD, - matC, - rowindC, - nnzC, - info, - pBuffer); + return cusparseXcsrgemm2Nnz(handle, + m, + n, + k, + matA, + nnzA, + rowindA, + indicesA, + matB, + nnzB, + rowindB, + indicesB, + matD, + nnzD, + rowindD, + indicesD, + matC, + rowindC, + nnzC, + info, + pBuffer); #pragma GCC diagnostic pop - } +} - template - cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const T* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const T* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const T* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const T* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - T* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream); - - template <> - inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const float* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const float* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const float* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const float* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - float* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +template +cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, + int m, + int n, + int k, + const T* alpha, + const cusparseMatDescr_t descrA, + int nnzA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const cusparseMatDescr_t descrB, + int nnzB, + const T* csrValB, + const int* csrRowPtrB, + const int* csrColIndB, + const T* beta, + const cusparseMatDescr_t descrD, + int nnzD, + const T* csrValD, + const int* csrRowPtrD, + const int* csrColIndD, + const cusparseMatDescr_t descrC, + T* csrValC, + const int* csrRowPtrC, + int* csrColIndC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream); + +template <> +inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, + int m, + int n, + int k, + const float* alpha, + const cusparseMatDescr_t descrA, + int nnzA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const cusparseMatDescr_t descrB, + int nnzB, + const float* csrValB, + const int* csrRowPtrB, + const int* csrColIndB, + const float* beta, + const cusparseMatDescr_t descrD, + int nnzD, + const float* csrValD, + const int* csrRowPtrD, + const int* csrColIndD, + const cusparseMatDescr_t descrC, + float* csrValC, + const int* csrRowPtrC, + int* csrColIndC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseScsrgemm2(handle, - m, - n, - k, - alpha, - descrA, - nnzA, - csrValA, - csrRowPtrA, - csrColIndA, - descrB, - nnzB, - csrValB, - csrRowPtrB, - csrColIndB, - beta, - descrD, - nnzD, - csrValD, - csrRowPtrD, - csrColIndD, - descrC, - csrValC, - csrRowPtrC, - csrColIndC, - info, - pBuffer); + return cusparseScsrgemm2(handle, + m, + n, + k, + alpha, + descrA, + nnzA, + csrValA, + csrRowPtrA, + csrColIndA, + descrB, + nnzB, + csrValB, + csrRowPtrB, + csrColIndB, + beta, + descrD, + nnzD, + csrValD, + csrRowPtrD, + csrColIndD, + descrC, + csrValC, + csrRowPtrC, + csrColIndC, + info, + pBuffer); #pragma GCC diagnostic pop - } - - template <> - inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const double* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const double* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const double* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const double* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - double* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +} + +template <> +inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, + int m, + int n, + int k, + const double* alpha, + const cusparseMatDescr_t descrA, + int nnzA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + const cusparseMatDescr_t descrB, + int nnzB, + const double* csrValB, + const int* csrRowPtrB, + const int* csrColIndB, + const double* beta, + const cusparseMatDescr_t descrD, + int nnzD, + const double* csrValD, + const int* csrRowPtrD, + const int* csrColIndD, + const cusparseMatDescr_t descrC, + double* csrValC, + const int* csrRowPtrC, + int* csrColIndC, + const csrgemm2Info_t info, + void* pBuffer, + cudaStream_t stream) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDcsrgemm2(handle, - m, - n, - k, - alpha, - descrA, - nnzA, - csrValA, - csrRowPtrA, - csrColIndA, - descrB, - nnzB, - csrValB, - csrRowPtrB, - csrColIndB, - beta, - descrD, - nnzD, - csrValD, - csrRowPtrD, - csrColIndD, - descrC, - csrValC, - csrRowPtrC, - csrColIndC, - info, - pBuffer); + return cusparseDcsrgemm2(handle, + m, + n, + k, + alpha, + descrA, + nnzA, + csrValA, + csrRowPtrA, + csrColIndA, + descrB, + nnzB, + csrValB, + csrRowPtrB, + csrColIndB, + beta, + descrD, + nnzD, + csrValD, + csrRowPtrD, + csrColIndD, + descrC, + csrValC, + csrRowPtrC, + csrColIndC, + info, + pBuffer); #pragma GCC diagnostic pop - } +} /** @} */ @@ -1456,171 +1502,203 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, * @{ */ - template - cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - T* A, - int lda, - size_t *buffer_size, - bool row_major = false, - cudaStream_t stream); - - template <> - cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - float* A, - int lda, - size_t *buffer_size, - bool row_major = false, - cudaStream_t stream) { - cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); - - cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); - - cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); - - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); - - return result; - - } - - template <> - cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - double* A, - int lda, - size_t *buffer_size, - bool row_major = false, - cudaStream_t stream) { - cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - - cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); - - cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); - - return result; - - } - - - - template - cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - T* A, - int lda, - void *buffer, - cudaStream_t stream, - bool row_major = false); - - template <> - inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - float* A, - int lda, - void *buffer, - cudaStream_t stream, - bool row_major = false) - { - - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - - cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); - - cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); - - cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); - - return result; - } - template <> - inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - double* A, - int lda, - void *buffer, - cudaStream_t stream, - bool row_major = false) - { - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL - cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - - cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); - - cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); - - return result; - } +template +cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + T* A, + int lda, + size_t* buffer_size, + bool row_major = false, + cudaStream_t stream); + +template <> +cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + float* A, + int lda, + size_t* buffer_size, + bool row_major = false, + cudaStream_t stream) +{ + cusparseOrder_t order = + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = cusparseSparseToDense_bufferSize( + handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; +} + +template <> +cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + double* A, + int lda, + size_t* buffer_size, + bool row_major = false, + cudaStream_t stream) +{ + cusparseOrder_t order = + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = cusparseSparseToDense_bufferSize( + handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; +} + +template +cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + T* A, + int lda, + void* buffer, + cudaStream_t stream, + bool row_major = false); + +template <> +inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + float* A, + int lda, + void* buffer, + cudaStream_t stream, + bool row_major = false) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + + cusparseOrder_t order = + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = + cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; +} +template <> +inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + double* A, + int lda, + void* buffer, + cudaStream_t stream, + bool row_major = false) +{ + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + cusparseOrder_t order = + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; + cusparseCreateCsr(&matA, + m, + n, + nnz, + csrRowPtrA, + csrColIndA, + csrValA, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + + cusparseDnMatDescr_t matB; + cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); + + cusparseStatus_t result = + cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + + return result; +} /** @} */ - } // namespace sparse +} // namespace sparse } // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h index a604cb57f2..ecd0abf485 100644 --- a/cpp/include/raft/sparse/detail/cusparse_macros.h +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -1,5 +1,5 @@ -#include #include +#include ///@todo: enable this once logging is enabled //#include @@ -23,34 +23,34 @@ namespace raft { * @brief Exception thrown when a cuSparse error is encountered. */ struct cusparse_error : public raft::exception { - explicit cusparse_error(char const* const message) : raft::exception(message) {} - explicit cusparse_error(std::string const& message) : raft::exception(message) {} + explicit cusparse_error(char const* const message) : raft::exception(message) {} + explicit cusparse_error(std::string const& message) : raft::exception(message) {} }; namespace sparse { namespace detail { - inline const char* cusparse_error_to_string(cusparseStatus_t err) - { +inline const char* cusparse_error_to_string(cusparseStatus_t err) +{ #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 - return cusparseGetErrorString(err); + return cusparseGetErrorString(err); #else // CUDART_VERSION - switch (err) { - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); - default: return "CUSPARSE_STATUS_UNKNOWN"; - }; + switch (err) { + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + default: return "CUSPARSE_STATUS_UNKNOWN"; + }; #endif // CUDART_VERSION - } +} - } // namespace detail - } // namespace sparse +} // namespace detail +} // namespace sparse } // namespace raft #undef _CUSPARSE_ERR_TO_STR From 1c1f3659648cea08e8fede11e5d8008a1ca8bba9 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 16:05:23 -0500 Subject: [PATCH 05/16] Moving cusparse_wrappers behind detail namespace. These shouldn't be exposed --- cpp/include/raft/handle.hpp | 2 +- .../raft/sparse/detail/cusparse_macros.h | 18 +++ .../sparse/{ => detail}/cusparse_wrappers.h | 125 +++++++----------- 3 files changed, 68 insertions(+), 77 deletions(-) rename cpp/include/raft/sparse/{ => detail}/cusparse_wrappers.h (95%) diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp index 8a55df114d..1babfaded5 100644 --- a/cpp/include/raft/handle.hpp +++ b/cpp/include/raft/handle.hpp @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h index ecd0abf485..080083006d 100644 --- a/cpp/include/raft/sparse/detail/cusparse_macros.h +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -1,3 +1,21 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + #include #include ///@todo: enable this once logging is enabled diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h similarity index 95% rename from cpp/include/raft/sparse/cusparse_wrappers.h rename to cpp/include/raft/sparse/detail/cusparse_wrappers.h index a28d218960..02df917f99 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -773,23 +773,19 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); + csrValA); cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, @@ -828,23 +824,19 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); + csrValA); cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, @@ -901,23 +893,19 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); + csrValA); cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV( handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); @@ -949,23 +937,19 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); + csrValA); cusparseDnVecDescr_t vecX; - cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV( handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); @@ -1514,8 +1498,8 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, T* A, int lda, size_t* buffer_size, - bool row_major = false, - cudaStream_t stream); + cudaStream_t stream, + bool row_major = false); template <> cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, @@ -1529,31 +1513,29 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, float* A, int lda, size_t* buffer_size, - bool row_major = false, - cudaStream_t stream) + cudaStream_t stream, + bool row_major) { cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); + csrValA); cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); + cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense_bufferSize( handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } @@ -1570,31 +1552,28 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, double* A, int lda, size_t* buffer_size, - bool row_major = false, - cudaStream_t stream) + cudaStream_t stream, + bool row_major) { cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); + csrValA); cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); + cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense_bufferSize( handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } @@ -1627,32 +1606,29 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, int lda, void* buffer, cudaStream_t stream, - bool row_major = false) + bool row_major) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F); + csrValA); cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL); + cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } @@ -1669,31 +1645,28 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, int lda, void* buffer, cudaStream_t stream, - bool row_major = false) + bool row_major ) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA; - cusparseCreateCsr(&matA, + row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, - csrValA, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); + csrValA); cusparseDnMatDescr_t matB; - cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL); + cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } From ebd40325d6c6354315d224de5a653c0235ece314 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 16:41:04 -0500 Subject: [PATCH 06/16] Moving cusparse wrappers to detail and adjusting all includes --- cpp/doxygen/Doxyfile.in | 2 +- cpp/include/raft/sparse/convert/detail/coo.cuh | 2 +- cpp/include/raft/sparse/convert/detail/csr.cuh | 2 +- cpp/include/raft/sparse/convert/detail/dense.cuh | 2 +- cpp/include/raft/sparse/detail/csr.cuh | 2 +- cpp/include/raft/sparse/detail/cusparse_macros.h | 2 +- cpp/include/raft/sparse/detail/cusparse_wrappers.h | 1 + cpp/include/raft/sparse/distance/detail/bin_distance.cuh | 2 +- cpp/include/raft/sparse/distance/detail/coo_spmv.cuh | 2 +- cpp/include/raft/sparse/distance/detail/ip_distance.cuh | 2 +- cpp/include/raft/sparse/distance/detail/l2_distance.cuh | 2 +- cpp/include/raft/sparse/distance/detail/lp_distance.cuh | 2 +- cpp/include/raft/sparse/distance/distance.hpp | 2 +- cpp/include/raft/sparse/linalg/detail/add.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/norm.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/spectral.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/symmetrize.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/transpose.h | 2 +- cpp/include/raft/sparse/op/detail/filter.cuh | 2 +- cpp/include/raft/sparse/op/detail/reduce.cuh | 2 +- cpp/include/raft/sparse/op/detail/row_op.cuh | 2 +- cpp/include/raft/sparse/op/detail/slice.h | 2 +- cpp/include/raft/sparse/op/detail/sort.h | 2 +- cpp/include/raft/spectral/matrix_wrappers.hpp | 2 +- cpp/test/sparse/csr_row_slice.cu | 2 +- cpp/test/sparse/csr_to_dense.cu | 2 +- cpp/test/sparse/csr_transpose.cu | 2 +- cpp/test/sparse/dist_coo_spmv.cu | 2 +- cpp/test/sparse/distance.cu | 2 +- cpp/test/sparse/knn.cu | 4 ++-- 30 files changed, 31 insertions(+), 30 deletions(-) diff --git a/cpp/doxygen/Doxyfile.in b/cpp/doxygen/Doxyfile.in index eb27b2d02c..c83224050e 100644 --- a/cpp/doxygen/Doxyfile.in +++ b/cpp/doxygen/Doxyfile.in @@ -815,7 +815,7 @@ RECURSIVE = YES EXCLUDE = @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/linalg/symmetrize.hpp \ # Contains device code @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/csr.hpp \ # Contains device code - @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/cusparse_wrappers.h + @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/detail/cusparse_wrappers.h # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded diff --git a/cpp/include/raft/sparse/convert/detail/coo.cuh b/cpp/include/raft/sparse/convert/detail/coo.cuh index fd300dcdba..c37087789c 100644 --- a/cpp/include/raft/sparse/convert/detail/coo.cuh +++ b/cpp/include/raft/sparse/convert/detail/coo.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index 0f4dc4976c..b787a9d588 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index 3c18a40526..0f3a4fb5e5 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/detail/csr.cuh b/cpp/include/raft/sparse/detail/csr.cuh index cb39f34ba4..a256ac402b 100644 --- a/cpp/include/raft/sparse/detail/csr.cuh +++ b/cpp/include/raft/sparse/detail/csr.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h index 080083006d..7e89014b61 100644 --- a/cpp/include/raft/sparse/detail/cusparse_macros.h +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -29,7 +29,7 @@ //(2.) to enforce a lower version, // //`#define CUDA_ENFORCE_LOWER -// #include ` +// #include ` // // (i.e., before including this header) // diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 02df917f99..922425221f 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -16,6 +16,7 @@ #pragma once +#include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh index 07bf251f14..5ba48cf9ca 100644 --- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh index c23a2b1537..046b65a0f0 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "../../csr.hpp" #include "../../detail/utils.h" diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh index 00054a8e96..1c5c51b654 100644 --- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh index 7f63a7fec8..edbc50cc2b 100644 --- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index 1e907c98eb..142a0d6093 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp index 2f121dce33..428883c3df 100644 --- a/cpp/include/raft/sparse/distance/distance.hpp +++ b/cpp/include/raft/sparse/distance/distance.hpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/add.cuh b/cpp/include/raft/sparse/linalg/detail/add.cuh index 769c7e523f..b288d0a603 100644 --- a/cpp/include/raft/sparse/linalg/detail/add.cuh +++ b/cpp/include/raft/sparse/linalg/detail/add.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index f4b4f65f7e..b7420a55e7 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index de62f25ffa..782ce2b909 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 045f0e14bc..4384f2ba55 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h index be74a72817..699c67544e 100644 --- a/cpp/include/raft/sparse/linalg/detail/transpose.h +++ b/cpp/include/raft/sparse/linalg/detail/transpose.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 6e5d518619..80a6584251 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh index 074a139ba9..697e9cce7f 100644 --- a/cpp/include/raft/sparse/op/detail/reduce.cuh +++ b/cpp/include/raft/sparse/op/detail/reduce.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/row_op.cuh b/cpp/include/raft/sparse/op/detail/row_op.cuh index b8803d4926..4754f753d4 100644 --- a/cpp/include/raft/sparse/op/detail/row_op.cuh +++ b/cpp/include/raft/sparse/op/detail/row_op.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/slice.h b/cpp/include/raft/sparse/op/detail/slice.h index 3c47d19a0b..662ba55f96 100644 --- a/cpp/include/raft/sparse/op/detail/slice.h +++ b/cpp/include/raft/sparse/op/detail/slice.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 94feda1e76..9fc7cac5e3 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index a260e75505..bd1866a4f0 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/test/sparse/csr_row_slice.cu b/cpp/test/sparse/csr_row_slice.cu index e37827d18d..cdcc2fdd7f 100644 --- a/cpp/test/sparse/csr_row_slice.cu +++ b/cpp/test/sparse/csr_row_slice.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu index 8dec9492bb..f3291f3bb9 100644 --- a/cpp/test/sparse/csr_to_dense.cu +++ b/cpp/test/sparse/csr_to_dense.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/test/sparse/csr_transpose.cu index b1a432422e..e6df4cada2 100644 --- a/cpp/test/sparse/csr_transpose.cu +++ b/cpp/test/sparse/csr_transpose.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include "../test_utils.h" diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu index dc136d6f18..67a61438d1 100644 --- a/cpp/test/sparse/dist_coo_spmv.cu +++ b/cpp/test/sparse/dist_coo_spmv.cu @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu index f4f346561c..a9781567ab 100644 --- a/cpp/test/sparse/distance.cu +++ b/cpp/test/sparse/distance.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include diff --git a/cpp/test/sparse/knn.cu b/cpp/test/sparse/knn.cu index bcfa796931..8da9d1e443 100644 --- a/cpp/test/sparse/knn.cu +++ b/cpp/test/sparse/knn.cu @@ -19,11 +19,11 @@ #include "../test_utils.h" #include -#include +#include #include #include -#include +#include namespace raft { namespace sparse { From 47d5176d769422d85483854fb08813c478df371b Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 16:41:51 -0500 Subject: [PATCH 07/16] Updating style --- .../raft/sparse/detail/cusparse_wrappers.h | 110 +++++------------- 1 file changed, 31 insertions(+), 79 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 922425221f..5a512fea56 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -774,19 +774,13 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, @@ -825,19 +819,13 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, @@ -894,19 +882,13 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); + cusparsecreatednvec(&vecY, static_cast(n), y); cusparseStatus_t result = cusparseSpMV( handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); @@ -938,16 +920,10 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), x); cusparseDnVecDescr_t vecY; cusparsecreatednvec(&vecY, static_cast(n), y); @@ -1517,20 +1493,14 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, cudaStream_t stream, bool row_major) { - cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnMatDescr_t matB; - cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat( + &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense_bufferSize( handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); @@ -1556,25 +1526,19 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, cudaStream_t stream, bool row_major) { - cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnMatDescr_t matB; - cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat( + &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense_bufferSize( handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } @@ -1611,25 +1575,19 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnMatDescr_t matB; - cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat( + &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } @@ -1646,28 +1604,22 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, int lda, void* buffer, cudaStream_t stream, - bool row_major ) + bool row_major) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - cusparseOrder_t order = - row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - csrRowPtrA, - csrColIndA, - csrValA); + cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); cusparseDnMatDescr_t matB; - cusparsecreatednmat(&matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat( + &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); return result; } From d9e8eec15dec156a0c762e6b60321a4a0e86287a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 17:55:30 -0500 Subject: [PATCH 08/16] Fixing definitions. --- cpp/include/raft/sparse/convert/dense.hpp | 3 +- .../raft/sparse/convert/detail/dense.cuh | 1 + .../raft/sparse/detail/cusparse_wrappers.h | 156 ++++++++++++------ cpp/test/sparse/csr_to_dense.cu | 3 + 4 files changed, 116 insertions(+), 47 deletions(-) diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp index c8d3b46d03..ac8598033d 100644 --- a/cpp/include/raft/sparse/convert/dense.hpp +++ b/cpp/include/raft/sparse/convert/dense.hpp @@ -44,6 +44,7 @@ template void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols, + value_idx nnz, const value_idx* csr_indptr, const value_idx* csr_indices, const value_t* csr_data, @@ -53,7 +54,7 @@ void csr_to_dense(cusparseHandle_t handle, bool row_major = true) { detail::csr_to_dense( - handle, nrows, ncols, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major); + handle, nrows, ncols, nnz, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major); } }; // end NAMESPACE convert diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index 0f3a4fb5e5..15f2f8518e 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -31,6 +31,7 @@ #include #include +#include namespace raft { namespace sparse { diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 5a512fea56..e76fba945e 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -774,10 +774,16 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); cusparseDnVecDescr_t vecY; cusparsecreatednvec(&vecY, static_cast(n), y); @@ -819,10 +825,16 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); cusparseDnVecDescr_t vecY; cusparsecreatednvec(&vecY, static_cast(n), y); @@ -882,10 +894,16 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); cusparseDnVecDescr_t vecY; cusparsecreatednvec(&vecY, static_cast(n), y); @@ -920,10 +938,16 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), x); + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); cusparseDnVecDescr_t vecY; cusparsecreatednvec(&vecY, static_cast(n), y); @@ -1479,28 +1503,38 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, bool row_major = false); template <> -cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - float* A, - int lda, - size_t* buffer_size, - cudaStream_t stream, - bool row_major) +inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + float* A, + int lda, + size_t* buffer_size, + cudaStream_t stream, + bool row_major) { cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnMatDescr_t matB; - cusparsecreatednmat( - &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); cusparseStatus_t result = cusparseSparseToDense_bufferSize( handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); @@ -1512,27 +1546,37 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, } template <> -cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, - int m, - int n, - int nnz, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - double* A, - int lda, - size_t* buffer_size, - cudaStream_t stream, - bool row_major) +inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + double* A, + int lda, + size_t* buffer_size, + cudaStream_t stream, + bool row_major) { cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnMatDescr_t matB; - cusparsecreatednmat( - &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); cusparseStatus_t result = cusparseSparseToDense_bufferSize( handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); @@ -1577,11 +1621,21 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnMatDescr_t matB; - cusparsecreatednmat( - &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); @@ -1609,11 +1663,21 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, CUSPARSE_CHECK(cusparseSetStream(handle, stream)); cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA); + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); cusparseDnMatDescr_t matB; - cusparsecreatednmat( - &matB, static_cast(m), static_cast(n), static_cast(lda), A, order); + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu index f3291f3bb9..60447e3a81 100644 --- a/cpp/test/sparse/csr_to_dense.cu +++ b/cpp/test/sparse/csr_to_dense.cu @@ -36,6 +36,7 @@ template struct CSRToDenseInputs { value_idx nrows; value_idx ncols; + value_idx nnz; std::vector indptr_h; std::vector indices_h; @@ -95,6 +96,7 @@ class CSRToDenseTest : public ::testing::TestWithParam> inputs_i32_f = { {4, 4, + 8, {0, 2, 4, 6, 8}, {0, 1, 2, 3, 0, 1, 2, 3}, // indices {1.0f, 3.0f, 1.0f, 5.0f, 50.0f, 28.0f, 16.0f, 2.0f}, From 9a30ac561adbdf50c03f97d8e81c26f1a4c3a3cf Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 18:11:23 -0500 Subject: [PATCH 09/16] Changing cusparse include --- cpp/include/raft/sparse/detail/cusparse_macros.h | 2 +- cpp/include/raft/sparse/detail/cusparse_wrappers.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h index 7e89014b61..1f9f0e5175 100644 --- a/cpp/include/raft/sparse/detail/cusparse_macros.h +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -16,7 +16,7 @@ #pragma once -#include +#include #include ///@todo: enable this once logging is enabled //#include diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index e76fba945e..c1b6934bee 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include From 8dd3564301414547e14ca9872ea85c73a5408ce4 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 31 Jan 2022 19:10:43 -0500 Subject: [PATCH 10/16] Adding doxygen for missing param --- cpp/include/raft/sparse/convert/detail/dense.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index 15f2f8518e..bc6828d471 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -68,6 +68,7 @@ __global__ void csr_to_dense_warp_per_row_kernel( * @param[in] handle : cusparse handle for conversion * @param[in] nrows : number of rows in CSR * @param[in] ncols : number of columns in CSR + * @param[in] nnz : the number of nonzeros in CSR * @param[in] csr_indptr : CSR row index pointer array * @param[in] csr_indices : CSR column indices array * @param[in] csr_data : CSR data array From 75c095cf0e4b1516e1c289fa270041296ed67c98 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 1 Feb 2022 10:32:38 -0500 Subject: [PATCH 11/16] Fixing cusparse API compatibility for CUDA versions < 11.2 --- .../raft/sparse/detail/cusparse_wrappers.h | 131 +++++++++++++++++- 1 file changed, 127 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index c1b6934bee..febe1c0e78 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -773,6 +773,7 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#if CUDART_VERSION >= 11020 cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, m, @@ -802,8 +803,32 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; + +#else + + return cusparseCsrmvEx_bufferSize(handle, + alg, + transA, + m, + n, + nnz, + alpha, + CUDA_R_32F, + descrA, + csrValA, + CUDA_R_32F, + csrRowPtrA, + csrColIndA, + x, + CUDA_R_32F, + beta, + CUDA_R_32F, + y, + CUDA_R_32F, + CUDA_R_32F, + bufferSizeInBytes); +#endif } template <> inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, @@ -824,6 +849,8 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, m, @@ -853,8 +880,30 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; +#else + return cusparseCsrmvEx_bufferSize(handle, + alg, + transA, + m, + n, + nnz, + alpha, + CUDA_R_64F, + descrA, + csrValA, + CUDA_R_64F, + csrRowPtrA, + csrColIndA, + x, + CUDA_R_64F, + beta, + CUDA_R_64F, + y, + CUDA_R_64F, + CUDA_R_64F, + bufferSizeInBytes); +#endif } template @@ -893,6 +942,8 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, m, @@ -914,8 +965,30 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; +#else + return cusparseCsrmvEx(handle, + alg, + transA, + m, + n, + nnz, + alpha, + CUDA_R_32F, + descrA, + csrValA, + CUDA_R_32F, + csrRowPtrA, + csrColIndA, + x, + CUDA_R_32F, + beta, + CUDA_R_32F, + y, + CUDA_R_32F, + CUDA_R_32F, + buffer); +#endif } template <> inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, @@ -937,6 +1010,7 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#if CUDART_VERSION >= 11020 cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, m, @@ -958,8 +1032,32 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; + +#else + + return cusparseCsrmvEx(handle, + alg, + transA, + m, + n, + nnz, + alpha, + CUDA_R_64F, + descrA, + csrValA, + CUDA_R_64F, + csrRowPtrA, + csrColIndA, + x, + CUDA_R_64F, + beta, + CUDA_R_64F, + y, + CUDA_R_64F, + CUDA_R_64F, + buffer); +#endif } /** @} */ @@ -1517,6 +1615,7 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, cudaStream_t stream, bool row_major) { +#if CUDART_VERSION >= 11020 cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; @@ -1542,6 +1641,12 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); +#else + + cusparseStatus_t result = CUSPARSE_STATUS_SUCCESS; + buffer_size[0] = 0; + +#endif return result; } @@ -1560,6 +1665,7 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, cudaStream_t stream, bool row_major) { +#if CUDART_VERSION >= 11020 cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, @@ -1584,6 +1690,12 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); +#else + cusparseStatus_t result = CUSPARSE_STATUS_SUCCESS; + buffer_size[0] = 0; + +#endif + return result; } @@ -1619,6 +1731,7 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); +#if CUDART_VERSION >= 11020 cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, @@ -1643,6 +1756,10 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); +#else + return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); +#endif + return result; } template <> @@ -1661,6 +1778,8 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, bool row_major) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseSpMatDescr_t matA; cusparsecreatecsr(&matA, @@ -1684,6 +1803,10 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); +#else + + return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); +#endif return result; } From 0ce2d3a6285b78bc7886a132e65e935b2783b7af Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 1 Feb 2022 10:33:49 -0500 Subject: [PATCH 12/16] Fixing doc --- cpp/include/raft/sparse/convert/dense.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp index ac8598033d..e28cfb4d2c 100644 --- a/cpp/include/raft/sparse/convert/dense.hpp +++ b/cpp/include/raft/sparse/convert/dense.hpp @@ -32,6 +32,7 @@ namespace convert { * @param[in] handle : cusparse handle for conversion * @param[in] nrows : number of rows in CSR * @param[in] ncols : number of columns in CSR + * @param[in] nnz : number of nonzeros in CSR * @param[in] csr_indptr : CSR row index pointer array * @param[in] csr_indices : CSR column indices array * @param[in] csr_data : CSR data array From 65cfd0401e08da7c29479881ca35467f08eb462e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 1 Feb 2022 10:37:03 -0500 Subject: [PATCH 13/16] Fixing doc and style --- cpp/include/raft/sparse/convert/dense.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp index e28cfb4d2c..2570d7ae65 100644 --- a/cpp/include/raft/sparse/convert/dense.hpp +++ b/cpp/include/raft/sparse/convert/dense.hpp @@ -32,7 +32,7 @@ namespace convert { * @param[in] handle : cusparse handle for conversion * @param[in] nrows : number of rows in CSR * @param[in] ncols : number of columns in CSR - * @param[in] nnz : number of nonzeros in CSR + * @param[in] nnz : number of nonzeros in CSR * @param[in] csr_indptr : CSR row index pointer array * @param[in] csr_indices : CSR column indices array * @param[in] csr_data : CSR data array From 213b314b125efd76e8a059497e136011e00165d1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 1 Feb 2022 11:15:43 -0500 Subject: [PATCH 14/16] Fixing bad return --- cpp/include/raft/sparse/detail/cusparse_wrappers.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index febe1c0e78..8a1bf24dd2 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -1756,11 +1756,10 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + return result; #else return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); #endif - - return result; } template <> inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, @@ -1803,12 +1802,12 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + + return result; #else return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); #endif - - return result; } /** @} */ From a0aa3f6ff0da80763476fa0a29b4e432c98b3e6d Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 1 Feb 2022 12:14:44 -0500 Subject: [PATCH 15/16] Moving cusparse wrappers to detail namespace --- .../raft/sparse/convert/detail/csr.cuh | 9 +-- .../raft/sparse/convert/detail/dense.cuh | 48 +++++++-------- .../raft/sparse/detail/cusparse_wrappers.h | 2 + .../raft/sparse/linalg/detail/transpose.h | 60 +++++++++---------- cpp/include/raft/spectral/matrix_wrappers.hpp | 53 ++++++++-------- 5 files changed, 88 insertions(+), 84 deletions(-) diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index b787a9d588..751335dfca 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -61,15 +61,16 @@ void coo_to_csr(const raft::handle_t& handle, cudaMemcpyAsync(dstRows.data(), srcRows, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream)); RAFT_CUDA_TRY( cudaMemcpyAsync(dstCols, srcCols, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream)); - auto buffSize = raft::sparse::cusparsecoosort_bufferSizeExt( + auto buffSize = raft::sparse::detail::cusparsecoosort_bufferSizeExt( cusparseHandle, m, m, nnz, srcRows, srcCols, stream); rmm::device_uvector pBuffer(buffSize, stream); rmm::device_uvector P(nnz, stream); RAFT_CUSPARSE_TRY(cusparseCreateIdentityPermutation(cusparseHandle, nnz, P.data())); - raft::sparse::cusparsecoosortByRow( + raft::sparse::detail::cusparsecoosortByRow( cusparseHandle, m, m, nnz, dstRows.data(), dstCols, P.data(), pBuffer.data(), stream); - raft::sparse::cusparsegthr(cusparseHandle, nnz, srcVals, dstVals, P.data(), stream); - raft::sparse::cusparsecoo2csr(cusparseHandle, dstRows.data(), nnz, m, dst_offsets, stream); + raft::sparse::detail::cusparsegthr(cusparseHandle, nnz, srcVals, dstVals, P.data(), stream); + raft::sparse::detail::cusparsecoo2csr( + cusparseHandle, dstRows.data(), nnz, m, dst_offsets, stream); RAFT_CUDA_TRY(cudaDeviceSynchronize()); } diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index bc6828d471..b2756b81c9 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -100,33 +100,33 @@ void csr_to_dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL)); size_t buffer_size; - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize(handle, - nrows, - ncols, - nnz, - out_mat, - csr_data, - csr_indptr, - csr_indices, - out, - lda, - &buffer_size, - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2dense_buffersize(handle, + nrows, + ncols, + nnz, + out_mat, + csr_data, + csr_indptr, + csr_indices, + out, + lda, + &buffer_size, + stream)); rmm::device_uvector buffer(buffer_size, stream); - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense(handle, - nrows, - ncols, - nnz, - out_mat, - csr_data, - csr_indptr, - csr_indices, - out, - lda, - buffer.data(), - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2dense(handle, + nrows, + ncols, + nnz, + out_mat, + csr_data, + csr_indptr, + csr_indices, + out, + lda, + buffer.data(), + stream)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat)); diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 8a1bf24dd2..aef3976294 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -22,6 +22,7 @@ namespace raft { namespace sparse { +namespace detail { /** * @defgroup gthr cusparse gather methods @@ -1812,5 +1813,6 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, /** @} */ +} // namespace detail } // namespace sparse } // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h index 699c67544e..398877eaab 100644 --- a/cpp/include/raft/sparse/linalg/detail/transpose.h +++ b/cpp/include/raft/sparse/linalg/detail/transpose.h @@ -70,39 +70,39 @@ void csr_transpose(cusparseHandle_t handle, { size_t convert_csc_workspace_size = 0; - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2csc_bufferSize(handle, - csr_nrows, - csr_ncols, - nnz, - csr_data, - csr_indptr, - csr_indices, - csc_data, - csc_indptr, - csc_indices, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - &convert_csc_workspace_size, - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2csc_bufferSize(handle, + csr_nrows, + csr_ncols, + nnz, + csr_data, + csr_indptr, + csr_indices, + csc_data, + csc_indptr, + csc_indices, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + &convert_csc_workspace_size, + stream)); rmm::device_uvector convert_csc_workspace(convert_csc_workspace_size, stream); - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2csc(handle, - csr_nrows, - csr_ncols, - nnz, - csr_data, - csr_indptr, - csr_indices, - csc_data, - csc_indptr, - csc_indices, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - convert_csc_workspace.data(), - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2csc(handle, + csr_nrows, + csr_ncols, + nnz, + csr_data, + csr_indptr, + csr_indices, + csc_data, + csc_indptr, + csc_indices, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + convert_csc_workspace.data(), + stream)); } }; // end NAMESPACE detail diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index bd1866a4f0..378cebcb4a 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -208,24 +208,24 @@ struct sparse_matrix_t { // void*; the casts should be harmless) // cusparseSpMatDescr_t matA; - RAFT_CUSPARSE_TRY(cusparsecreatecsr(&matA, - nrows_, - ncols_, - nnz_, - const_cast(row_offsets_), - const_cast(col_indices_), - const_cast(values_))); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA, + nrows_, + ncols_, + nnz_, + const_cast(row_offsets_), + const_cast(col_indices_), + const_cast(values_))); cusparseDnVecDescr_t vecX; - RAFT_CUSPARSE_TRY(cusparsecreatednvec(&vecX, size_x, x)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x)); cusparseDnVecDescr_t vecY; - RAFT_CUSPARSE_TRY(cusparsecreatednvec(&vecY, size_y, y)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y)); // get (scratch) external device buffer size: // size_t bufferSize; - RAFT_CUSPARSE_TRY(cusparsespmv_buffersize( + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize( cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream)); // allocate external buffer: @@ -234,7 +234,7 @@ struct sparse_matrix_t { // finally perform SpMV: // - RAFT_CUSPARSE_TRY(cusparsespmv( + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv( cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream)); // free descriptors: @@ -244,7 +244,8 @@ struct sparse_matrix_t { RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX)); RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA)); #else - RAFT_CUSPARSE_TRY(cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); cusparseMatDescr_t descr = 0; RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr)); if (symmetric) { @@ -253,20 +254,20 @@ struct sparse_matrix_t { RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); } RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); - RAFT_CUSPARSE_TRY(cusparsecsrmv(cusparse_h, - trans, - nrows_, - ncols_, - nnz_, - &alpha, - descr, - values_, - row_offsets_, - col_indices_, - x, - &beta, - y, - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h, + trans, + nrows_, + ncols_, + nnz_, + &alpha, + descr, + values_, + row_offsets_, + col_indices_, + x, + &beta, + y, + stream)); RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr)); #endif } From 16474fbaaa77cca7c8c35491433cfb8f5b25b046 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 8 Feb 2022 11:32:59 -0500 Subject: [PATCH 16/16] Fixing merge --- cpp/include/raft/handle.hpp | 8 +------- .../raft/sparse/distance/detail/bin_distance.cuh | 3 +-- .../raft/sparse/distance/detail/ip_distance.cuh | 5 ++--- .../raft/sparse/distance/detail/lp_distance.cuh | 4 +--- cpp/include/raft/sparse/distance/distance.hpp | 14 +------------- cpp/include/raft/sparse/op/detail/slice.h | 2 +- cpp/include/raft/spectral/matrix_wrappers.hpp | 2 +- cpp/test/sparse/dist_coo_spmv.cu | 6 ++---- cpp/test/sparse/distance.cu | 2 +- cpp/test/sparse/knn.cu | 2 -- 10 files changed, 11 insertions(+), 37 deletions(-) diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp index 1e866ea1d5..7d6a5bfafd 100644 --- a/cpp/include/raft/handle.hpp +++ b/cpp/include/raft/handle.hpp @@ -35,16 +35,10 @@ #include "cudart_utils.h" #include -<<<<<<< HEAD -#include -#include -#include -======= #include #include #include -#include ->>>>>>> rapidsai/branch-22.04 +#include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh index 1d302f2fc7..124fa2285d 100644 --- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh @@ -20,8 +20,7 @@ #include #include -#include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh index be5f2405c9..6e717e9920 100644 --- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh @@ -19,11 +19,10 @@ #include #include #include -#include +#include #include -#include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index c63bcecee1..de9049ced7 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -20,9 +20,7 @@ #include #include -#include -#include - +#include #include #include diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp index 028b8f8531..dc9837ab43 100644 --- a/cpp/include/raft/sparse/distance/distance.hpp +++ b/cpp/include/raft/sparse/distance/distance.hpp @@ -16,28 +16,16 @@ #pragma once -#include +#include #include -#include #include -#include -#include - -#include -#include -#include -#include -#include -#include #include #include #include #include -#include - namespace raft { namespace sparse { namespace distance { diff --git a/cpp/include/raft/sparse/op/detail/slice.h b/cpp/include/raft/sparse/op/detail/slice.h index c1a2fb7121..e3c0f09e14 100644 --- a/cpp/include/raft/sparse/op/detail/slice.h +++ b/cpp/include/raft/sparse/op/detail/slice.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index cd33d9c28e..d86dc21135 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -17,8 +17,8 @@ #include #include -#include #include +#include #include #include diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu index 778295bda6..e2288daed9 100644 --- a/cpp/test/sparse/dist_coo_spmv.cu +++ b/cpp/test/sparse/dist_coo_spmv.cu @@ -16,12 +16,10 @@ #include -#include - #include -#include -#include +#include #include +#include #include #include diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu index 4a3c4133f9..7c61f2ed1c 100644 --- a/cpp/test/sparse/distance.cu +++ b/cpp/test/sparse/distance.cu @@ -19,8 +19,8 @@ #include #include -#include #include +#include #include diff --git a/cpp/test/sparse/knn.cu b/cpp/test/sparse/knn.cu index a480d1be6a..5a066c2c28 100644 --- a/cpp/test/sparse/knn.cu +++ b/cpp/test/sparse/knn.cu @@ -18,12 +18,10 @@ #include #include "../test_utils.h" -#include #include #include #include -#include namespace raft { namespace sparse {