From 67271ebe3cbcf172c8c018d9ac44dc95742b0081 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Wed, 12 Jan 2022 18:26:14 -0500
Subject: [PATCH 01/16] Removing deprecated (and unused) functions from
 cusparse_wrappers

---
 cpp/include/raft/sparse/cusparse_wrappers.h | 807 --------------------
 1 file changed, 807 deletions(-)
diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h
index e2306686ce..0e93acf8d0 100644
--- a/cpp/include/raft/sparse/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/cusparse_wrappers.h
@@ -240,77 +240,6 @@ inline void cusparsecoosortByRow(  // NOLINT
 }
 /** @} */
 
-/**
- * @defgroup Gemmi cusparse gemmi operations
- * @{
- */
-template <typename T>
-cusparseStatus_t cusparsegemmi(  // NOLINT
-  cusparseHandle_t handle,
-  int m,
-  int n,
-  int k,
-  int nnz,
-  const T* alpha,
-  const T* A,
-  int lda,
-  const T* cscValB,
-  const int* cscColPtrB,
-  const int* cscRowIndB,
-  const T* beta,
-  T* C,
-  int ldc,
-  cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
-                                      int m,
-                                      int n,
-                                      int k,
-                                      int nnz,
-                                      const float* alpha,
-                                      const float* A,
-                                      int lda,
-                                      const float* cscValB,
-                                      const int* cscColPtrB,
-                                      const int* cscRowIndB,
-                                      const float* beta,
-                                      float* C,
-                                      int ldc,
-                                      cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseSgemmi(
-    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
-#pragma GCC diagnostic pop
-}
-template <>
-inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
-                                      int m,
-                                      int n,
-                                      int k,
-                                      int nnz,
-                                      const double* alpha,
-                                      const double* A,
-                                      int lda,
-                                      const double* cscValB,
-                                      const int* cscColPtrB,
-                                      const int* cscRowIndB,
-                                      const double* beta,
-                                      double* C,
-                                      int ldc,
-                                      cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseDgemmi(
-    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
-#pragma GCC diagnostic pop
-}
-/** @} */
-
 #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
 /**
  * @defgroup cusparse Create CSR operations
@@ -618,202 +547,6 @@ inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
 /** @} */
 #endif
 
-#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
-/**
- * @defgroup Csrmm cusparse csrmm operations
- * @{
- */
-template <typename T>
-cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle,
-                                         cusparseOperation_t opA,
-                                         cusparseOperation_t opB,
-                                         const T* alpha,
-                                         const cusparseSpMatDescr_t matA,
-                                         const cusparseDnMatDescr_t matB,
-                                         const T* beta,
-                                         cusparseDnMatDescr_t matC,
-                                         cusparseSpMMAlg_t alg,
-                                         size_t* bufferSize,
-                                         cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle,
-                                                cusparseOperation_t opA,
-                                                cusparseOperation_t opB,
-                                                const float* alpha,
-                                                const cusparseSpMatDescr_t matA,
-                                                const cusparseDnMatDescr_t matB,
-                                                const float* beta,
-                                                cusparseDnMatDescr_t matC,
-                                                cusparseSpMMAlg_t alg,
-                                                size_t* bufferSize,
-                                                cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseSpMM_bufferSize(
-    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, bufferSize);
-}
-template <>
-inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle,
-                                                cusparseOperation_t opA,
-                                                cusparseOperation_t opB,
-                                                const double* alpha,
-                                                const cusparseSpMatDescr_t matA,
-                                                const cusparseDnMatDescr_t matB,
-                                                const double* beta,
-                                                cusparseDnMatDescr_t matC,
-                                                cusparseSpMMAlg_t alg,
-                                                size_t* bufferSize,
-                                                cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseSpMM_bufferSize(
-    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, bufferSize);
-}
-template <typename T>
-inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
-                                     cusparseOperation_t opA,
-                                     cusparseOperation_t opB,
-                                     const T* alpha,
-                                     const cusparseSpMatDescr_t matA,
-                                     const cusparseDnMatDescr_t matB,
-                                     const T* beta,
-                                     cusparseDnMatDescr_t matC,
-                                     cusparseSpMMAlg_t alg,
-                                     T* externalBuffer,
-                                     cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
-                                     cusparseOperation_t opA,
-                                     cusparseOperation_t opB,
-                                     const float* alpha,
-                                     const cusparseSpMatDescr_t matA,
-                                     const cusparseDnMatDescr_t matB,
-                                     const float* beta,
-                                     cusparseDnMatDescr_t matC,
-                                     cusparseSpMMAlg_t alg,
-                                     float* externalBuffer,
-                                     cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseSpMM(
-    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, externalBuffer);
-}
-template <>
-inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
-                                     cusparseOperation_t opA,
-                                     cusparseOperation_t opB,
-                                     const double* alpha,
-                                     const cusparseSpMatDescr_t matA,
-                                     const cusparseDnMatDescr_t matB,
-                                     const double* beta,
-                                     cusparseDnMatDescr_t matC,
-                                     cusparseSpMMAlg_t alg,
-                                     double* externalBuffer,
-                                     cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseSpMM(
-    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, externalBuffer);
-}
-/** @} */
-#else
-/**
- * @defgroup Csrmm cusparse csrmm operations
- * @{
- */
-template <typename T>
-cusparseStatus_t cusparsecsrmm(  // NOLINT
-  cusparseHandle_t handle,
-  cusparseOperation_t trans,
-  int m,
-  int n,
-  int k,
-  int nnz,
-  const T* alpha,
-  const cusparseMatDescr_t descr,
-  const T* csrVal,
-  const int* csrRowPtr,
-  const int* csrColInd,
-  const T* x,
-  const int ldx,
-  const T* beta,
-  T* y,
-  const int ldy,
-  cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
-                                      cusparseOperation_t trans,
-                                      int m,
-                                      int n,
-                                      int k,
-                                      int nnz,
-                                      const float* alpha,
-                                      const cusparseMatDescr_t descr,
-                                      const float* csrVal,
-                                      const int* csrRowPtr,
-                                      const int* csrColInd,
-                                      const float* x,
-                                      const int ldx,
-                                      const float* beta,
-                                      float* y,
-                                      const int ldy,
-                                      cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseScsrmm(
-    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
-}
-template <>
-inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
-                                      cusparseOperation_t trans,
-                                      int m,
-                                      int n,
-                                      int k,
-                                      int nnz,
-                                      const double* alpha,
-                                      const cusparseMatDescr_t descr,
-                                      const double* csrVal,
-                                      const int* csrRowPtr,
-                                      const int* csrColInd,
-                                      const double* x,
-                                      const int ldx,
-                                      const double* beta,
-                                      double* y,
-                                      const int ldy,
-                                      cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseDcsrmm(
-    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
-}
-/** @} */
-#endif
-
-/**
- * @defgroup csr2coo cusparse CSR to COO converter methods
- * @{
- */
-template <typename T>
-void cusparsecsr2coo(  // NOLINT
-  cusparseHandle_t handle,
-  const int n,
-  const int nnz,
-  const T* csrRowPtr,
-  T* cooRowInd,
-  cudaStream_t stream);
-template <>
-inline void cusparsecsr2coo(cusparseHandle_t handle,
-                            const int n,
-                            const int nnz,
-                            const int* csrRowPtr,
-                            int* cooRowInd,
-                            cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO));
-}
-/** @} */
-
 /**
  * @defgroup setpointermode cusparse set pointer mode method
  * @{
@@ -835,212 +568,6 @@ inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
 }
 /** @} */
 
-/**
- * @defgroup CsrmvEx cusparse csrmvex operations
- * @{
- */
-template <typename T>
-cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
-                                            cusparseAlgMode_t alg,
-                                            cusparseOperation_t transA,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const T* alpha,
-                                            const cusparseMatDescr_t descrA,
-                                            const T* csrValA,
-                                            const int* csrRowPtrA,
-                                            const int* csrColIndA,
-                                            const T* x,
-                                            const T* beta,
-                                            T* y,
-                                            size_t* bufferSizeInBytes,
-                                            cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
-                                                   cusparseAlgMode_t alg,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   int n,
-                                                   int nnz,
-                                                   const float* alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const float* csrValA,
-                                                   const int* csrRowPtrA,
-                                                   const int* csrColIndA,
-                                                   const float* x,
-                                                   const float* beta,
-                                                   float* y,
-                                                   size_t* bufferSizeInBytes,
-                                                   cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseCsrmvEx_bufferSize(handle,
-                                    alg,
-                                    transA,
-                                    m,
-                                    n,
-                                    nnz,
-                                    alpha,
-                                    CUDA_R_32F,
-                                    descrA,
-                                    csrValA,
-                                    CUDA_R_32F,
-                                    csrRowPtrA,
-                                    csrColIndA,
-                                    x,
-                                    CUDA_R_32F,
-                                    beta,
-                                    CUDA_R_32F,
-                                    y,
-                                    CUDA_R_32F,
-                                    CUDA_R_32F,
-                                    bufferSizeInBytes);
-}
-template <>
-inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
-                                                   cusparseAlgMode_t alg,
-                                                   cusparseOperation_t transA,
-                                                   int m,
-                                                   int n,
-                                                   int nnz,
-                                                   const double* alpha,
-                                                   const cusparseMatDescr_t descrA,
-                                                   const double* csrValA,
-                                                   const int* csrRowPtrA,
-                                                   const int* csrColIndA,
-                                                   const double* x,
-                                                   const double* beta,
-                                                   double* y,
-                                                   size_t* bufferSizeInBytes,
-                                                   cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseCsrmvEx_bufferSize(handle,
-                                    alg,
-                                    transA,
-                                    m,
-                                    n,
-                                    nnz,
-                                    alpha,
-                                    CUDA_R_64F,
-                                    descrA,
-                                    csrValA,
-                                    CUDA_R_64F,
-                                    csrRowPtrA,
-                                    csrColIndA,
-                                    x,
-                                    CUDA_R_64F,
-                                    beta,
-                                    CUDA_R_64F,
-                                    y,
-                                    CUDA_R_64F,
-                                    CUDA_R_64F,
-                                    bufferSizeInBytes);
-}
-
-template <typename T>
-cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
-                                 cusparseAlgMode_t alg,
-                                 cusparseOperation_t transA,
-                                 int m,
-                                 int n,
-                                 int nnz,
-                                 const T* alpha,
-                                 const cusparseMatDescr_t descrA,
-                                 const T* csrValA,
-                                 const int* csrRowPtrA,
-                                 const int* csrColIndA,
-                                 const T* x,
-                                 const T* beta,
-                                 T* y,
-                                 T* buffer,
-                                 cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
-                                        cusparseAlgMode_t alg,
-                                        cusparseOperation_t transA,
-                                        int m,
-                                        int n,
-                                        int nnz,
-                                        const float* alpha,
-                                        const cusparseMatDescr_t descrA,
-                                        const float* csrValA,
-                                        const int* csrRowPtrA,
-                                        const int* csrColIndA,
-                                        const float* x,
-                                        const float* beta,
-                                        float* y,
-                                        float* buffer,
-                                        cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseCsrmvEx(handle,
-                         alg,
-                         transA,
-                         m,
-                         n,
-                         nnz,
-                         alpha,
-                         CUDA_R_32F,
-                         descrA,
-                         csrValA,
-                         CUDA_R_32F,
-                         csrRowPtrA,
-                         csrColIndA,
-                         x,
-                         CUDA_R_32F,
-                         beta,
-                         CUDA_R_32F,
-                         y,
-                         CUDA_R_32F,
-                         CUDA_R_32F,
-                         buffer);
-}
-template <>
-inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
-                                        cusparseAlgMode_t alg,
-                                        cusparseOperation_t transA,
-                                        int m,
-                                        int n,
-                                        int nnz,
-                                        const double* alpha,
-                                        const cusparseMatDescr_t descrA,
-                                        const double* csrValA,
-                                        const int* csrRowPtrA,
-                                        const int* csrColIndA,
-                                        const double* x,
-                                        const double* beta,
-                                        double* y,
-                                        double* buffer,
-                                        cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseCsrmvEx(handle,
-                         alg,
-                         transA,
-                         m,
-                         n,
-                         nnz,
-                         alpha,
-                         CUDA_R_64F,
-                         descrA,
-                         csrValA,
-                         CUDA_R_64F,
-                         csrRowPtrA,
-                         csrColIndA,
-                         x,
-                         CUDA_R_64F,
-                         beta,
-                         CUDA_R_64F,
-                         y,
-                         CUDA_R_64F,
-                         CUDA_R_64F,
-                         buffer);
-}
-
-/** @} */
-
 /**
  * @defgroup Csr2cscEx2 cusparse csr->csc conversion
  * @{
@@ -1225,340 +752,6 @@ inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
 
 /** @} */
 
-/**
- * @defgroup csrgemm2 cusparse sparse gemm operations
- * @{
- */
-
-template <typename T>
-cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                int k,
-                                                const T* alpha,
-                                                const T* beta,
-                                                const cusparseMatDescr_t matA,
-                                                int nnzA,
-                                                const int* rowindA,
-                                                const int* indicesA,
-                                                const cusparseMatDescr_t matB,
-                                                int nnzB,
-                                                const int* rowindB,
-                                                const int* indicesB,
-                                                const cusparseMatDescr_t matD,
-                                                int nnzD,
-                                                const int* rowindD,
-                                                const int* indicesD,
-                                                csrgemm2Info_t info,
-                                                size_t* pBufferSizeInBytes,
-                                                cudaStream_t stream);
-
-template <>
-inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
-                                                       int m,
-                                                       int n,
-                                                       int k,
-                                                       const float* alpha,
-                                                       const float* beta,
-                                                       const cusparseMatDescr_t matA,
-                                                       int nnzA,
-                                                       const int* rowindA,
-                                                       const int* indicesA,
-                                                       const cusparseMatDescr_t matB,
-                                                       int nnzB,
-                                                       const int* rowindB,
-                                                       const int* indicesB,
-                                                       const cusparseMatDescr_t matD,
-                                                       int nnzD,
-                                                       const int* rowindD,
-                                                       const int* indicesD,
-                                                       csrgemm2Info_t info,
-                                                       size_t* pBufferSizeInBytes,
-                                                       cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseScsrgemm2_bufferSizeExt(handle,
-                                         m,
-                                         n,
-                                         k,
-                                         alpha,
-                                         matA,
-                                         nnzA,
-                                         rowindA,
-                                         indicesA,
-                                         matB,
-                                         nnzB,
-                                         rowindB,
-                                         indicesB,
-                                         beta,
-                                         matD,
-                                         nnzD,
-                                         rowindD,
-                                         indicesD,
-                                         info,
-                                         pBufferSizeInBytes);
-#pragma GCC diagnostic pop
-}
-
-template <>
-inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
-                                                       int m,
-                                                       int n,
-                                                       int k,
-                                                       const double* alpha,
-                                                       const double* beta,
-                                                       const cusparseMatDescr_t matA,
-                                                       int nnzA,
-                                                       const int* rowindA,
-                                                       const int* indicesA,
-                                                       const cusparseMatDescr_t matB,
-                                                       int nnzB,
-                                                       const int* rowindB,
-                                                       const int* indicesB,
-                                                       const cusparseMatDescr_t matD,
-                                                       int nnzD,
-                                                       const int* rowindD,
-                                                       const int* indicesD,
-                                                       csrgemm2Info_t info,
-                                                       size_t* pBufferSizeInBytes,
-                                                       cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseDcsrgemm2_bufferSizeExt(handle,
-                                         m,
-                                         n,
-                                         k,
-                                         alpha,
-                                         matA,
-                                         nnzA,
-                                         rowindA,
-                                         indicesA,
-                                         matB,
-                                         nnzB,
-                                         rowindB,
-                                         indicesB,
-                                         beta,
-                                         matD,
-                                         nnzD,
-                                         rowindD,
-                                         indicesD,
-                                         info,
-                                         pBufferSizeInBytes);
-#pragma GCC diagnostic pop
-}
-
-inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle,
-                                            int m,
-                                            int n,
-                                            int k,
-                                            const cusparseMatDescr_t matA,
-                                            int nnzA,
-                                            const int* rowindA,
-                                            const int* indicesA,
-                                            const cusparseMatDescr_t matB,
-                                            int nnzB,
-                                            const int* rowindB,
-                                            const int* indicesB,
-                                            const cusparseMatDescr_t matD,
-                                            int nnzD,
-                                            const int* rowindD,
-                                            const int* indicesD,
-                                            const cusparseMatDescr_t matC,
-                                            int* rowindC,
-                                            int* nnzC,
-                                            const csrgemm2Info_t info,
-                                            void* pBuffer,
-                                            cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseXcsrgemm2Nnz(handle,
-                              m,
-                              n,
-                              k,
-                              matA,
-                              nnzA,
-                              rowindA,
-                              indicesA,
-                              matB,
-                              nnzB,
-                              rowindB,
-                              indicesB,
-                              matD,
-                              nnzD,
-                              rowindD,
-                              indicesD,
-                              matC,
-                              rowindC,
-                              nnzC,
-                              info,
-                              pBuffer);
-#pragma GCC diagnostic pop
-}
-
-template <typename T>
-cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
-                                  int m,
-                                  int n,
-                                  int k,
-                                  const T* alpha,
-                                  const cusparseMatDescr_t descrA,
-                                  int nnzA,
-                                  const T* csrValA,
-                                  const int* csrRowPtrA,
-                                  const int* csrColIndA,
-                                  const cusparseMatDescr_t descrB,
-                                  int nnzB,
-                                  const T* csrValB,
-                                  const int* csrRowPtrB,
-                                  const int* csrColIndB,
-                                  const T* beta,
-                                  const cusparseMatDescr_t descrD,
-                                  int nnzD,
-                                  const T* csrValD,
-                                  const int* csrRowPtrD,
-                                  const int* csrColIndD,
-                                  const cusparseMatDescr_t descrC,
-                                  T* csrValC,
-                                  const int* csrRowPtrC,
-                                  int* csrColIndC,
-                                  const csrgemm2Info_t info,
-                                  void* pBuffer,
-                                  cudaStream_t stream);
-
-template <>
-inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
-                                         int m,
-                                         int n,
-                                         int k,
-                                         const float* alpha,
-                                         const cusparseMatDescr_t descrA,
-                                         int nnzA,
-                                         const float* csrValA,
-                                         const int* csrRowPtrA,
-                                         const int* csrColIndA,
-                                         const cusparseMatDescr_t descrB,
-                                         int nnzB,
-                                         const float* csrValB,
-                                         const int* csrRowPtrB,
-                                         const int* csrColIndB,
-                                         const float* beta,
-                                         const cusparseMatDescr_t descrD,
-                                         int nnzD,
-                                         const float* csrValD,
-                                         const int* csrRowPtrD,
-                                         const int* csrColIndD,
-                                         const cusparseMatDescr_t descrC,
-                                         float* csrValC,
-                                         const int* csrRowPtrC,
-                                         int* csrColIndC,
-                                         const csrgemm2Info_t info,
-                                         void* pBuffer,
-                                         cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseScsrgemm2(handle,
-                           m,
-                           n,
-                           k,
-                           alpha,
-                           descrA,
-                           nnzA,
-                           csrValA,
-                           csrRowPtrA,
-                           csrColIndA,
-                           descrB,
-                           nnzB,
-                           csrValB,
-                           csrRowPtrB,
-                           csrColIndB,
-                           beta,
-                           descrD,
-                           nnzD,
-                           csrValD,
-                           csrRowPtrD,
-                           csrColIndD,
-                           descrC,
-                           csrValC,
-                           csrRowPtrC,
-                           csrColIndC,
-                           info,
-                           pBuffer);
-#pragma GCC diagnostic pop
-}
-
-template <>
-inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
-                                         int m,
-                                         int n,
-                                         int k,
-                                         const double* alpha,
-                                         const cusparseMatDescr_t descrA,
-                                         int nnzA,
-                                         const double* csrValA,
-                                         const int* csrRowPtrA,
-                                         const int* csrColIndA,
-                                         const cusparseMatDescr_t descrB,
-                                         int nnzB,
-                                         const double* csrValB,
-                                         const int* csrRowPtrB,
-                                         const int* csrColIndB,
-                                         const double* beta,
-                                         const cusparseMatDescr_t descrD,
-                                         int nnzD,
-                                         const double* csrValD,
-                                         const int* csrRowPtrD,
-                                         const int* csrColIndD,
-                                         const cusparseMatDescr_t descrC,
-                                         double* csrValC,
-                                         const int* csrRowPtrC,
-                                         int* csrColIndC,
-                                         const csrgemm2Info_t info,
-                                         void* pBuffer,
-                                         cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseDcsrgemm2(handle,
-                           m,
-                           n,
-                           k,
-                           alpha,
-                           descrA,
-                           nnzA,
-                           csrValA,
-                           csrRowPtrA,
-                           csrColIndA,
-                           descrB,
-                           nnzB,
-                           csrValB,
-                           csrRowPtrB,
-                           csrColIndB,
-                           beta,
-                           descrD,
-                           nnzD,
-                           csrValD,
-                           csrRowPtrD,
-                           csrColIndD,
-                           descrC,
-                           csrValC,
-                           csrRowPtrC,
-                           csrColIndC,
-                           info,
-                           pBuffer);
-#pragma GCC diagnostic pop
-}
-
-/** @} */
-
 /**
  * @defgroup csrgemm2 cusparse sparse gemm operations
  * @{

From 41676d513f5bc272960bc073b3a0d4a64aaf1ee2 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 15:35:29 -0500
Subject: [PATCH 02/16] Updating cusparse wrappers to remove deprecation
 warnings

---
 .../raft/sparse/convert/detail/dense.cuh      |    9 +-
 cpp/include/raft/sparse/cusparse_wrappers.h   | 1672 +++++++++++++----
 2 files changed, 1306 insertions(+), 375 deletions(-)

diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh
index 9f48fd2172..d826d5ea00 100644
--- a/cpp/include/raft/sparse/convert/detail/dense.cuh
+++ b/cpp/include/raft/sparse/convert/detail/dense.cuh
@@ -79,6 +79,7 @@ template <typename value_idx, typename value_t>
 void csr_to_dense(cusparseHandle_t handle,
                   value_idx nrows,
                   value_idx ncols,
+                  value_idx nnz,
                   const value_idx* csr_indptr,
                   const value_idx* csr_indices,
                   const value_t* csr_data,
@@ -96,8 +97,14 @@ void csr_to_dense(cusparseHandle_t handle,
     RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(out_mat, CUSPARSE_INDEX_BASE_ZERO));
     RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL));
 
+    size_t buffer_size;
+      RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize(
+              handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, &buffer_size, stream));
+
+    rmm::device_uvector<char> buffer(buffer_size, stream);
+
     RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense(
-      handle, nrows, ncols, out_mat, csr_data, csr_indptr, csr_indices, out, lda, stream));
+      handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, buffer.data(), stream));
 
     RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat));
 
diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h
index 0e93acf8d0..ba9f40fd7e 100644
--- a/cpp/include/raft/sparse/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/cusparse_wrappers.h
@@ -41,35 +41,35 @@ namespace raft {
 /**
  * @brief Exception thrown when a cuSparse error is encountered.
  */
-struct cusparse_error : public raft::exception {
-  explicit cusparse_error(char const* const message) : raft::exception(message) {}
-  explicit cusparse_error(std::string const& message) : raft::exception(message) {}
-};
+    struct cusparse_error : public raft::exception {
+        explicit cusparse_error(char const* const message) : raft::exception(message) {}
+        explicit cusparse_error(std::string const& message) : raft::exception(message) {}
+    };
 
-namespace sparse {
-namespace detail {
+    namespace sparse {
+        namespace detail {
 
-inline const char* cusparse_error_to_string(cusparseStatus_t err)
-{
+            inline const char* cusparse_error_to_string(cusparseStatus_t err)
+            {
 #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100
-  return cusparseGetErrorString(err);
+                return cusparseGetErrorString(err);
 #else   // CUDART_VERSION
-  switch (err) {
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR);
-    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
-    default: return "CUSPARSE_STATUS_UNKNOWN";
-  };
+                switch (err) {
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
+                    default: return "CUSPARSE_STATUS_UNKNOWN";
+                };
 #endif  // CUDART_VERSION
-}
+            }
 
-}  // namespace detail
-}  // namespace sparse
+        }  // namespace detail
+    }  // namespace sparse
 }  // namespace raft
 
 #undef _CUSPARSE_ERR_TO_STR
@@ -124,124 +124,195 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err)
 #endif
 
 namespace raft {
-namespace sparse {
+    namespace sparse {
 
 /**
  * @defgroup gthr cusparse gather methods
  * @{
  */
-template <typename T>
-cusparseStatus_t cusparsegthr(
-  cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream);
-template <>
-inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
-                                     int nnz,
-                                     const double* vals,
-                                     double* vals_sorted,
-                                     int* d_P,
-                                     cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+        template <typename T>
+        cusparseStatus_t cusparsegthr(
+                cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream);
+        template <>
+        inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
+                                             int nnz,
+                                             const double* vals,
+                                             double* vals_sorted,
+                                             int* d_P,
+                                             cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
+            return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
 #pragma GCC diagnostic pop
-}
-template <>
-inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
-                                     int nnz,
-                                     const float* vals,
-                                     float* vals_sorted,
-                                     int* d_P,
-                                     cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+        }
+        template <>
+        inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
+                                             int nnz,
+                                             const float* vals,
+                                             float* vals_sorted,
+                                             int* d_P,
+                                             cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
+            return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
 #pragma GCC diagnostic pop
-}
+        }
 /** @} */
 
 /**
  * @defgroup coo2csr cusparse COO to CSR converter methods
  * @{
  */
-template <typename T>
-void cusparsecoo2csr(
-  cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream);
-template <>
-inline void cusparsecoo2csr(cusparseHandle_t handle,
-                            const int* cooRowInd,
-                            int nnz,
-                            int m,
-                            int* csrRowPtr,
-                            cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO));
-}
+        template <typename T>
+        void cusparsecoo2csr(
+                cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream);
+        template <>
+        inline void cusparsecoo2csr(cusparseHandle_t handle,
+                                    const int* cooRowInd,
+                                    int nnz,
+                                    int m,
+                                    int* csrRowPtr,
+                                    cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO));
+        }
 /** @} */
 
 /**
  * @defgroup coosort cusparse coo sort methods
  * @{
  */
-template <typename T>
-size_t cusparsecoosort_bufferSizeExt(  // NOLINT
-  cusparseHandle_t handle,
-  int m,
-  int n,
-  int nnz,
-  const T* cooRows,
-  const T* cooCols,
-  cudaStream_t stream);
-template <>
-inline size_t cusparsecoosort_bufferSizeExt(  // NOLINT
-  cusparseHandle_t handle,
-  int m,
-  int n,
-  int nnz,
-  const int* cooRows,
-  const int* cooCols,
-  cudaStream_t stream)
-{
-  size_t val;
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val));
-  return val;
-}
+        template <typename T>
+        size_t cusparsecoosort_bufferSizeExt(  // NOLINT
+                cusparseHandle_t handle,
+                int m,
+                int n,
+                int nnz,
+                const T* cooRows,
+                const T* cooCols,
+                cudaStream_t stream);
+        template <>
+        inline size_t cusparsecoosort_bufferSizeExt(  // NOLINT
+                cusparseHandle_t handle,
+                int m,
+                int n,
+                int nnz,
+                const int* cooRows,
+                const int* cooCols,
+                cudaStream_t stream)
+        {
+            size_t val;
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val));
+            return val;
+        }
 
-template <typename T>
-void cusparsecoosortByRow(  // NOLINT
-  cusparseHandle_t handle,
-  int m,
-  int n,
-  int nnz,
-  T* cooRows,
-  T* cooCols,
-  T* P,
-  void* pBuffer,
-  cudaStream_t stream);
-template <>
-inline void cusparsecoosortByRow(  // NOLINT
-  cusparseHandle_t handle,
-  int m,
-  int n,
-  int nnz,
-  int* cooRows,
-  int* cooCols,
-  int* P,
-  void* pBuffer,
-  cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer));
-}
+        template <typename T>
+        void cusparsecoosortByRow(  // NOLINT
+                cusparseHandle_t handle,
+                int m,
+                int n,
+                int nnz,
+                T* cooRows,
+                T* cooCols,
+                T* P,
+                void* pBuffer,
+                cudaStream_t stream);
+        template <>
+        inline void cusparsecoosortByRow(  // NOLINT
+                cusparseHandle_t handle,
+                int m,
+                int n,
+                int nnz,
+                int* cooRows,
+                int* cooCols,
+                int* P,
+                void* pBuffer,
+                cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer));
+        }
 /** @} */
 
-#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
 /**
+ * @defgroup Gemmi cusparse gemmi operations
+ * @{
+ */
+        template <typename T>
+        cusparseStatus_t cusparsegemmi(  // NOLINT
+                cusparseHandle_t handle,
+                int m,
+                int n,
+                int k,
+                int nnz,
+                const T* alpha,
+                const T* A,
+                int lda,
+                const T* cscValB,
+                const int* cscColPtrB,
+                const int* cscRowIndB,
+                const T* beta,
+                T* C,
+                int ldc,
+                cudaStream_t stream);
+        template <>
+        inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
+                                              int m,
+                                              int n,
+                                              int k,
+                                              int nnz,
+                                              const float* alpha,
+                                              const float* A,
+                                              int lda,
+                                              const float* cscValB,
+                                              const int* cscColPtrB,
+                                              const int* cscRowIndB,
+                                              const float* beta,
+                                              float* C,
+                                              int ldc,
+                                              cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseSgemmi(
+                    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
+#pragma GCC diagnostic pop
+        }
+        template <>
+        inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
+                                              int m,
+                                              int n,
+                                              int k,
+                                              int nnz,
+                                              const double* alpha,
+                                              const double* A,
+                                              int lda,
+                                              const double* cscValB,
+                                              const int* cscColPtrB,
+                                              const int* cscRowIndB,
+                                              const double* beta,
+                                              double* C,
+                                              int ldc,
+                                              cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseDgemmi(
+                    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
+#pragma GCC diagnostic pop
+        }
+/** @} */
+
+#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
+        /**
  * @defgroup cusparse Create CSR operations
  * @{
  */
@@ -488,65 +559,261 @@ inline cusparseStatus_t cusparsespmv(cusparseHandle_t handle,
  * @defgroup Csrmv cusparse csrmv operations
  * @{
  */
+        template <typename T>
+        cusparseStatus_t cusparsecsrmv(  // NOLINT
+                cusparseHandle_t handle,
+                cusparseOperation_t trans,
+                int m,
+                int n,
+                int nnz,
+                const T* alpha,
+                const cusparseMatDescr_t descr,
+                const T* csrVal,
+                const int* csrRowPtr,
+                const int* csrColInd,
+                const T* x,
+                const T* beta,
+                T* y,
+                cudaStream_t stream);
+        template <>
+        inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
+                                              cusparseOperation_t trans,
+                                              int m,
+                                              int n,
+                                              int nnz,
+                                              const float* alpha,
+                                              const cusparseMatDescr_t descr,
+                                              const float* csrVal,
+                                              const int* csrRowPtr,
+                                              const int* csrColInd,
+                                              const float* x,
+                                              const float* beta,
+                                              float* y,
+                                              cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            return cusparseScsrmv(
+                    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
+        }
+        template <>
+        inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
+                                              cusparseOperation_t trans,
+                                              int m,
+                                              int n,
+                                              int nnz,
+                                              const double* alpha,
+                                              const cusparseMatDescr_t descr,
+                                              const double* csrVal,
+                                              const int* csrRowPtr,
+                                              const int* csrColInd,
+                                              const double* x,
+                                              const double* beta,
+                                              double* y,
+                                              cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            return cusparseDcsrmv(
+                    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
+        }
+/** @} */
+#endif
+
+#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
+        /**
+ * @defgroup Csrmm cusparse csrmm operations
+ * @{
+ */
+template <typename T>
+cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle,
+                                         cusparseOperation_t opA,
+                                         cusparseOperation_t opB,
+                                         const T* alpha,
+                                         const cusparseSpMatDescr_t matA,
+                                         const cusparseDnMatDescr_t matB,
+                                         const T* beta,
+                                         cusparseDnMatDescr_t matC,
+                                         cusparseSpMMAlg_t alg,
+                                         size_t* bufferSize,
+                                         cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle,
+                                                cusparseOperation_t opA,
+                                                cusparseOperation_t opB,
+                                                const float* alpha,
+                                                const cusparseSpMatDescr_t matA,
+                                                const cusparseDnMatDescr_t matB,
+                                                const float* beta,
+                                                cusparseDnMatDescr_t matC,
+                                                cusparseSpMMAlg_t alg,
+                                                size_t* bufferSize,
+                                                cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseSpMM_bufferSize(
+    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, bufferSize);
+}
+template <>
+inline cusparseStatus_t cusparsespmm_bufferSize(cusparseHandle_t handle,
+                                                cusparseOperation_t opA,
+                                                cusparseOperation_t opB,
+                                                const double* alpha,
+                                                const cusparseSpMatDescr_t matA,
+                                                const cusparseDnMatDescr_t matB,
+                                                const double* beta,
+                                                cusparseDnMatDescr_t matC,
+                                                cusparseSpMMAlg_t alg,
+                                                size_t* bufferSize,
+                                                cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseSpMM_bufferSize(
+    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, bufferSize);
+}
 template <typename T>
-cusparseStatus_t cusparsecsrmv(  // NOLINT
-  cusparseHandle_t handle,
-  cusparseOperation_t trans,
-  int m,
-  int n,
-  int nnz,
-  const T* alpha,
-  const cusparseMatDescr_t descr,
-  const T* csrVal,
-  const int* csrRowPtr,
-  const int* csrColInd,
-  const T* x,
-  const T* beta,
-  T* y,
-  cudaStream_t stream);
+inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
+                                     cusparseOperation_t opA,
+                                     cusparseOperation_t opB,
+                                     const T* alpha,
+                                     const cusparseSpMatDescr_t matA,
+                                     const cusparseDnMatDescr_t matB,
+                                     const T* beta,
+                                     cusparseDnMatDescr_t matC,
+                                     cusparseSpMMAlg_t alg,
+                                     T* externalBuffer,
+                                     cudaStream_t stream);
 template <>
-inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
-                                      cusparseOperation_t trans,
-                                      int m,
-                                      int n,
-                                      int nnz,
-                                      const float* alpha,
-                                      const cusparseMatDescr_t descr,
-                                      const float* csrVal,
-                                      const int* csrRowPtr,
-                                      const int* csrColInd,
-                                      const float* x,
-                                      const float* beta,
-                                      float* y,
-                                      cudaStream_t stream)
+inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
+                                     cusparseOperation_t opA,
+                                     cusparseOperation_t opB,
+                                     const float* alpha,
+                                     const cusparseSpMatDescr_t matA,
+                                     const cusparseDnMatDescr_t matB,
+                                     const float* beta,
+                                     cusparseDnMatDescr_t matC,
+                                     cusparseSpMMAlg_t alg,
+                                     float* externalBuffer,
+                                     cudaStream_t stream)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseScsrmv(
-    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
+  return cusparseSpMM(
+    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, externalBuffer);
 }
 template <>
-inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
-                                      cusparseOperation_t trans,
-                                      int m,
-                                      int n,
-                                      int nnz,
-                                      const double* alpha,
-                                      const cusparseMatDescr_t descr,
-                                      const double* csrVal,
-                                      const int* csrRowPtr,
-                                      const int* csrColInd,
-                                      const double* x,
-                                      const double* beta,
-                                      double* y,
-                                      cudaStream_t stream)
+inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
+                                     cusparseOperation_t opA,
+                                     cusparseOperation_t opB,
+                                     const double* alpha,
+                                     const cusparseSpMatDescr_t matA,
+                                     const cusparseDnMatDescr_t matB,
+                                     const double* beta,
+                                     cusparseDnMatDescr_t matC,
+                                     cusparseSpMMAlg_t alg,
+                                     double* externalBuffer,
+                                     cudaStream_t stream)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseDcsrmv(
-    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
+  return cusparseSpMM(
+    handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, externalBuffer);
 }
 /** @} */
+#else
+/**
+ * @defgroup Csrmm cusparse csrmm operations
+ * @{
+ */
+        template <typename T>
+        cusparseStatus_t cusparsecsrmm(  // NOLINT
+                cusparseHandle_t handle,
+                cusparseOperation_t trans,
+                int m,
+                int n,
+                int k,
+                int nnz,
+                const T* alpha,
+                const cusparseMatDescr_t descr,
+                const T* csrVal,
+                const int* csrRowPtr,
+                const int* csrColInd,
+                const T* x,
+                const int ldx,
+                const T* beta,
+                T* y,
+                const int ldy,
+                cudaStream_t stream);
+        template <>
+        inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
+                                              cusparseOperation_t trans,
+                                              int m,
+                                              int n,
+                                              int k,
+                                              int nnz,
+                                              const float* alpha,
+                                              const cusparseMatDescr_t descr,
+                                              const float* csrVal,
+                                              const int* csrRowPtr,
+                                              const int* csrColInd,
+                                              const float* x,
+                                              const int ldx,
+                                              const float* beta,
+                                              float* y,
+                                              const int ldy,
+                                              cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            return cusparseScsrmm(
+                    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
+        }
+        template <>
+        inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
+                                              cusparseOperation_t trans,
+                                              int m,
+                                              int n,
+                                              int k,
+                                              int nnz,
+                                              const double* alpha,
+                                              const cusparseMatDescr_t descr,
+                                              const double* csrVal,
+                                              const int* csrRowPtr,
+                                              const int* csrColInd,
+                                              const double* x,
+                                              const int ldx,
+                                              const double* beta,
+                                              double* y,
+                                              const int ldy,
+                                              cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            return cusparseDcsrmm(
+                    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
+        }
+/** @} */
 #endif
 
+/**
+ * @defgroup csr2coo cusparse CSR to COO converter methods
+ * @{
+ */
+        template <typename T>
+        void cusparsecsr2coo(  // NOLINT
+                cusparseHandle_t handle,
+                const int n,
+                const int nnz,
+                const T* csrRowPtr,
+                T* cooRowInd,
+                cudaStream_t stream);
+        template <>
+        inline void cusparsecsr2coo(cusparseHandle_t handle,
+                                    const int n,
+                                    const int nnz,
+                                    const int* csrRowPtr,
+                                    int* cooRowInd,
+                                    cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO));
+        }
+/** @} */
+
 /**
  * @defgroup setpointermode cusparse set pointer mode method
  * @{
@@ -559,13 +826,215 @@ inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
 //                                         cudaStream_t stream);
 
 // template<>
-inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
-                                               cusparsePointerMode_t mode,
-                                               cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseSetPointerMode(handle, mode);
-}
+        inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
+                                                       cusparsePointerMode_t mode,
+                                                       cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            return cusparseSetPointerMode(handle, mode);
+        }
+/** @} */
+
+/**
+ * @defgroup CsrmvEx cusparse csrmvex operations
+ * @{
+ */
+        template <typename T>
+        cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
+                                                    cusparseAlgMode_t alg,
+                                                    cusparseOperation_t transA,
+                                                    int m,
+                                                    int n,
+                                                    int nnz,
+                                                    const T* alpha,
+                                                    const cusparseMatDescr_t descrA,
+                                                    const T* csrValA,
+                                                    const int* csrRowPtrA,
+                                                    const int* csrColIndA,
+                                                    const T* x,
+                                                    const T* beta,
+                                                    T* y,
+                                                    size_t* bufferSizeInBytes,
+                                                    cudaStream_t stream);
+        template <>
+        inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
+                                                           cusparseAlgMode_t alg,
+                                                           cusparseOperation_t transA,
+                                                           int m,
+                                                           int n,
+                                                           int nnz,
+                                                           const float* alpha,
+                                                           const cusparseMatDescr_t descrA,
+                                                           const float* csrValA,
+                                                           const int* csrRowPtrA,
+                                                           const int* csrColIndA,
+                                                           const float* x,
+                                                           const float* beta,
+                                                           float* y,
+                                                           size_t* bufferSizeInBytes,
+                                                           cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                           CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                           CUDA_R_32F);
+
+            cusparseDnVecDescr_t vecX;
+            cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
+
+            cusparseDnVecDescr_t vecY;
+            cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
+
+            cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY,
+                                           CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+            return result;
+
+        }
+        template <>
+        inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
+                                                           cusparseAlgMode_t alg,
+                                                           cusparseOperation_t transA,
+                                                           int m,
+                                                           int n,
+                                                           int nnz,
+                                                           const double* alpha,
+                                                           const cusparseMatDescr_t descrA,
+                                                           const double* csrValA,
+                                                           const int* csrRowPtrA,
+                                                           const int* csrColIndA,
+                                                           const double* x,
+                                                           const double* beta,
+                                                           double* y,
+                                                           size_t* bufferSizeInBytes,
+                                                           cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_64F);
+
+            cusparseDnVecDescr_t vecX;
+            cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
+
+            cusparseDnVecDescr_t vecY;
+            cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+
+            cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY,
+                                           CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+            return result;
+
+        }
+
+        template <typename T>
+        cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
+                                         cusparseAlgMode_t alg,
+                                         cusparseOperation_t transA,
+                                         int m,
+                                         int n,
+                                         int nnz,
+                                         const T* alpha,
+                                         const cusparseMatDescr_t descrA,
+                                         const T* csrValA,
+                                         const int* csrRowPtrA,
+                                         const int* csrColIndA,
+                                         const T* x,
+                                         const T* beta,
+                                         T* y,
+                                         T* buffer,
+                                         cudaStream_t stream);
+        template <>
+        inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
+                                                cusparseAlgMode_t alg,
+                                                cusparseOperation_t transA,
+                                                int m,
+                                                int n,
+                                                int nnz,
+                                                const float* alpha,
+                                                const cusparseMatDescr_t descrA,
+                                                const float* csrValA,
+                                                const int* csrRowPtrA,
+                                                const int* csrColIndA,
+                                                const float* x,
+                                                const float* beta,
+                                                float* y,
+                                                float* buffer,
+                                                cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_32F);
+
+            cusparseDnVecDescr_t vecX;
+            cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
+
+            cusparseDnVecDescr_t vecY;
+            cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
+
+            cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY,
+                                CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+            return result;
+        }
+        template <>
+        inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
+                                                cusparseAlgMode_t alg,
+                                                cusparseOperation_t transA,
+                                                int m,
+                                                int n,
+                                                int nnz,
+                                                const double* alpha,
+                                                const cusparseMatDescr_t descrA,
+                                                const double* csrValA,
+                                                const int* csrRowPtrA,
+                                                const int* csrColIndA,
+                                                const double* x,
+                                                const double* beta,
+                                                double* y,
+                                                double* buffer,
+                                                cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_64F);
+
+            cusparseDnVecDescr_t vecX;
+            cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
+
+            cusparseDnVecDescr_t vecY;
+            cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+
+            cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY,
+                                           CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+            return result;
+        }
+
 /** @} */
 
 /**
@@ -573,182 +1042,182 @@ inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
  * @{
  */
 
-template <typename T>
-cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
-                                            int m,
-                                            int n,
-                                            int nnz,
-                                            const T* csrVal,
-                                            const int* csrRowPtr,
-                                            const int* csrColInd,
-                                            void* cscVal,
-                                            int* cscColPtr,
-                                            int* cscRowInd,
-                                            cusparseAction_t copyValues,
-                                            cusparseIndexBase_t idxBase,
-                                            cusparseCsr2CscAlg_t alg,
-                                            size_t* bufferSize,
-                                            cudaStream_t stream);
+        template <typename T>
+        cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
+                                                    int m,
+                                                    int n,
+                                                    int nnz,
+                                                    const T* csrVal,
+                                                    const int* csrRowPtr,
+                                                    const int* csrColInd,
+                                                    void* cscVal,
+                                                    int* cscColPtr,
+                                                    int* cscRowInd,
+                                                    cusparseAction_t copyValues,
+                                                    cusparseIndexBase_t idxBase,
+                                                    cusparseCsr2CscAlg_t alg,
+                                                    size_t* bufferSize,
+                                                    cudaStream_t stream);
 
-template <>
-inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
-                                                   int m,
-                                                   int n,
-                                                   int nnz,
-                                                   const float* csrVal,
-                                                   const int* csrRowPtr,
-                                                   const int* csrColInd,
-                                                   void* cscVal,
-                                                   int* cscColPtr,
-                                                   int* cscRowInd,
-                                                   cusparseAction_t copyValues,
-                                                   cusparseIndexBase_t idxBase,
-                                                   cusparseCsr2CscAlg_t alg,
-                                                   size_t* bufferSize,
-                                                   cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+        template <>
+        inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
+                                                           int m,
+                                                           int n,
+                                                           int nnz,
+                                                           const float* csrVal,
+                                                           const int* csrRowPtr,
+                                                           const int* csrColInd,
+                                                           void* cscVal,
+                                                           int* cscColPtr,
+                                                           int* cscRowInd,
+                                                           cusparseAction_t copyValues,
+                                                           cusparseIndexBase_t idxBase,
+                                                           cusparseCsr2CscAlg_t alg,
+                                                           size_t* bufferSize,
+                                                           cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-  return cusparseCsr2cscEx2_bufferSize(handle,
-                                       m,
-                                       n,
-                                       nnz,
-                                       csrVal,
-                                       csrRowPtr,
-                                       csrColInd,
-                                       cscVal,
-                                       cscColPtr,
-                                       cscRowInd,
-                                       CUDA_R_32F,
-                                       copyValues,
-                                       idxBase,
-                                       alg,
-                                       bufferSize);
-}
-template <>
-inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
-                                                   int m,
-                                                   int n,
-                                                   int nnz,
-                                                   const double* csrVal,
-                                                   const int* csrRowPtr,
-                                                   const int* csrColInd,
-                                                   void* cscVal,
-                                                   int* cscColPtr,
-                                                   int* cscRowInd,
-                                                   cusparseAction_t copyValues,
-                                                   cusparseIndexBase_t idxBase,
-                                                   cusparseCsr2CscAlg_t alg,
-                                                   size_t* bufferSize,
-                                                   cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            return cusparseCsr2cscEx2_bufferSize(handle,
+                                                 m,
+                                                 n,
+                                                 nnz,
+                                                 csrVal,
+                                                 csrRowPtr,
+                                                 csrColInd,
+                                                 cscVal,
+                                                 cscColPtr,
+                                                 cscRowInd,
+                                                 CUDA_R_32F,
+                                                 copyValues,
+                                                 idxBase,
+                                                 alg,
+                                                 bufferSize);
+        }
+        template <>
+        inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
+                                                           int m,
+                                                           int n,
+                                                           int nnz,
+                                                           const double* csrVal,
+                                                           const int* csrRowPtr,
+                                                           const int* csrColInd,
+                                                           void* cscVal,
+                                                           int* cscColPtr,
+                                                           int* cscRowInd,
+                                                           cusparseAction_t copyValues,
+                                                           cusparseIndexBase_t idxBase,
+                                                           cusparseCsr2CscAlg_t alg,
+                                                           size_t* bufferSize,
+                                                           cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-  return cusparseCsr2cscEx2_bufferSize(handle,
-                                       m,
-                                       n,
-                                       nnz,
-                                       csrVal,
-                                       csrRowPtr,
-                                       csrColInd,
-                                       cscVal,
-                                       cscColPtr,
-                                       cscRowInd,
-                                       CUDA_R_64F,
-                                       copyValues,
-                                       idxBase,
-                                       alg,
-                                       bufferSize);
-}
+            return cusparseCsr2cscEx2_bufferSize(handle,
+                                                 m,
+                                                 n,
+                                                 nnz,
+                                                 csrVal,
+                                                 csrRowPtr,
+                                                 csrColInd,
+                                                 cscVal,
+                                                 cscColPtr,
+                                                 cscRowInd,
+                                                 CUDA_R_64F,
+                                                 copyValues,
+                                                 idxBase,
+                                                 alg,
+                                                 bufferSize);
+        }
 
-template <typename T>
-cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
-                                 int m,
-                                 int n,
-                                 int nnz,
-                                 const T* csrVal,
-                                 const int* csrRowPtr,
-                                 const int* csrColInd,
-                                 void* cscVal,
-                                 int* cscColPtr,
-                                 int* cscRowInd,
-                                 cusparseAction_t copyValues,
-                                 cusparseIndexBase_t idxBase,
-                                 cusparseCsr2CscAlg_t alg,
-                                 void* buffer,
-                                 cudaStream_t stream);
+        template <typename T>
+        cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
+                                         int m,
+                                         int n,
+                                         int nnz,
+                                         const T* csrVal,
+                                         const int* csrRowPtr,
+                                         const int* csrColInd,
+                                         void* cscVal,
+                                         int* cscColPtr,
+                                         int* cscRowInd,
+                                         cusparseAction_t copyValues,
+                                         cusparseIndexBase_t idxBase,
+                                         cusparseCsr2CscAlg_t alg,
+                                         void* buffer,
+                                         cudaStream_t stream);
 
-template <>
-inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
-                                        int m,
-                                        int n,
-                                        int nnz,
-                                        const float* csrVal,
-                                        const int* csrRowPtr,
-                                        const int* csrColInd,
-                                        void* cscVal,
-                                        int* cscColPtr,
-                                        int* cscRowInd,
-                                        cusparseAction_t copyValues,
-                                        cusparseIndexBase_t idxBase,
-                                        cusparseCsr2CscAlg_t alg,
-                                        void* buffer,
-                                        cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+        template <>
+        inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
+                                                int m,
+                                                int n,
+                                                int nnz,
+                                                const float* csrVal,
+                                                const int* csrRowPtr,
+                                                const int* csrColInd,
+                                                void* cscVal,
+                                                int* cscColPtr,
+                                                int* cscRowInd,
+                                                cusparseAction_t copyValues,
+                                                cusparseIndexBase_t idxBase,
+                                                cusparseCsr2CscAlg_t alg,
+                                                void* buffer,
+                                                cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-  return cusparseCsr2cscEx2(handle,
-                            m,
-                            n,
-                            nnz,
-                            csrVal,
-                            csrRowPtr,
-                            csrColInd,
-                            cscVal,
-                            cscColPtr,
-                            cscRowInd,
-                            CUDA_R_32F,
-                            copyValues,
-                            idxBase,
-                            alg,
-                            buffer);
-}
+            return cusparseCsr2cscEx2(handle,
+                                      m,
+                                      n,
+                                      nnz,
+                                      csrVal,
+                                      csrRowPtr,
+                                      csrColInd,
+                                      cscVal,
+                                      cscColPtr,
+                                      cscRowInd,
+                                      CUDA_R_32F,
+                                      copyValues,
+                                      idxBase,
+                                      alg,
+                                      buffer);
+        }
 
-template <>
-inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
-                                        int m,
-                                        int n,
-                                        int nnz,
-                                        const double* csrVal,
-                                        const int* csrRowPtr,
-                                        const int* csrColInd,
-                                        void* cscVal,
-                                        int* cscColPtr,
-                                        int* cscRowInd,
-                                        cusparseAction_t copyValues,
-                                        cusparseIndexBase_t idxBase,
-                                        cusparseCsr2CscAlg_t alg,
-                                        void* buffer,
-                                        cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+        template <>
+        inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
+                                                int m,
+                                                int n,
+                                                int nnz,
+                                                const double* csrVal,
+                                                const int* csrRowPtr,
+                                                const int* csrColInd,
+                                                void* cscVal,
+                                                int* cscColPtr,
+                                                int* cscRowInd,
+                                                cusparseAction_t copyValues,
+                                                cusparseIndexBase_t idxBase,
+                                                cusparseCsr2CscAlg_t alg,
+                                                void* buffer,
+                                                cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-  return cusparseCsr2cscEx2(handle,
-                            m,
-                            n,
-                            nnz,
-                            csrVal,
-                            csrRowPtr,
-                            csrColInd,
-                            cscVal,
-                            cscColPtr,
-                            cscRowInd,
-                            CUDA_R_64F,
-                            copyValues,
-                            idxBase,
-                            alg,
-                            buffer);
-}
+            return cusparseCsr2cscEx2(handle,
+                                      m,
+                                      n,
+                                      nnz,
+                                      csrVal,
+                                      csrRowPtr,
+                                      csrColInd,
+                                      cscVal,
+                                      cscColPtr,
+                                      cscRowInd,
+                                      CUDA_R_64F,
+                                      copyValues,
+                                      idxBase,
+                                      alg,
+                                      buffer);
+        }
 
 /** @} */
 
@@ -757,50 +1226,505 @@ inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
  * @{
  */
 
-template <typename T>
-cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
-                                   int m,
-                                   int n,
-                                   const cusparseMatDescr_t descrA,
-                                   const T* csrValA,
-                                   const int* csrRowPtrA,
-                                   const int* csrColIndA,
-                                   T* A,
-                                   int lda,
-                                   cudaStream_t stream);
+        template <typename T>
+        cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
+                                                        int m,
+                                                        int n,
+                                                        int k,
+                                                        const T* alpha,
+                                                        const T* beta,
+                                                        const cusparseMatDescr_t matA,
+                                                        int nnzA,
+                                                        const int* rowindA,
+                                                        const int* indicesA,
+                                                        const cusparseMatDescr_t matB,
+                                                        int nnzB,
+                                                        const int* rowindB,
+                                                        const int* indicesB,
+                                                        const cusparseMatDescr_t matD,
+                                                        int nnzD,
+                                                        const int* rowindD,
+                                                        const int* indicesD,
+                                                        csrgemm2Info_t info,
+                                                        size_t* pBufferSizeInBytes,
+                                                        cudaStream_t stream);
 
-template <>
-inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
-                                          int m,
-                                          int n,
-                                          const cusparseMatDescr_t descrA,
-                                          const float* csrValA,
-                                          const int* csrRowPtrA,
-                                          const int* csrColIndA,
-                                          float* A,
-                                          int lda,
-                                          cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
-}
-template <>
-inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+        template <>
+        inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
+                                                               int m,
+                                                               int n,
+                                                               int k,
+                                                               const float* alpha,
+                                                               const float* beta,
+                                                               const cusparseMatDescr_t matA,
+                                                               int nnzA,
+                                                               const int* rowindA,
+                                                               const int* indicesA,
+                                                               const cusparseMatDescr_t matB,
+                                                               int nnzB,
+                                                               const int* rowindB,
+                                                               const int* indicesB,
+                                                               const cusparseMatDescr_t matD,
+                                                               int nnzD,
+                                                               const int* rowindD,
+                                                               const int* indicesD,
+                                                               csrgemm2Info_t info,
+                                                               size_t* pBufferSizeInBytes,
+                                                               cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseScsrgemm2_bufferSizeExt(handle,
+                                                   m,
+                                                   n,
+                                                   k,
+                                                   alpha,
+                                                   matA,
+                                                   nnzA,
+                                                   rowindA,
+                                                   indicesA,
+                                                   matB,
+                                                   nnzB,
+                                                   rowindB,
+                                                   indicesB,
+                                                   beta,
+                                                   matD,
+                                                   nnzD,
+                                                   rowindD,
+                                                   indicesD,
+                                                   info,
+                                                   pBufferSizeInBytes);
+#pragma GCC diagnostic pop
+        }
+
+        template <>
+        inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
+                                                               int m,
+                                                               int n,
+                                                               int k,
+                                                               const double* alpha,
+                                                               const double* beta,
+                                                               const cusparseMatDescr_t matA,
+                                                               int nnzA,
+                                                               const int* rowindA,
+                                                               const int* indicesA,
+                                                               const cusparseMatDescr_t matB,
+                                                               int nnzB,
+                                                               const int* rowindB,
+                                                               const int* indicesB,
+                                                               const cusparseMatDescr_t matD,
+                                                               int nnzD,
+                                                               const int* rowindD,
+                                                               const int* indicesD,
+                                                               csrgemm2Info_t info,
+                                                               size_t* pBufferSizeInBytes,
+                                                               cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseDcsrgemm2_bufferSizeExt(handle,
+                                                   m,
+                                                   n,
+                                                   k,
+                                                   alpha,
+                                                   matA,
+                                                   nnzA,
+                                                   rowindA,
+                                                   indicesA,
+                                                   matB,
+                                                   nnzB,
+                                                   rowindB,
+                                                   indicesB,
+                                                   beta,
+                                                   matD,
+                                                   nnzD,
+                                                   rowindD,
+                                                   indicesD,
+                                                   info,
+                                                   pBufferSizeInBytes);
+#pragma GCC diagnostic pop
+        }
+
+        inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle,
+                                                    int m,
+                                                    int n,
+                                                    int k,
+                                                    const cusparseMatDescr_t matA,
+                                                    int nnzA,
+                                                    const int* rowindA,
+                                                    const int* indicesA,
+                                                    const cusparseMatDescr_t matB,
+                                                    int nnzB,
+                                                    const int* rowindB,
+                                                    const int* indicesB,
+                                                    const cusparseMatDescr_t matD,
+                                                    int nnzD,
+                                                    const int* rowindD,
+                                                    const int* indicesD,
+                                                    const cusparseMatDescr_t matC,
+                                                    int* rowindC,
+                                                    int* nnzC,
+                                                    const csrgemm2Info_t info,
+                                                    void* pBuffer,
+                                                    cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseXcsrgemm2Nnz(handle,
+                                        m,
+                                        n,
+                                        k,
+                                        matA,
+                                        nnzA,
+                                        rowindA,
+                                        indicesA,
+                                        matB,
+                                        nnzB,
+                                        rowindB,
+                                        indicesB,
+                                        matD,
+                                        nnzD,
+                                        rowindD,
+                                        indicesD,
+                                        matC,
+                                        rowindC,
+                                        nnzC,
+                                        info,
+                                        pBuffer);
+#pragma GCC diagnostic pop
+        }
+
+        template <typename T>
+        cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
                                           int m,
                                           int n,
+                                          int k,
+                                          const T* alpha,
                                           const cusparseMatDescr_t descrA,
-                                          const double* csrValA,
+                                          int nnzA,
+                                          const T* csrValA,
                                           const int* csrRowPtrA,
                                           const int* csrColIndA,
-                                          double* A,
-                                          int lda,
-                                          cudaStream_t stream)
-{
-  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
-}
+                                          const cusparseMatDescr_t descrB,
+                                          int nnzB,
+                                          const T* csrValB,
+                                          const int* csrRowPtrB,
+                                          const int* csrColIndB,
+                                          const T* beta,
+                                          const cusparseMatDescr_t descrD,
+                                          int nnzD,
+                                          const T* csrValD,
+                                          const int* csrRowPtrD,
+                                          const int* csrColIndD,
+                                          const cusparseMatDescr_t descrC,
+                                          T* csrValC,
+                                          const int* csrRowPtrC,
+                                          int* csrColIndC,
+                                          const csrgemm2Info_t info,
+                                          void* pBuffer,
+                                          cudaStream_t stream);
+
+        template <>
+        inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
+                                                 int m,
+                                                 int n,
+                                                 int k,
+                                                 const float* alpha,
+                                                 const cusparseMatDescr_t descrA,
+                                                 int nnzA,
+                                                 const float* csrValA,
+                                                 const int* csrRowPtrA,
+                                                 const int* csrColIndA,
+                                                 const cusparseMatDescr_t descrB,
+                                                 int nnzB,
+                                                 const float* csrValB,
+                                                 const int* csrRowPtrB,
+                                                 const int* csrColIndB,
+                                                 const float* beta,
+                                                 const cusparseMatDescr_t descrD,
+                                                 int nnzD,
+                                                 const float* csrValD,
+                                                 const int* csrRowPtrD,
+                                                 const int* csrColIndD,
+                                                 const cusparseMatDescr_t descrC,
+                                                 float* csrValC,
+                                                 const int* csrRowPtrC,
+                                                 int* csrColIndC,
+                                                 const csrgemm2Info_t info,
+                                                 void* pBuffer,
+                                                 cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseScsrgemm2(handle,
+                                     m,
+                                     n,
+                                     k,
+                                     alpha,
+                                     descrA,
+                                     nnzA,
+                                     csrValA,
+                                     csrRowPtrA,
+                                     csrColIndA,
+                                     descrB,
+                                     nnzB,
+                                     csrValB,
+                                     csrRowPtrB,
+                                     csrColIndB,
+                                     beta,
+                                     descrD,
+                                     nnzD,
+                                     csrValD,
+                                     csrRowPtrD,
+                                     csrColIndD,
+                                     descrC,
+                                     csrValC,
+                                     csrRowPtrC,
+                                     csrColIndC,
+                                     info,
+                                     pBuffer);
+#pragma GCC diagnostic pop
+        }
+
+        template <>
+        inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
+                                                 int m,
+                                                 int n,
+                                                 int k,
+                                                 const double* alpha,
+                                                 const cusparseMatDescr_t descrA,
+                                                 int nnzA,
+                                                 const double* csrValA,
+                                                 const int* csrRowPtrA,
+                                                 const int* csrColIndA,
+                                                 const cusparseMatDescr_t descrB,
+                                                 int nnzB,
+                                                 const double* csrValB,
+                                                 const int* csrRowPtrB,
+                                                 const int* csrColIndB,
+                                                 const double* beta,
+                                                 const cusparseMatDescr_t descrD,
+                                                 int nnzD,
+                                                 const double* csrValD,
+                                                 const int* csrRowPtrD,
+                                                 const int* csrColIndD,
+                                                 const cusparseMatDescr_t descrC,
+                                                 double* csrValC,
+                                                 const int* csrRowPtrC,
+                                                 int* csrColIndC,
+                                                 const csrgemm2Info_t info,
+                                                 void* pBuffer,
+                                                 cudaStream_t stream)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+            return cusparseDcsrgemm2(handle,
+                                     m,
+                                     n,
+                                     k,
+                                     alpha,
+                                     descrA,
+                                     nnzA,
+                                     csrValA,
+                                     csrRowPtrA,
+                                     csrColIndA,
+                                     descrB,
+                                     nnzB,
+                                     csrValB,
+                                     csrRowPtrB,
+                                     csrColIndB,
+                                     beta,
+                                     descrD,
+                                     nnzD,
+                                     csrValD,
+                                     csrRowPtrD,
+                                     csrColIndD,
+                                     descrC,
+                                     csrValC,
+                                     csrRowPtrC,
+                                     csrColIndC,
+                                     info,
+                                     pBuffer);
+#pragma GCC diagnostic pop
+        }
 
 /** @} */
 
-}  // namespace sparse
-}  // namespace raft
+/**
+ * @defgroup csrgemm2 cusparse sparse gemm operations
+ * @{
+ */
+
+        template <typename T>
+        cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                           int m,
+                                           int n,
+                                           int nnz,
+                                           const cusparseMatDescr_t descrA,
+                                           const T* csrValA,
+                                           const int* csrRowPtrA,
+                                           const int* csrColIndA,
+                                           T* A,
+                                           int lda,
+                                           size_t *buffer_size,
+                                           bool row_major = false,
+                                           cudaStream_t stream);
+
+        template <>
+        cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                                      int m,
+                                                      int n,
+                                                      int nnz,
+                                                      const cusparseMatDescr_t descrA,
+                                                      const float* csrValA,
+                                                      const int* csrRowPtrA,
+                                                      const int* csrColIndA,
+                                                      float* A,
+                                                      int lda,
+                                                      size_t *buffer_size,
+                                                      bool row_major = false,
+                                                      cudaStream_t stream) {
+            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_32F);
+
+            cusparseDnMatDescr_t matB;
+            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
+
+            cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
+
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+            return result;
+
+        }
+
+        template <>
+        cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                                      int m,
+                                                      int n,
+                                                      int nnz,
+                                                      const cusparseMatDescr_t descrA,
+                                                      const double* csrValA,
+                                                      const int* csrRowPtrA,
+                                                      const int* csrColIndA,
+                                                      double* A,
+                                                      int lda,
+                                                      size_t *buffer_size,
+                                                      bool row_major = false,
+                                                      cudaStream_t stream) {
+            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_64F);
+
+            cusparseDnMatDescr_t matB;
+            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
+
+            cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+            return result;
+
+        }
+
+
+
+        template <typename T>
+        cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+                                           int m,
+                                           int n,
+                                           int nnz,
+                                           const cusparseMatDescr_t descrA,
+                                           const T* csrValA,
+                                           const int* csrRowPtrA,
+                                           const int* csrColIndA,
+                                           T* A,
+                                           int lda,
+                                           void *buffer,
+                                           cudaStream_t stream,
+                                           bool row_major = false);
+
+        template <>
+        inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+                                                  int m,
+                                                  int n,
+                                                  int nnz,
+                                                  const cusparseMatDescr_t descrA,
+                                                  const float* csrValA,
+                                                  const int* csrRowPtrA,
+                                                  const int* csrColIndA,
+                                                  float* A,
+                                                  int lda,
+                                                  void *buffer,
+                                                  cudaStream_t stream,
+                                                  bool row_major = false)
+        {
+
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_32F);
+
+            cusparseDnMatDescr_t matB;
+            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
+
+            cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+            return result;
+        }
+        template <>
+        inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+                                                  int m,
+                                                  int n,
+                                                  int nnz,
+                                                  const cusparseMatDescr_t descrA,
+                                                  const double* csrValA,
+                                                  const int* csrRowPtrA,
+                                                  const int* csrColIndA,
+                                                  double* A,
+                                                  int lda,
+                                                  void *buffer,
+                                                  cudaStream_t stream,
+                                                  bool row_major = false)
+        {
+            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
+            cusparseSpMatDescr_t matA;
+            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
+                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+                              CUDA_R_64F);
+
+            cusparseDnMatDescr_t matB;
+            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
+
+            cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
+
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+            return result;
+        }
+
+/** @} */
+
+    }  // namespace sparse
+}  // namespace raft
\ No newline at end of file

From 5e16bdc7fb04e485adbb4bd82b3c72fbfe86b6c8 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 15:38:48 -0500
Subject: [PATCH 03/16] Separating cusparse macros so the cusparse wrappers
 don't need to  be explicitly compiled w/ the handle

---
 cpp/include/raft/handle.hpp                   |   2 +-
 cpp/include/raft/sparse/cusparse_wrappers.h   | 106 +-----------------
 .../raft/sparse/detail/cusparse_macros.h      | 105 +++++++++++++++++
 3 files changed, 107 insertions(+), 106 deletions(-)
 create mode 100644 cpp/include/raft/sparse/detail/cusparse_macros.h

diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp
index 6421ba5344..8a55df114d 100644
--- a/cpp/include/raft/handle.hpp
+++ b/cpp/include/raft/handle.hpp
@@ -36,7 +36,7 @@
 #include <raft/comms/comms.hpp>
 #include <raft/linalg/cublas_wrappers.h>
 #include <raft/linalg/cusolver_wrappers.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/cusparse_macros.h>
 #include <rmm/cuda_stream_pool.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h
index ba9f40fd7e..edd26748fb 100644
--- a/cpp/include/raft/sparse/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/cusparse_wrappers.h
@@ -17,111 +17,7 @@
 #pragma once
 
 #include <raft/error.hpp>
-
-#include <cusparse_v2.h>
-///@todo: enable this once logging is enabled
-//#include <cuml/common/logger.hpp>
-
-#define _CUSPARSE_ERR_TO_STR(err) \
-  case err: return #err;
-
-// Notes:
-//(1.) CUDA_VER_10_1_UP aggregates all the CUDA version selection logic;
-//(2.) to enforce a lower version,
-//
-//`#define CUDA_ENFORCE_LOWER
-// #include <raft/sparse/cusparse_wrappers.h>`
-//
-// (i.e., before including this header)
-//
-#define CUDA_VER_10_1_UP (CUDART_VERSION >= 10100)
-
-namespace raft {
-
-/**
- * @brief Exception thrown when a cuSparse error is encountered.
- */
-    struct cusparse_error : public raft::exception {
-        explicit cusparse_error(char const* const message) : raft::exception(message) {}
-        explicit cusparse_error(std::string const& message) : raft::exception(message) {}
-    };
-
-    namespace sparse {
-        namespace detail {
-
-            inline const char* cusparse_error_to_string(cusparseStatus_t err)
-            {
-#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100
-                return cusparseGetErrorString(err);
-#else   // CUDART_VERSION
-                switch (err) {
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
-                    default: return "CUSPARSE_STATUS_UNKNOWN";
-                };
-#endif  // CUDART_VERSION
-            }
-
-        }  // namespace detail
-    }  // namespace sparse
-}  // namespace raft
-
-#undef _CUSPARSE_ERR_TO_STR
-
-/**
- * @brief Error checking macro for cuSparse runtime API functions.
- *
- * Invokes a cuSparse runtime API function call, if the call does not return
- * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred
- */
-#define RAFT_CUSPARSE_TRY(call)                                              \
-  do {                                                                       \
-    cusparseStatus_t const status = (call);                                  \
-    if (CUSPARSE_STATUS_SUCCESS != status) {                                 \
-      std::string msg{};                                                     \
-      SET_ERROR_MSG(msg,                                                     \
-                    "cuSparse error encountered at: ",                       \
-                    "call='%s', Reason=%d:%s",                               \
-                    #call,                                                   \
-                    status,                                                  \
-                    raft::sparse::detail::cusparse_error_to_string(status)); \
-      throw raft::cusparse_error(msg);                                       \
-    }                                                                        \
-  } while (0)
-
-// FIXME: Remove after consumer rename
-#ifndef CUSPARSE_TRY
-#define CUSPARSE_TRY(call) RAFT_CUSPARSE_TRY(call)
-#endif
-
-// FIXME: Remove after consumer rename
-#ifndef CUSPARSE_CHECK
-#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call)
-#endif
-
-//@todo: use logger here once logging is enabled
-/** check for cusparse runtime API errors but do not assert */
-#define RAFT_CUSPARSE_TRY_NO_THROW(call)                           \
-  do {                                                             \
-    cusparseStatus_t err = call;                                   \
-    if (err != CUSPARSE_STATUS_SUCCESS) {                          \
-      printf("CUSPARSE call='%s' got errorcode=%d err=%s",         \
-             #call,                                                \
-             err,                                                  \
-             raft::sparse::detail::cusparse_error_to_string(err)); \
-    }                                                              \
-  } while (0)
-
-// FIXME: Remove after consumer rename
-#ifndef CUSPARSE_CHECK_NO_THROW
-#define CUSPARSE_CHECK_NO_THROW(call) RAFT_CUSPARSE_TRY_NO_THROW(call)
-#endif
+#include <raft/sparse/detail/cusparse_macros.h>
 
 namespace raft {
     namespace sparse {
diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h
new file mode 100644
index 0000000000..a604cb57f2
--- /dev/null
+++ b/cpp/include/raft/sparse/detail/cusparse_macros.h
@@ -0,0 +1,105 @@
+#include <raft/error.hpp>
+#include <cusparse_v2.h>
+///@todo: enable this once logging is enabled
+//#include <cuml/common/logger.hpp>
+
+#define _CUSPARSE_ERR_TO_STR(err) \
+  case err: return #err;
+
+// Notes:
+//(1.) CUDA_VER_10_1_UP aggregates all the CUDA version selection logic;
+//(2.) to enforce a lower version,
+//
+//`#define CUDA_ENFORCE_LOWER
+// #include <raft/sparse/cusparse_wrappers.h>`
+//
+// (i.e., before including this header)
+//
+#define CUDA_VER_10_1_UP (CUDART_VERSION >= 10100)
+
+namespace raft {
+
+/**
+ * @brief Exception thrown when a cuSparse error is encountered.
+ */
+struct cusparse_error : public raft::exception {
+    explicit cusparse_error(char const* const message) : raft::exception(message) {}
+    explicit cusparse_error(std::string const& message) : raft::exception(message) {}
+};
+
+namespace sparse {
+namespace detail {
+
+    inline const char* cusparse_error_to_string(cusparseStatus_t err)
+    {
+#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100
+                return cusparseGetErrorString(err);
+#else   // CUDART_VERSION
+                switch (err) {
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR);
+                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
+                    default: return "CUSPARSE_STATUS_UNKNOWN";
+                };
+#endif  // CUDART_VERSION
+            }
+
+        }  // namespace detail
+    }  // namespace sparse
+}  // namespace raft
+
+#undef _CUSPARSE_ERR_TO_STR
+
+/**
+ * @brief Error checking macro for cuSparse runtime API functions.
+ *
+ * Invokes a cuSparse runtime API function call, if the call does not return
+ * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred
+ */
+#define RAFT_CUSPARSE_TRY(call)                                              \
+  do {                                                                       \
+    cusparseStatus_t const status = (call);                                  \
+    if (CUSPARSE_STATUS_SUCCESS != status) {                                 \
+      std::string msg{};                                                     \
+      SET_ERROR_MSG(msg,                                                     \
+                    "cuSparse error encountered at: ",                       \
+                    "call='%s', Reason=%d:%s",                               \
+                    #call,                                                   \
+                    status,                                                  \
+                    raft::sparse::detail::cusparse_error_to_string(status)); \
+      throw raft::cusparse_error(msg);                                       \
+    }                                                                        \
+  } while (0)
+
+// FIXME: Remove after consumer rename
+#ifndef CUSPARSE_TRY
+#define CUSPARSE_TRY(call) RAFT_CUSPARSE_TRY(call)
+#endif
+
+// FIXME: Remove after consumer rename
+#ifndef CUSPARSE_CHECK
+#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call)
+#endif
+
+//@todo: use logger here once logging is enabled
+/** check for cusparse runtime API errors but do not assert */
+#define RAFT_CUSPARSE_TRY_NO_THROW(call)                           \
+  do {                                                             \
+    cusparseStatus_t err = call;                                   \
+    if (err != CUSPARSE_STATUS_SUCCESS) {                          \
+      printf("CUSPARSE call='%s' got errorcode=%d err=%s",         \
+             #call,                                                \
+             err,                                                  \
+             raft::sparse::detail::cusparse_error_to_string(err)); \
+    }                                                              \
+  } while (0)
+
+// FIXME: Remove after consumer rename
+#ifndef CUSPARSE_CHECK_NO_THROW
+#define CUSPARSE_CHECK_NO_THROW(call) RAFT_CUSPARSE_TRY_NO_THROW(call)
+#endif

From 2b2d3632be031b981f2de9fccf7ef91cdebc6480 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 15:44:06 -0500
Subject: [PATCH 04/16] Fixing style

---
 .../raft/sparse/convert/detail/dense.cuh      |   28 +-
 cpp/include/raft/sparse/cusparse_wrappers.h   | 2358 +++++++++--------
 .../raft/sparse/detail/cusparse_macros.h      |   40 +-
 3 files changed, 1262 insertions(+), 1164 deletions(-)

diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh
index d826d5ea00..3c18a40526 100644
--- a/cpp/include/raft/sparse/convert/detail/dense.cuh
+++ b/cpp/include/raft/sparse/convert/detail/dense.cuh
@@ -98,13 +98,33 @@ void csr_to_dense(cusparseHandle_t handle,
     RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL));
 
     size_t buffer_size;
-      RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize(
-              handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, &buffer_size, stream));
+    RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize(handle,
+                                                                 nrows,
+                                                                 ncols,
+                                                                 nnz,
+                                                                 out_mat,
+                                                                 csr_data,
+                                                                 csr_indptr,
+                                                                 csr_indices,
+                                                                 out,
+                                                                 lda,
+                                                                 &buffer_size,
+                                                                 stream));
 
     rmm::device_uvector<char> buffer(buffer_size, stream);
 
-    RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense(
-      handle, nrows, ncols, nnz, out_mat, csr_data, csr_indptr, csr_indices, out, lda, buffer.data(), stream));
+    RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense(handle,
+                                                      nrows,
+                                                      ncols,
+                                                      nnz,
+                                                      out_mat,
+                                                      csr_data,
+                                                      csr_indptr,
+                                                      csr_indices,
+                                                      out,
+                                                      lda,
+                                                      buffer.data(),
+                                                      stream));
 
     RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat));
 
diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h
index edd26748fb..a28d218960 100644
--- a/cpp/include/raft/sparse/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/cusparse_wrappers.h
@@ -20,195 +20,195 @@
 #include <raft/sparse/detail/cusparse_macros.h>
 
 namespace raft {
-    namespace sparse {
+namespace sparse {
 
 /**
  * @defgroup gthr cusparse gather methods
  * @{
  */
-        template <typename T>
-        cusparseStatus_t cusparsegthr(
-                cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream);
-        template <>
-        inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
-                                             int nnz,
-                                             const double* vals,
-                                             double* vals_sorted,
-                                             int* d_P,
-                                             cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+template <typename T>
+cusparseStatus_t cusparsegthr(
+  cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
+                                     int nnz,
+                                     const double* vals,
+                                     double* vals_sorted,
+                                     int* d_P,
+                                     cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
+  return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
 #pragma GCC diagnostic pop
-        }
-        template <>
-        inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
-                                             int nnz,
-                                             const float* vals,
-                                             float* vals_sorted,
-                                             int* d_P,
-                                             cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+}
+template <>
+inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle,
+                                     int nnz,
+                                     const float* vals,
+                                     float* vals_sorted,
+                                     int* d_P,
+                                     cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
+  return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO);
 #pragma GCC diagnostic pop
-        }
+}
 /** @} */
 
 /**
  * @defgroup coo2csr cusparse COO to CSR converter methods
  * @{
  */
-        template <typename T>
-        void cusparsecoo2csr(
-                cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream);
-        template <>
-        inline void cusparsecoo2csr(cusparseHandle_t handle,
-                                    const int* cooRowInd,
-                                    int nnz,
-                                    int m,
-                                    int* csrRowPtr,
-                                    cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO));
-        }
+template <typename T>
+void cusparsecoo2csr(
+  cusparseHandle_t handle, const T* cooRowInd, int nnz, int m, T* csrRowPtr, cudaStream_t stream);
+template <>
+inline void cusparsecoo2csr(cusparseHandle_t handle,
+                            const int* cooRowInd,
+                            int nnz,
+                            int m,
+                            int* csrRowPtr,
+                            cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  CUSPARSE_CHECK(cusparseXcoo2csr(handle, cooRowInd, nnz, m, csrRowPtr, CUSPARSE_INDEX_BASE_ZERO));
+}
 /** @} */
 
 /**
  * @defgroup coosort cusparse coo sort methods
  * @{
  */
-        template <typename T>
-        size_t cusparsecoosort_bufferSizeExt(  // NOLINT
-                cusparseHandle_t handle,
-                int m,
-                int n,
-                int nnz,
-                const T* cooRows,
-                const T* cooCols,
-                cudaStream_t stream);
-        template <>
-        inline size_t cusparsecoosort_bufferSizeExt(  // NOLINT
-                cusparseHandle_t handle,
-                int m,
-                int n,
-                int nnz,
-                const int* cooRows,
-                const int* cooCols,
-                cudaStream_t stream)
-        {
-            size_t val;
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val));
-            return val;
-        }
-
-        template <typename T>
-        void cusparsecoosortByRow(  // NOLINT
-                cusparseHandle_t handle,
-                int m,
-                int n,
-                int nnz,
-                T* cooRows,
-                T* cooCols,
-                T* P,
-                void* pBuffer,
-                cudaStream_t stream);
-        template <>
-        inline void cusparsecoosortByRow(  // NOLINT
-                cusparseHandle_t handle,
-                int m,
-                int n,
-                int nnz,
-                int* cooRows,
-                int* cooCols,
-                int* P,
-                void* pBuffer,
-                cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer));
-        }
+template <typename T>
+size_t cusparsecoosort_bufferSizeExt(  // NOLINT
+  cusparseHandle_t handle,
+  int m,
+  int n,
+  int nnz,
+  const T* cooRows,
+  const T* cooCols,
+  cudaStream_t stream);
+template <>
+inline size_t cusparsecoosort_bufferSizeExt(  // NOLINT
+  cusparseHandle_t handle,
+  int m,
+  int n,
+  int nnz,
+  const int* cooRows,
+  const int* cooCols,
+  cudaStream_t stream)
+{
+  size_t val;
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  CUSPARSE_CHECK(cusparseXcoosort_bufferSizeExt(handle, m, n, nnz, cooRows, cooCols, &val));
+  return val;
+}
+
+template <typename T>
+void cusparsecoosortByRow(  // NOLINT
+  cusparseHandle_t handle,
+  int m,
+  int n,
+  int nnz,
+  T* cooRows,
+  T* cooCols,
+  T* P,
+  void* pBuffer,
+  cudaStream_t stream);
+template <>
+inline void cusparsecoosortByRow(  // NOLINT
+  cusparseHandle_t handle,
+  int m,
+  int n,
+  int nnz,
+  int* cooRows,
+  int* cooCols,
+  int* P,
+  void* pBuffer,
+  cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  CUSPARSE_CHECK(cusparseXcoosortByRow(handle, m, n, nnz, cooRows, cooCols, P, pBuffer));
+}
 /** @} */
 
 /**
  * @defgroup Gemmi cusparse gemmi operations
  * @{
  */
-        template <typename T>
-        cusparseStatus_t cusparsegemmi(  // NOLINT
-                cusparseHandle_t handle,
-                int m,
-                int n,
-                int k,
-                int nnz,
-                const T* alpha,
-                const T* A,
-                int lda,
-                const T* cscValB,
-                const int* cscColPtrB,
-                const int* cscRowIndB,
-                const T* beta,
-                T* C,
-                int ldc,
-                cudaStream_t stream);
-        template <>
-        inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              int nnz,
-                                              const float* alpha,
-                                              const float* A,
-                                              int lda,
-                                              const float* cscValB,
-                                              const int* cscColPtrB,
-                                              const int* cscRowIndB,
-                                              const float* beta,
-                                              float* C,
-                                              int ldc,
-                                              cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+template <typename T>
+cusparseStatus_t cusparsegemmi(  // NOLINT
+  cusparseHandle_t handle,
+  int m,
+  int n,
+  int k,
+  int nnz,
+  const T* alpha,
+  const T* A,
+  int lda,
+  const T* cscValB,
+  const int* cscColPtrB,
+  const int* cscRowIndB,
+  const T* beta,
+  T* C,
+  int ldc,
+  cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
+                                      int m,
+                                      int n,
+                                      int k,
+                                      int nnz,
+                                      const float* alpha,
+                                      const float* A,
+                                      int lda,
+                                      const float* cscValB,
+                                      const int* cscColPtrB,
+                                      const int* cscRowIndB,
+                                      const float* beta,
+                                      float* C,
+                                      int ldc,
+                                      cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseSgemmi(
-                    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
+  return cusparseSgemmi(
+    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
 #pragma GCC diagnostic pop
-        }
-        template <>
-        inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              int nnz,
-                                              const double* alpha,
-                                              const double* A,
-                                              int lda,
-                                              const double* cscValB,
-                                              const int* cscColPtrB,
-                                              const int* cscRowIndB,
-                                              const double* beta,
-                                              double* C,
-                                              int ldc,
-                                              cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+}
+template <>
+inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle,
+                                      int m,
+                                      int n,
+                                      int k,
+                                      int nnz,
+                                      const double* alpha,
+                                      const double* A,
+                                      int lda,
+                                      const double* cscValB,
+                                      const int* cscColPtrB,
+                                      const int* cscRowIndB,
+                                      const double* beta,
+                                      double* C,
+                                      int ldc,
+                                      cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseDgemmi(
-                    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
+  return cusparseDgemmi(
+    handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc);
 #pragma GCC diagnostic pop
-        }
+}
 /** @} */
 
 #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
-        /**
+/**
  * @defgroup cusparse Create CSR operations
  * @{
  */
@@ -455,67 +455,67 @@ inline cusparseStatus_t cusparsespmv(cusparseHandle_t handle,
  * @defgroup Csrmv cusparse csrmv operations
  * @{
  */
-        template <typename T>
-        cusparseStatus_t cusparsecsrmv(  // NOLINT
-                cusparseHandle_t handle,
-                cusparseOperation_t trans,
-                int m,
-                int n,
-                int nnz,
-                const T* alpha,
-                const cusparseMatDescr_t descr,
-                const T* csrVal,
-                const int* csrRowPtr,
-                const int* csrColInd,
-                const T* x,
-                const T* beta,
-                T* y,
-                cudaStream_t stream);
-        template <>
-        inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const float* alpha,
-                                              const cusparseMatDescr_t descr,
-                                              const float* csrVal,
-                                              const int* csrRowPtr,
-                                              const int* csrColInd,
-                                              const float* x,
-                                              const float* beta,
-                                              float* y,
-                                              cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            return cusparseScsrmv(
-                    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
-        }
-        template <>
-        inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const double* alpha,
-                                              const cusparseMatDescr_t descr,
-                                              const double* csrVal,
-                                              const int* csrRowPtr,
-                                              const int* csrColInd,
-                                              const double* x,
-                                              const double* beta,
-                                              double* y,
-                                              cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            return cusparseDcsrmv(
-                    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
-        }
+template <typename T>
+cusparseStatus_t cusparsecsrmv(  // NOLINT
+  cusparseHandle_t handle,
+  cusparseOperation_t trans,
+  int m,
+  int n,
+  int nnz,
+  const T* alpha,
+  const cusparseMatDescr_t descr,
+  const T* csrVal,
+  const int* csrRowPtr,
+  const int* csrColInd,
+  const T* x,
+  const T* beta,
+  T* y,
+  cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
+                                      cusparseOperation_t trans,
+                                      int m,
+                                      int n,
+                                      int nnz,
+                                      const float* alpha,
+                                      const cusparseMatDescr_t descr,
+                                      const float* csrVal,
+                                      const int* csrRowPtr,
+                                      const int* csrColInd,
+                                      const float* x,
+                                      const float* beta,
+                                      float* y,
+                                      cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseScsrmv(
+    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
+}
+template <>
+inline cusparseStatus_t cusparsecsrmv(cusparseHandle_t handle,
+                                      cusparseOperation_t trans,
+                                      int m,
+                                      int n,
+                                      int nnz,
+                                      const double* alpha,
+                                      const cusparseMatDescr_t descr,
+                                      const double* csrVal,
+                                      const int* csrRowPtr,
+                                      const int* csrColInd,
+                                      const double* x,
+                                      const double* beta,
+                                      double* y,
+                                      cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseDcsrmv(
+    handle, trans, m, n, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, beta, y);
+}
 /** @} */
 #endif
 
 #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
-        /**
+/**
  * @defgroup Csrmm cusparse csrmm operations
  * @{
  */
@@ -617,71 +617,71 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
  * @defgroup Csrmm cusparse csrmm operations
  * @{
  */
-        template <typename T>
-        cusparseStatus_t cusparsecsrmm(  // NOLINT
-                cusparseHandle_t handle,
-                cusparseOperation_t trans,
-                int m,
-                int n,
-                int k,
-                int nnz,
-                const T* alpha,
-                const cusparseMatDescr_t descr,
-                const T* csrVal,
-                const int* csrRowPtr,
-                const int* csrColInd,
-                const T* x,
-                const int ldx,
-                const T* beta,
-                T* y,
-                const int ldy,
-                cudaStream_t stream);
-        template <>
-        inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              int nnz,
-                                              const float* alpha,
-                                              const cusparseMatDescr_t descr,
-                                              const float* csrVal,
-                                              const int* csrRowPtr,
-                                              const int* csrColInd,
-                                              const float* x,
-                                              const int ldx,
-                                              const float* beta,
-                                              float* y,
-                                              const int ldy,
-                                              cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            return cusparseScsrmm(
-                    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
-        }
-        template <>
-        inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
-                                              cusparseOperation_t trans,
-                                              int m,
-                                              int n,
-                                              int k,
-                                              int nnz,
-                                              const double* alpha,
-                                              const cusparseMatDescr_t descr,
-                                              const double* csrVal,
-                                              const int* csrRowPtr,
-                                              const int* csrColInd,
-                                              const double* x,
-                                              const int ldx,
-                                              const double* beta,
-                                              double* y,
-                                              const int ldy,
-                                              cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            return cusparseDcsrmm(
-                    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
-        }
+template <typename T>
+cusparseStatus_t cusparsecsrmm(  // NOLINT
+  cusparseHandle_t handle,
+  cusparseOperation_t trans,
+  int m,
+  int n,
+  int k,
+  int nnz,
+  const T* alpha,
+  const cusparseMatDescr_t descr,
+  const T* csrVal,
+  const int* csrRowPtr,
+  const int* csrColInd,
+  const T* x,
+  const int ldx,
+  const T* beta,
+  T* y,
+  const int ldy,
+  cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
+                                      cusparseOperation_t trans,
+                                      int m,
+                                      int n,
+                                      int k,
+                                      int nnz,
+                                      const float* alpha,
+                                      const cusparseMatDescr_t descr,
+                                      const float* csrVal,
+                                      const int* csrRowPtr,
+                                      const int* csrColInd,
+                                      const float* x,
+                                      const int ldx,
+                                      const float* beta,
+                                      float* y,
+                                      const int ldy,
+                                      cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseScsrmm(
+    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
+}
+template <>
+inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle,
+                                      cusparseOperation_t trans,
+                                      int m,
+                                      int n,
+                                      int k,
+                                      int nnz,
+                                      const double* alpha,
+                                      const cusparseMatDescr_t descr,
+                                      const double* csrVal,
+                                      const int* csrRowPtr,
+                                      const int* csrColInd,
+                                      const double* x,
+                                      const int ldx,
+                                      const double* beta,
+                                      double* y,
+                                      const int ldy,
+                                      cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseDcsrmm(
+    handle, trans, m, n, k, nnz, alpha, descr, csrVal, csrRowPtr, csrColInd, x, ldx, beta, y, ldy);
+}
 /** @} */
 #endif
 
@@ -689,25 +689,25 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
  * @defgroup csr2coo cusparse CSR to COO converter methods
  * @{
  */
-        template <typename T>
-        void cusparsecsr2coo(  // NOLINT
-                cusparseHandle_t handle,
-                const int n,
-                const int nnz,
-                const T* csrRowPtr,
-                T* cooRowInd,
-                cudaStream_t stream);
-        template <>
-        inline void cusparsecsr2coo(cusparseHandle_t handle,
-                                    const int n,
-                                    const int nnz,
-                                    const int* csrRowPtr,
-                                    int* cooRowInd,
-                                    cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO));
-        }
+template <typename T>
+void cusparsecsr2coo(  // NOLINT
+  cusparseHandle_t handle,
+  const int n,
+  const int nnz,
+  const T* csrRowPtr,
+  T* cooRowInd,
+  cudaStream_t stream);
+template <>
+inline void cusparsecsr2coo(cusparseHandle_t handle,
+                            const int n,
+                            const int nnz,
+                            const int* csrRowPtr,
+                            int* cooRowInd,
+                            cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  CUSPARSE_CHECK(cusparseXcsr2coo(handle, csrRowPtr, nnz, n, cooRowInd, CUSPARSE_INDEX_BASE_ZERO));
+}
 /** @} */
 
 /**
@@ -722,214 +722,260 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
 //                                         cudaStream_t stream);
 
 // template<>
-        inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
-                                                       cusparsePointerMode_t mode,
-                                                       cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            return cusparseSetPointerMode(handle, mode);
-        }
+inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
+                                               cusparsePointerMode_t mode,
+                                               cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  return cusparseSetPointerMode(handle, mode);
+}
 /** @} */
 
 /**
  * @defgroup CsrmvEx cusparse csrmvex operations
  * @{
  */
-        template <typename T>
-        cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
-                                                    cusparseAlgMode_t alg,
-                                                    cusparseOperation_t transA,
-                                                    int m,
-                                                    int n,
-                                                    int nnz,
-                                                    const T* alpha,
-                                                    const cusparseMatDescr_t descrA,
-                                                    const T* csrValA,
-                                                    const int* csrRowPtrA,
-                                                    const int* csrColIndA,
-                                                    const T* x,
-                                                    const T* beta,
-                                                    T* y,
-                                                    size_t* bufferSizeInBytes,
-                                                    cudaStream_t stream);
-        template <>
-        inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
-                                                           cusparseAlgMode_t alg,
-                                                           cusparseOperation_t transA,
-                                                           int m,
-                                                           int n,
-                                                           int nnz,
-                                                           const float* alpha,
-                                                           const cusparseMatDescr_t descrA,
-                                                           const float* csrValA,
-                                                           const int* csrRowPtrA,
-                                                           const int* csrColIndA,
-                                                           const float* x,
-                                                           const float* beta,
-                                                           float* y,
-                                                           size_t* bufferSizeInBytes,
-                                                           cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                           CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                           CUDA_R_32F);
-
-            cusparseDnVecDescr_t vecX;
-            cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
-
-            cusparseDnVecDescr_t vecY;
-            cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
-
-            cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY,
-                                           CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes);
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
-            return result;
-
-        }
-        template <>
-        inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
-                                                           cusparseAlgMode_t alg,
-                                                           cusparseOperation_t transA,
-                                                           int m,
-                                                           int n,
-                                                           int nnz,
-                                                           const double* alpha,
-                                                           const cusparseMatDescr_t descrA,
-                                                           const double* csrValA,
-                                                           const int* csrRowPtrA,
-                                                           const int* csrColIndA,
-                                                           const double* x,
-                                                           const double* beta,
-                                                           double* y,
-                                                           size_t* bufferSizeInBytes,
-                                                           cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_64F);
-
-            cusparseDnVecDescr_t vecX;
-            cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
-
-            cusparseDnVecDescr_t vecY;
-            cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
-
-            cusparseStatus_t result = cusparseSpMV_bufferSize(handle, transA, alpha, matA, vecX, beta, vecY,
-                                           CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, bufferSizeInBytes);
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
-            return result;
-
-        }
-
-        template <typename T>
-        cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
-                                         cusparseAlgMode_t alg,
-                                         cusparseOperation_t transA,
-                                         int m,
-                                         int n,
-                                         int nnz,
-                                         const T* alpha,
-                                         const cusparseMatDescr_t descrA,
-                                         const T* csrValA,
-                                         const int* csrRowPtrA,
-                                         const int* csrColIndA,
-                                         const T* x,
-                                         const T* beta,
-                                         T* y,
-                                         T* buffer,
-                                         cudaStream_t stream);
-        template <>
-        inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
-                                                cusparseAlgMode_t alg,
-                                                cusparseOperation_t transA,
-                                                int m,
-                                                int n,
-                                                int nnz,
-                                                const float* alpha,
-                                                const cusparseMatDescr_t descrA,
-                                                const float* csrValA,
-                                                const int* csrRowPtrA,
-                                                const int* csrColIndA,
-                                                const float* x,
-                                                const float* beta,
-                                                float* y,
-                                                float* buffer,
-                                                cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_32F);
-
-            cusparseDnVecDescr_t vecX;
-            cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
-
-            cusparseDnVecDescr_t vecY;
-            cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
-
-            cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY,
-                                CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
-            return result;
-        }
-        template <>
-        inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
-                                                cusparseAlgMode_t alg,
-                                                cusparseOperation_t transA,
-                                                int m,
-                                                int n,
-                                                int nnz,
-                                                const double* alpha,
-                                                const cusparseMatDescr_t descrA,
-                                                const double* csrValA,
-                                                const int* csrRowPtrA,
-                                                const int* csrColIndA,
-                                                const double* x,
-                                                const double* beta,
-                                                double* y,
-                                                double* buffer,
-                                                cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_64F);
-
-            cusparseDnVecDescr_t vecX;
-            cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
-
-            cusparseDnVecDescr_t vecY;
-            cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+template <typename T>
+cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
+                                            cusparseAlgMode_t alg,
+                                            cusparseOperation_t transA,
+                                            int m,
+                                            int n,
+                                            int nnz,
+                                            const T* alpha,
+                                            const cusparseMatDescr_t descrA,
+                                            const T* csrValA,
+                                            const int* csrRowPtrA,
+                                            const int* csrColIndA,
+                                            const T* x,
+                                            const T* beta,
+                                            T* y,
+                                            size_t* bufferSizeInBytes,
+                                            cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
+                                                   cusparseAlgMode_t alg,
+                                                   cusparseOperation_t transA,
+                                                   int m,
+                                                   int n,
+                                                   int nnz,
+                                                   const float* alpha,
+                                                   const cusparseMatDescr_t descrA,
+                                                   const float* csrValA,
+                                                   const int* csrRowPtrA,
+                                                   const int* csrColIndA,
+                                                   const float* x,
+                                                   const float* beta,
+                                                   float* y,
+                                                   size_t* bufferSizeInBytes,
+                                                   cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-            cusparseStatus_t result = cusparseSpMV(handle, transA, alpha, matA, vecX, beta, vecY,
-                                           CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
+  cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_32F);
+
+  cusparseDnVecDescr_t vecX;
+  cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
+
+  cusparseDnVecDescr_t vecY;
+  cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
+
+  cusparseStatus_t result = cusparseSpMV_bufferSize(handle,
+                                                    transA,
+                                                    alpha,
+                                                    matA,
+                                                    vecX,
+                                                    beta,
+                                                    vecY,
+                                                    CUDA_R_32F,
+                                                    CUSPARSE_SPMV_ALG_DEFAULT,
+                                                    bufferSizeInBytes);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+  return result;
+}
+template <>
+inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
+                                                   cusparseAlgMode_t alg,
+                                                   cusparseOperation_t transA,
+                                                   int m,
+                                                   int n,
+                                                   int nnz,
+                                                   const double* alpha,
+                                                   const cusparseMatDescr_t descrA,
+                                                   const double* csrValA,
+                                                   const int* csrRowPtrA,
+                                                   const int* csrColIndA,
+                                                   const double* x,
+                                                   const double* beta,
+                                                   double* y,
+                                                   size_t* bufferSizeInBytes,
+                                                   cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_64F);
+
+  cusparseDnVecDescr_t vecX;
+  cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
+
+  cusparseDnVecDescr_t vecY;
+  cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+
+  cusparseStatus_t result = cusparseSpMV_bufferSize(handle,
+                                                    transA,
+                                                    alpha,
+                                                    matA,
+                                                    vecX,
+                                                    beta,
+                                                    vecY,
+                                                    CUDA_R_64F,
+                                                    CUSPARSE_SPMV_ALG_DEFAULT,
+                                                    bufferSizeInBytes);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+  return result;
+}
 
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+template <typename T>
+cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
+                                 cusparseAlgMode_t alg,
+                                 cusparseOperation_t transA,
+                                 int m,
+                                 int n,
+                                 int nnz,
+                                 const T* alpha,
+                                 const cusparseMatDescr_t descrA,
+                                 const T* csrValA,
+                                 const int* csrRowPtrA,
+                                 const int* csrColIndA,
+                                 const T* x,
+                                 const T* beta,
+                                 T* y,
+                                 T* buffer,
+                                 cudaStream_t stream);
+template <>
+inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
+                                        cusparseAlgMode_t alg,
+                                        cusparseOperation_t transA,
+                                        int m,
+                                        int n,
+                                        int nnz,
+                                        const float* alpha,
+                                        const cusparseMatDescr_t descrA,
+                                        const float* csrValA,
+                                        const int* csrRowPtrA,
+                                        const int* csrColIndA,
+                                        const float* x,
+                                        const float* beta,
+                                        float* y,
+                                        float* buffer,
+                                        cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_32F);
+
+  cusparseDnVecDescr_t vecX;
+  cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
+
+  cusparseDnVecDescr_t vecY;
+  cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
+
+  cusparseStatus_t result = cusparseSpMV(
+    handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+  return result;
+}
+template <>
+inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
+                                        cusparseAlgMode_t alg,
+                                        cusparseOperation_t transA,
+                                        int m,
+                                        int n,
+                                        int nnz,
+                                        const double* alpha,
+                                        const cusparseMatDescr_t descrA,
+                                        const double* csrValA,
+                                        const int* csrRowPtrA,
+                                        const int* csrColIndA,
+                                        const double* x,
+                                        const double* beta,
+                                        double* y,
+                                        double* buffer,
+                                        cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-            return result;
-        }
+  cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_64F);
+
+  cusparseDnVecDescr_t vecX;
+  cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
+
+  cusparseDnVecDescr_t vecY;
+  cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+
+  cusparseStatus_t result = cusparseSpMV(
+    handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
+
+  return result;
+}
 
 /** @} */
 
@@ -938,182 +984,182 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
  * @{
  */
 
-        template <typename T>
-        cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
-                                                    int m,
-                                                    int n,
-                                                    int nnz,
-                                                    const T* csrVal,
-                                                    const int* csrRowPtr,
-                                                    const int* csrColInd,
-                                                    void* cscVal,
-                                                    int* cscColPtr,
-                                                    int* cscRowInd,
-                                                    cusparseAction_t copyValues,
-                                                    cusparseIndexBase_t idxBase,
-                                                    cusparseCsr2CscAlg_t alg,
-                                                    size_t* bufferSize,
-                                                    cudaStream_t stream);
-
-        template <>
-        inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
-                                                           int m,
-                                                           int n,
-                                                           int nnz,
-                                                           const float* csrVal,
-                                                           const int* csrRowPtr,
-                                                           const int* csrColInd,
-                                                           void* cscVal,
-                                                           int* cscColPtr,
-                                                           int* cscRowInd,
-                                                           cusparseAction_t copyValues,
-                                                           cusparseIndexBase_t idxBase,
-                                                           cusparseCsr2CscAlg_t alg,
-                                                           size_t* bufferSize,
-                                                           cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            return cusparseCsr2cscEx2_bufferSize(handle,
-                                                 m,
-                                                 n,
-                                                 nnz,
-                                                 csrVal,
-                                                 csrRowPtr,
-                                                 csrColInd,
-                                                 cscVal,
-                                                 cscColPtr,
-                                                 cscRowInd,
-                                                 CUDA_R_32F,
-                                                 copyValues,
-                                                 idxBase,
-                                                 alg,
-                                                 bufferSize);
-        }
-        template <>
-        inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
-                                                           int m,
-                                                           int n,
-                                                           int nnz,
-                                                           const double* csrVal,
-                                                           const int* csrRowPtr,
-                                                           const int* csrColInd,
-                                                           void* cscVal,
-                                                           int* cscColPtr,
-                                                           int* cscRowInd,
-                                                           cusparseAction_t copyValues,
-                                                           cusparseIndexBase_t idxBase,
-                                                           cusparseCsr2CscAlg_t alg,
-                                                           size_t* bufferSize,
-                                                           cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            return cusparseCsr2cscEx2_bufferSize(handle,
-                                                 m,
-                                                 n,
-                                                 nnz,
-                                                 csrVal,
-                                                 csrRowPtr,
-                                                 csrColInd,
-                                                 cscVal,
-                                                 cscColPtr,
-                                                 cscRowInd,
-                                                 CUDA_R_64F,
-                                                 copyValues,
-                                                 idxBase,
-                                                 alg,
-                                                 bufferSize);
-        }
-
-        template <typename T>
-        cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
-                                         int m,
-                                         int n,
-                                         int nnz,
-                                         const T* csrVal,
-                                         const int* csrRowPtr,
-                                         const int* csrColInd,
-                                         void* cscVal,
-                                         int* cscColPtr,
-                                         int* cscRowInd,
-                                         cusparseAction_t copyValues,
-                                         cusparseIndexBase_t idxBase,
-                                         cusparseCsr2CscAlg_t alg,
-                                         void* buffer,
-                                         cudaStream_t stream);
+template <typename T>
+cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
+                                            int m,
+                                            int n,
+                                            int nnz,
+                                            const T* csrVal,
+                                            const int* csrRowPtr,
+                                            const int* csrColInd,
+                                            void* cscVal,
+                                            int* cscColPtr,
+                                            int* cscRowInd,
+                                            cusparseAction_t copyValues,
+                                            cusparseIndexBase_t idxBase,
+                                            cusparseCsr2CscAlg_t alg,
+                                            size_t* bufferSize,
+                                            cudaStream_t stream);
 
-        template <>
-        inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                int nnz,
-                                                const float* csrVal,
-                                                const int* csrRowPtr,
-                                                const int* csrColInd,
-                                                void* cscVal,
-                                                int* cscColPtr,
-                                                int* cscRowInd,
-                                                cusparseAction_t copyValues,
-                                                cusparseIndexBase_t idxBase,
-                                                cusparseCsr2CscAlg_t alg,
-                                                void* buffer,
-                                                cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            return cusparseCsr2cscEx2(handle,
-                                      m,
-                                      n,
-                                      nnz,
-                                      csrVal,
-                                      csrRowPtr,
-                                      csrColInd,
-                                      cscVal,
-                                      cscColPtr,
-                                      cscRowInd,
-                                      CUDA_R_32F,
-                                      copyValues,
-                                      idxBase,
-                                      alg,
-                                      buffer);
-        }
-
-        template <>
-        inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
-                                                int m,
-                                                int n,
-                                                int nnz,
-                                                const double* csrVal,
-                                                const int* csrRowPtr,
-                                                const int* csrColInd,
-                                                void* cscVal,
-                                                int* cscColPtr,
-                                                int* cscRowInd,
-                                                cusparseAction_t copyValues,
-                                                cusparseIndexBase_t idxBase,
-                                                cusparseCsr2CscAlg_t alg,
-                                                void* buffer,
-                                                cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            return cusparseCsr2cscEx2(handle,
-                                      m,
-                                      n,
-                                      nnz,
-                                      csrVal,
-                                      csrRowPtr,
-                                      csrColInd,
-                                      cscVal,
-                                      cscColPtr,
-                                      cscRowInd,
-                                      CUDA_R_64F,
-                                      copyValues,
-                                      idxBase,
-                                      alg,
-                                      buffer);
-        }
+template <>
+inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
+                                                   int m,
+                                                   int n,
+                                                   int nnz,
+                                                   const float* csrVal,
+                                                   const int* csrRowPtr,
+                                                   const int* csrColInd,
+                                                   void* cscVal,
+                                                   int* cscColPtr,
+                                                   int* cscRowInd,
+                                                   cusparseAction_t copyValues,
+                                                   cusparseIndexBase_t idxBase,
+                                                   cusparseCsr2CscAlg_t alg,
+                                                   size_t* bufferSize,
+                                                   cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+  return cusparseCsr2cscEx2_bufferSize(handle,
+                                       m,
+                                       n,
+                                       nnz,
+                                       csrVal,
+                                       csrRowPtr,
+                                       csrColInd,
+                                       cscVal,
+                                       cscColPtr,
+                                       cscRowInd,
+                                       CUDA_R_32F,
+                                       copyValues,
+                                       idxBase,
+                                       alg,
+                                       bufferSize);
+}
+template <>
+inline cusparseStatus_t cusparsecsr2csc_bufferSize(cusparseHandle_t handle,
+                                                   int m,
+                                                   int n,
+                                                   int nnz,
+                                                   const double* csrVal,
+                                                   const int* csrRowPtr,
+                                                   const int* csrColInd,
+                                                   void* cscVal,
+                                                   int* cscColPtr,
+                                                   int* cscRowInd,
+                                                   cusparseAction_t copyValues,
+                                                   cusparseIndexBase_t idxBase,
+                                                   cusparseCsr2CscAlg_t alg,
+                                                   size_t* bufferSize,
+                                                   cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+  return cusparseCsr2cscEx2_bufferSize(handle,
+                                       m,
+                                       n,
+                                       nnz,
+                                       csrVal,
+                                       csrRowPtr,
+                                       csrColInd,
+                                       cscVal,
+                                       cscColPtr,
+                                       cscRowInd,
+                                       CUDA_R_64F,
+                                       copyValues,
+                                       idxBase,
+                                       alg,
+                                       bufferSize);
+}
+
+template <typename T>
+cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
+                                 int m,
+                                 int n,
+                                 int nnz,
+                                 const T* csrVal,
+                                 const int* csrRowPtr,
+                                 const int* csrColInd,
+                                 void* cscVal,
+                                 int* cscColPtr,
+                                 int* cscRowInd,
+                                 cusparseAction_t copyValues,
+                                 cusparseIndexBase_t idxBase,
+                                 cusparseCsr2CscAlg_t alg,
+                                 void* buffer,
+                                 cudaStream_t stream);
+
+template <>
+inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
+                                        int m,
+                                        int n,
+                                        int nnz,
+                                        const float* csrVal,
+                                        const int* csrRowPtr,
+                                        const int* csrColInd,
+                                        void* cscVal,
+                                        int* cscColPtr,
+                                        int* cscRowInd,
+                                        cusparseAction_t copyValues,
+                                        cusparseIndexBase_t idxBase,
+                                        cusparseCsr2CscAlg_t alg,
+                                        void* buffer,
+                                        cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+  return cusparseCsr2cscEx2(handle,
+                            m,
+                            n,
+                            nnz,
+                            csrVal,
+                            csrRowPtr,
+                            csrColInd,
+                            cscVal,
+                            cscColPtr,
+                            cscRowInd,
+                            CUDA_R_32F,
+                            copyValues,
+                            idxBase,
+                            alg,
+                            buffer);
+}
+
+template <>
+inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle,
+                                        int m,
+                                        int n,
+                                        int nnz,
+                                        const double* csrVal,
+                                        const int* csrRowPtr,
+                                        const int* csrColInd,
+                                        void* cscVal,
+                                        int* cscColPtr,
+                                        int* cscRowInd,
+                                        cusparseAction_t copyValues,
+                                        cusparseIndexBase_t idxBase,
+                                        cusparseCsr2CscAlg_t alg,
+                                        void* buffer,
+                                        cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+  return cusparseCsr2cscEx2(handle,
+                            m,
+                            n,
+                            nnz,
+                            csrVal,
+                            csrRowPtr,
+                            csrColInd,
+                            cscVal,
+                            cscColPtr,
+                            cscRowInd,
+                            CUDA_R_64F,
+                            copyValues,
+                            idxBase,
+                            alg,
+                            buffer);
+}
 
 /** @} */
 
@@ -1122,332 +1168,332 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
  * @{
  */
 
-        template <typename T>
-        cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
-                                                        int m,
-                                                        int n,
-                                                        int k,
-                                                        const T* alpha,
-                                                        const T* beta,
-                                                        const cusparseMatDescr_t matA,
-                                                        int nnzA,
-                                                        const int* rowindA,
-                                                        const int* indicesA,
-                                                        const cusparseMatDescr_t matB,
-                                                        int nnzB,
-                                                        const int* rowindB,
-                                                        const int* indicesB,
-                                                        const cusparseMatDescr_t matD,
-                                                        int nnzD,
-                                                        const int* rowindD,
-                                                        const int* indicesD,
-                                                        csrgemm2Info_t info,
-                                                        size_t* pBufferSizeInBytes,
-                                                        cudaStream_t stream);
-
-        template <>
-        inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
-                                                               int m,
-                                                               int n,
-                                                               int k,
-                                                               const float* alpha,
-                                                               const float* beta,
-                                                               const cusparseMatDescr_t matA,
-                                                               int nnzA,
-                                                               const int* rowindA,
-                                                               const int* indicesA,
-                                                               const cusparseMatDescr_t matB,
-                                                               int nnzB,
-                                                               const int* rowindB,
-                                                               const int* indicesB,
-                                                               const cusparseMatDescr_t matD,
-                                                               int nnzD,
-                                                               const int* rowindD,
-                                                               const int* indicesD,
-                                                               csrgemm2Info_t info,
-                                                               size_t* pBufferSizeInBytes,
-                                                               cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+template <typename T>
+cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
+                                                int m,
+                                                int n,
+                                                int k,
+                                                const T* alpha,
+                                                const T* beta,
+                                                const cusparseMatDescr_t matA,
+                                                int nnzA,
+                                                const int* rowindA,
+                                                const int* indicesA,
+                                                const cusparseMatDescr_t matB,
+                                                int nnzB,
+                                                const int* rowindB,
+                                                const int* indicesB,
+                                                const cusparseMatDescr_t matD,
+                                                int nnzD,
+                                                const int* rowindD,
+                                                const int* indicesD,
+                                                csrgemm2Info_t info,
+                                                size_t* pBufferSizeInBytes,
+                                                cudaStream_t stream);
+
+template <>
+inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
+                                                       int m,
+                                                       int n,
+                                                       int k,
+                                                       const float* alpha,
+                                                       const float* beta,
+                                                       const cusparseMatDescr_t matA,
+                                                       int nnzA,
+                                                       const int* rowindA,
+                                                       const int* indicesA,
+                                                       const cusparseMatDescr_t matB,
+                                                       int nnzB,
+                                                       const int* rowindB,
+                                                       const int* indicesB,
+                                                       const cusparseMatDescr_t matD,
+                                                       int nnzD,
+                                                       const int* rowindD,
+                                                       const int* indicesD,
+                                                       csrgemm2Info_t info,
+                                                       size_t* pBufferSizeInBytes,
+                                                       cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseScsrgemm2_bufferSizeExt(handle,
-                                                   m,
-                                                   n,
-                                                   k,
-                                                   alpha,
-                                                   matA,
-                                                   nnzA,
-                                                   rowindA,
-                                                   indicesA,
-                                                   matB,
-                                                   nnzB,
-                                                   rowindB,
-                                                   indicesB,
-                                                   beta,
-                                                   matD,
-                                                   nnzD,
-                                                   rowindD,
-                                                   indicesD,
-                                                   info,
-                                                   pBufferSizeInBytes);
+  return cusparseScsrgemm2_bufferSizeExt(handle,
+                                         m,
+                                         n,
+                                         k,
+                                         alpha,
+                                         matA,
+                                         nnzA,
+                                         rowindA,
+                                         indicesA,
+                                         matB,
+                                         nnzB,
+                                         rowindB,
+                                         indicesB,
+                                         beta,
+                                         matD,
+                                         nnzD,
+                                         rowindD,
+                                         indicesD,
+                                         info,
+                                         pBufferSizeInBytes);
 #pragma GCC diagnostic pop
-        }
-
-        template <>
-        inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
-                                                               int m,
-                                                               int n,
-                                                               int k,
-                                                               const double* alpha,
-                                                               const double* beta,
-                                                               const cusparseMatDescr_t matA,
-                                                               int nnzA,
-                                                               const int* rowindA,
-                                                               const int* indicesA,
-                                                               const cusparseMatDescr_t matB,
-                                                               int nnzB,
-                                                               const int* rowindB,
-                                                               const int* indicesB,
-                                                               const cusparseMatDescr_t matD,
-                                                               int nnzD,
-                                                               const int* rowindD,
-                                                               const int* indicesD,
-                                                               csrgemm2Info_t info,
-                                                               size_t* pBufferSizeInBytes,
-                                                               cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+}
+
+template <>
+inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle,
+                                                       int m,
+                                                       int n,
+                                                       int k,
+                                                       const double* alpha,
+                                                       const double* beta,
+                                                       const cusparseMatDescr_t matA,
+                                                       int nnzA,
+                                                       const int* rowindA,
+                                                       const int* indicesA,
+                                                       const cusparseMatDescr_t matB,
+                                                       int nnzB,
+                                                       const int* rowindB,
+                                                       const int* indicesB,
+                                                       const cusparseMatDescr_t matD,
+                                                       int nnzD,
+                                                       const int* rowindD,
+                                                       const int* indicesD,
+                                                       csrgemm2Info_t info,
+                                                       size_t* pBufferSizeInBytes,
+                                                       cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseDcsrgemm2_bufferSizeExt(handle,
-                                                   m,
-                                                   n,
-                                                   k,
-                                                   alpha,
-                                                   matA,
-                                                   nnzA,
-                                                   rowindA,
-                                                   indicesA,
-                                                   matB,
-                                                   nnzB,
-                                                   rowindB,
-                                                   indicesB,
-                                                   beta,
-                                                   matD,
-                                                   nnzD,
-                                                   rowindD,
-                                                   indicesD,
-                                                   info,
-                                                   pBufferSizeInBytes);
+  return cusparseDcsrgemm2_bufferSizeExt(handle,
+                                         m,
+                                         n,
+                                         k,
+                                         alpha,
+                                         matA,
+                                         nnzA,
+                                         rowindA,
+                                         indicesA,
+                                         matB,
+                                         nnzB,
+                                         rowindB,
+                                         indicesB,
+                                         beta,
+                                         matD,
+                                         nnzD,
+                                         rowindD,
+                                         indicesD,
+                                         info,
+                                         pBufferSizeInBytes);
 #pragma GCC diagnostic pop
-        }
-
-        inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle,
-                                                    int m,
-                                                    int n,
-                                                    int k,
-                                                    const cusparseMatDescr_t matA,
-                                                    int nnzA,
-                                                    const int* rowindA,
-                                                    const int* indicesA,
-                                                    const cusparseMatDescr_t matB,
-                                                    int nnzB,
-                                                    const int* rowindB,
-                                                    const int* indicesB,
-                                                    const cusparseMatDescr_t matD,
-                                                    int nnzD,
-                                                    const int* rowindD,
-                                                    const int* indicesD,
-                                                    const cusparseMatDescr_t matC,
-                                                    int* rowindC,
-                                                    int* nnzC,
-                                                    const csrgemm2Info_t info,
-                                                    void* pBuffer,
-                                                    cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+}
+
+inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle,
+                                            int m,
+                                            int n,
+                                            int k,
+                                            const cusparseMatDescr_t matA,
+                                            int nnzA,
+                                            const int* rowindA,
+                                            const int* indicesA,
+                                            const cusparseMatDescr_t matB,
+                                            int nnzB,
+                                            const int* rowindB,
+                                            const int* indicesB,
+                                            const cusparseMatDescr_t matD,
+                                            int nnzD,
+                                            const int* rowindD,
+                                            const int* indicesD,
+                                            const cusparseMatDescr_t matC,
+                                            int* rowindC,
+                                            int* nnzC,
+                                            const csrgemm2Info_t info,
+                                            void* pBuffer,
+                                            cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseXcsrgemm2Nnz(handle,
-                                        m,
-                                        n,
-                                        k,
-                                        matA,
-                                        nnzA,
-                                        rowindA,
-                                        indicesA,
-                                        matB,
-                                        nnzB,
-                                        rowindB,
-                                        indicesB,
-                                        matD,
-                                        nnzD,
-                                        rowindD,
-                                        indicesD,
-                                        matC,
-                                        rowindC,
-                                        nnzC,
-                                        info,
-                                        pBuffer);
+  return cusparseXcsrgemm2Nnz(handle,
+                              m,
+                              n,
+                              k,
+                              matA,
+                              nnzA,
+                              rowindA,
+                              indicesA,
+                              matB,
+                              nnzB,
+                              rowindB,
+                              indicesB,
+                              matD,
+                              nnzD,
+                              rowindD,
+                              indicesD,
+                              matC,
+                              rowindC,
+                              nnzC,
+                              info,
+                              pBuffer);
 #pragma GCC diagnostic pop
-        }
+}
 
-        template <typename T>
-        cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
-                                          int m,
-                                          int n,
-                                          int k,
-                                          const T* alpha,
-                                          const cusparseMatDescr_t descrA,
-                                          int nnzA,
-                                          const T* csrValA,
-                                          const int* csrRowPtrA,
-                                          const int* csrColIndA,
-                                          const cusparseMatDescr_t descrB,
-                                          int nnzB,
-                                          const T* csrValB,
-                                          const int* csrRowPtrB,
-                                          const int* csrColIndB,
-                                          const T* beta,
-                                          const cusparseMatDescr_t descrD,
-                                          int nnzD,
-                                          const T* csrValD,
-                                          const int* csrRowPtrD,
-                                          const int* csrColIndD,
-                                          const cusparseMatDescr_t descrC,
-                                          T* csrValC,
-                                          const int* csrRowPtrC,
-                                          int* csrColIndC,
-                                          const csrgemm2Info_t info,
-                                          void* pBuffer,
-                                          cudaStream_t stream);
-
-        template <>
-        inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
-                                                 int m,
-                                                 int n,
-                                                 int k,
-                                                 const float* alpha,
-                                                 const cusparseMatDescr_t descrA,
-                                                 int nnzA,
-                                                 const float* csrValA,
-                                                 const int* csrRowPtrA,
-                                                 const int* csrColIndA,
-                                                 const cusparseMatDescr_t descrB,
-                                                 int nnzB,
-                                                 const float* csrValB,
-                                                 const int* csrRowPtrB,
-                                                 const int* csrColIndB,
-                                                 const float* beta,
-                                                 const cusparseMatDescr_t descrD,
-                                                 int nnzD,
-                                                 const float* csrValD,
-                                                 const int* csrRowPtrD,
-                                                 const int* csrColIndD,
-                                                 const cusparseMatDescr_t descrC,
-                                                 float* csrValC,
-                                                 const int* csrRowPtrC,
-                                                 int* csrColIndC,
-                                                 const csrgemm2Info_t info,
-                                                 void* pBuffer,
-                                                 cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+template <typename T>
+cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
+                                  int m,
+                                  int n,
+                                  int k,
+                                  const T* alpha,
+                                  const cusparseMatDescr_t descrA,
+                                  int nnzA,
+                                  const T* csrValA,
+                                  const int* csrRowPtrA,
+                                  const int* csrColIndA,
+                                  const cusparseMatDescr_t descrB,
+                                  int nnzB,
+                                  const T* csrValB,
+                                  const int* csrRowPtrB,
+                                  const int* csrColIndB,
+                                  const T* beta,
+                                  const cusparseMatDescr_t descrD,
+                                  int nnzD,
+                                  const T* csrValD,
+                                  const int* csrRowPtrD,
+                                  const int* csrColIndD,
+                                  const cusparseMatDescr_t descrC,
+                                  T* csrValC,
+                                  const int* csrRowPtrC,
+                                  int* csrColIndC,
+                                  const csrgemm2Info_t info,
+                                  void* pBuffer,
+                                  cudaStream_t stream);
+
+template <>
+inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
+                                         int m,
+                                         int n,
+                                         int k,
+                                         const float* alpha,
+                                         const cusparseMatDescr_t descrA,
+                                         int nnzA,
+                                         const float* csrValA,
+                                         const int* csrRowPtrA,
+                                         const int* csrColIndA,
+                                         const cusparseMatDescr_t descrB,
+                                         int nnzB,
+                                         const float* csrValB,
+                                         const int* csrRowPtrB,
+                                         const int* csrColIndB,
+                                         const float* beta,
+                                         const cusparseMatDescr_t descrD,
+                                         int nnzD,
+                                         const float* csrValD,
+                                         const int* csrRowPtrD,
+                                         const int* csrColIndD,
+                                         const cusparseMatDescr_t descrC,
+                                         float* csrValC,
+                                         const int* csrRowPtrC,
+                                         int* csrColIndC,
+                                         const csrgemm2Info_t info,
+                                         void* pBuffer,
+                                         cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseScsrgemm2(handle,
-                                     m,
-                                     n,
-                                     k,
-                                     alpha,
-                                     descrA,
-                                     nnzA,
-                                     csrValA,
-                                     csrRowPtrA,
-                                     csrColIndA,
-                                     descrB,
-                                     nnzB,
-                                     csrValB,
-                                     csrRowPtrB,
-                                     csrColIndB,
-                                     beta,
-                                     descrD,
-                                     nnzD,
-                                     csrValD,
-                                     csrRowPtrD,
-                                     csrColIndD,
-                                     descrC,
-                                     csrValC,
-                                     csrRowPtrC,
-                                     csrColIndC,
-                                     info,
-                                     pBuffer);
+  return cusparseScsrgemm2(handle,
+                           m,
+                           n,
+                           k,
+                           alpha,
+                           descrA,
+                           nnzA,
+                           csrValA,
+                           csrRowPtrA,
+                           csrColIndA,
+                           descrB,
+                           nnzB,
+                           csrValB,
+                           csrRowPtrB,
+                           csrColIndB,
+                           beta,
+                           descrD,
+                           nnzD,
+                           csrValD,
+                           csrRowPtrD,
+                           csrColIndD,
+                           descrC,
+                           csrValC,
+                           csrRowPtrC,
+                           csrColIndC,
+                           info,
+                           pBuffer);
 #pragma GCC diagnostic pop
-        }
-
-        template <>
-        inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
-                                                 int m,
-                                                 int n,
-                                                 int k,
-                                                 const double* alpha,
-                                                 const cusparseMatDescr_t descrA,
-                                                 int nnzA,
-                                                 const double* csrValA,
-                                                 const int* csrRowPtrA,
-                                                 const int* csrColIndA,
-                                                 const cusparseMatDescr_t descrB,
-                                                 int nnzB,
-                                                 const double* csrValB,
-                                                 const int* csrRowPtrB,
-                                                 const int* csrColIndB,
-                                                 const double* beta,
-                                                 const cusparseMatDescr_t descrD,
-                                                 int nnzD,
-                                                 const double* csrValD,
-                                                 const int* csrRowPtrD,
-                                                 const int* csrColIndD,
-                                                 const cusparseMatDescr_t descrC,
-                                                 double* csrValC,
-                                                 const int* csrRowPtrC,
-                                                 int* csrColIndC,
-                                                 const csrgemm2Info_t info,
-                                                 void* pBuffer,
-                                                 cudaStream_t stream)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+}
+
+template <>
+inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle,
+                                         int m,
+                                         int n,
+                                         int k,
+                                         const double* alpha,
+                                         const cusparseMatDescr_t descrA,
+                                         int nnzA,
+                                         const double* csrValA,
+                                         const int* csrRowPtrA,
+                                         const int* csrColIndA,
+                                         const cusparseMatDescr_t descrB,
+                                         int nnzB,
+                                         const double* csrValB,
+                                         const int* csrRowPtrB,
+                                         const int* csrColIndB,
+                                         const double* beta,
+                                         const cusparseMatDescr_t descrD,
+                                         int nnzD,
+                                         const double* csrValD,
+                                         const int* csrRowPtrD,
+                                         const int* csrColIndD,
+                                         const cusparseMatDescr_t descrC,
+                                         double* csrValC,
+                                         const int* csrRowPtrC,
+                                         int* csrColIndC,
+                                         const csrgemm2Info_t info,
+                                         void* pBuffer,
+                                         cudaStream_t stream)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-            return cusparseDcsrgemm2(handle,
-                                     m,
-                                     n,
-                                     k,
-                                     alpha,
-                                     descrA,
-                                     nnzA,
-                                     csrValA,
-                                     csrRowPtrA,
-                                     csrColIndA,
-                                     descrB,
-                                     nnzB,
-                                     csrValB,
-                                     csrRowPtrB,
-                                     csrColIndB,
-                                     beta,
-                                     descrD,
-                                     nnzD,
-                                     csrValD,
-                                     csrRowPtrD,
-                                     csrColIndD,
-                                     descrC,
-                                     csrValC,
-                                     csrRowPtrC,
-                                     csrColIndC,
-                                     info,
-                                     pBuffer);
+  return cusparseDcsrgemm2(handle,
+                           m,
+                           n,
+                           k,
+                           alpha,
+                           descrA,
+                           nnzA,
+                           csrValA,
+                           csrRowPtrA,
+                           csrColIndA,
+                           descrB,
+                           nnzB,
+                           csrValB,
+                           csrRowPtrB,
+                           csrColIndB,
+                           beta,
+                           descrD,
+                           nnzD,
+                           csrValD,
+                           csrRowPtrD,
+                           csrColIndD,
+                           descrC,
+                           csrValC,
+                           csrRowPtrC,
+                           csrColIndC,
+                           info,
+                           pBuffer);
 #pragma GCC diagnostic pop
-        }
+}
 
 /** @} */
 
@@ -1456,171 +1502,203 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle,
  * @{
  */
 
-        template <typename T>
-        cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
-                                           int m,
-                                           int n,
-                                           int nnz,
-                                           const cusparseMatDescr_t descrA,
-                                           const T* csrValA,
-                                           const int* csrRowPtrA,
-                                           const int* csrColIndA,
-                                           T* A,
-                                           int lda,
-                                           size_t *buffer_size,
-                                           bool row_major = false,
-                                           cudaStream_t stream);
-
-        template <>
-        cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      int nnz,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const float* csrValA,
-                                                      const int* csrRowPtrA,
-                                                      const int* csrColIndA,
-                                                      float* A,
-                                                      int lda,
-                                                      size_t *buffer_size,
-                                                      bool row_major = false,
-                                                      cudaStream_t stream) {
-            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_32F);
-
-            cusparseDnMatDescr_t matB;
-            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
-
-            cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
-
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
-
-            return result;
-
-        }
-
-        template <>
-        cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      int nnz,
-                                                      const cusparseMatDescr_t descrA,
-                                                      const double* csrValA,
-                                                      const int* csrRowPtrA,
-                                                      const int* csrColIndA,
-                                                      double* A,
-                                                      int lda,
-                                                      size_t *buffer_size,
-                                                      bool row_major = false,
-                                                      cudaStream_t stream) {
-            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_64F);
-
-            cusparseDnMatDescr_t matB;
-            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
-
-            cusparseStatus_t result = cusparseSparseToDense_bufferSize(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
-
-            return result;
-
-        }
-
-
-
-        template <typename T>
-        cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
-                                           int m,
-                                           int n,
-                                           int nnz,
-                                           const cusparseMatDescr_t descrA,
-                                           const T* csrValA,
-                                           const int* csrRowPtrA,
-                                           const int* csrColIndA,
-                                           T* A,
-                                           int lda,
-                                           void *buffer,
-                                           cudaStream_t stream,
-                                           bool row_major = false);
-
-        template <>
-        inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
-                                                  int m,
-                                                  int n,
-                                                  int nnz,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const float* csrValA,
-                                                  const int* csrRowPtrA,
-                                                  const int* csrColIndA,
-                                                  float* A,
-                                                  int lda,
-                                                  void *buffer,
-                                                  cudaStream_t stream,
-                                                  bool row_major = false)
-        {
-
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-
-            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_32F);
-
-            cusparseDnMatDescr_t matB;
-            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
-
-            cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
-
-            return result;
-        }
-        template <>
-        inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
-                                                  int m,
-                                                  int n,
-                                                  int nnz,
-                                                  const cusparseMatDescr_t descrA,
-                                                  const double* csrValA,
-                                                  const int* csrRowPtrA,
-                                                  const int* csrColIndA,
-                                                  double* A,
-                                                  int lda,
-                                                  void *buffer,
-                                                  cudaStream_t stream,
-                                                  bool row_major = false)
-        {
-            CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-            cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL
-            cusparseSpMatDescr_t matA;
-            cusparseCreateCsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA,
-                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                              CUDA_R_64F);
-
-            cusparseDnMatDescr_t matB;
-            cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
-
-            cusparseStatus_t result = cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
-
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-            RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
-
-            return result;
-        }
+template <typename T>
+cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                              int m,
+                                              int n,
+                                              int nnz,
+                                              const cusparseMatDescr_t descrA,
+                                              const T* csrValA,
+                                              const int* csrRowPtrA,
+                                              const int* csrColIndA,
+                                              T* A,
+                                              int lda,
+                                              size_t* buffer_size,
+                                              bool row_major = false,
+                                              cudaStream_t stream);
+
+template <>
+cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                              int m,
+                                              int n,
+                                              int nnz,
+                                              const cusparseMatDescr_t descrA,
+                                              const float* csrValA,
+                                              const int* csrRowPtrA,
+                                              const int* csrColIndA,
+                                              float* A,
+                                              int lda,
+                                              size_t* buffer_size,
+                                              bool row_major = false,
+                                              cudaStream_t stream)
+{
+  cusparseOrder_t order =
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_32F);
+
+  cusparseDnMatDescr_t matB;
+  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
+
+  cusparseStatus_t result = cusparseSparseToDense_bufferSize(
+    handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+  return result;
+}
+
+template <>
+cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                              int m,
+                                              int n,
+                                              int nnz,
+                                              const cusparseMatDescr_t descrA,
+                                              const double* csrValA,
+                                              const int* csrRowPtrA,
+                                              const int* csrColIndA,
+                                              double* A,
+                                              int lda,
+                                              size_t* buffer_size,
+                                              bool row_major = false,
+                                              cudaStream_t stream)
+{
+  cusparseOrder_t order =
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_64F);
+
+  cusparseDnMatDescr_t matB;
+  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
+
+  cusparseStatus_t result = cusparseSparseToDense_bufferSize(
+    handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+  return result;
+}
+
+template <typename T>
+cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+                                   int m,
+                                   int n,
+                                   int nnz,
+                                   const cusparseMatDescr_t descrA,
+                                   const T* csrValA,
+                                   const int* csrRowPtrA,
+                                   const int* csrColIndA,
+                                   T* A,
+                                   int lda,
+                                   void* buffer,
+                                   cudaStream_t stream,
+                                   bool row_major = false);
+
+template <>
+inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+                                          int m,
+                                          int n,
+                                          int nnz,
+                                          const cusparseMatDescr_t descrA,
+                                          const float* csrValA,
+                                          const int* csrRowPtrA,
+                                          const int* csrColIndA,
+                                          float* A,
+                                          int lda,
+                                          void* buffer,
+                                          cudaStream_t stream,
+                                          bool row_major = false)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+  cusparseOrder_t order =
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_32F);
+
+  cusparseDnMatDescr_t matB;
+  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
+
+  cusparseStatus_t result =
+    cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+  return result;
+}
+template <>
+inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
+                                          int m,
+                                          int n,
+                                          int nnz,
+                                          const cusparseMatDescr_t descrA,
+                                          const double* csrValA,
+                                          const int* csrRowPtrA,
+                                          const int* csrColIndA,
+                                          double* A,
+                                          int lda,
+                                          void* buffer,
+                                          cudaStream_t stream,
+                                          bool row_major = false)
+{
+  CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+  cusparseOrder_t order =
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
+  cusparseCreateCsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    csrRowPtrA,
+                    csrColIndA,
+                    csrValA,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_32I,
+                    CUSPARSE_INDEX_BASE_ZERO,
+                    CUDA_R_64F);
+
+  cusparseDnMatDescr_t matB;
+  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
+
+  cusparseStatus_t result =
+    cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
+
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+
+  return result;
+}
 
 /** @} */
 
-    }  // namespace sparse
+}  // namespace sparse
 }  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h
index a604cb57f2..ecd0abf485 100644
--- a/cpp/include/raft/sparse/detail/cusparse_macros.h
+++ b/cpp/include/raft/sparse/detail/cusparse_macros.h
@@ -1,5 +1,5 @@
-#include <raft/error.hpp>
 #include <cusparse_v2.h>
+#include <raft/error.hpp>
 ///@todo: enable this once logging is enabled
 //#include <cuml/common/logger.hpp>
 
@@ -23,34 +23,34 @@ namespace raft {
  * @brief Exception thrown when a cuSparse error is encountered.
  */
 struct cusparse_error : public raft::exception {
-    explicit cusparse_error(char const* const message) : raft::exception(message) {}
-    explicit cusparse_error(std::string const& message) : raft::exception(message) {}
+  explicit cusparse_error(char const* const message) : raft::exception(message) {}
+  explicit cusparse_error(std::string const& message) : raft::exception(message) {}
 };
 
 namespace sparse {
 namespace detail {
 
-    inline const char* cusparse_error_to_string(cusparseStatus_t err)
-    {
+inline const char* cusparse_error_to_string(cusparseStatus_t err)
+{
 #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100
-                return cusparseGetErrorString(err);
+  return cusparseGetErrorString(err);
 #else   // CUDART_VERSION
-                switch (err) {
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR);
-                    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
-                    default: return "CUSPARSE_STATUS_UNKNOWN";
-                };
+  switch (err) {
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR);
+    _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
+    default: return "CUSPARSE_STATUS_UNKNOWN";
+  };
 #endif  // CUDART_VERSION
-            }
+}
 
-        }  // namespace detail
-    }  // namespace sparse
+}  // namespace detail
+}  // namespace sparse
 }  // namespace raft
 
 #undef _CUSPARSE_ERR_TO_STR

From 1c1f3659648cea08e8fede11e5d8008a1ca8bba9 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 16:05:23 -0500
Subject: [PATCH 05/16] Moving cusparse_wrappers behind detail namespace. These
 shouldn't be exposed

---
 cpp/include/raft/handle.hpp                   |   2 +-
 .../raft/sparse/detail/cusparse_macros.h      |  18 +++
 .../sparse/{ => detail}/cusparse_wrappers.h   | 125 +++++++-----------
 3 files changed, 68 insertions(+), 77 deletions(-)
 rename cpp/include/raft/sparse/{ => detail}/cusparse_wrappers.h (95%)

diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp
index 8a55df114d..1babfaded5 100644
--- a/cpp/include/raft/handle.hpp
+++ b/cpp/include/raft/handle.hpp
@@ -36,7 +36,7 @@
 #include <raft/comms/comms.hpp>
 #include <raft/linalg/cublas_wrappers.h>
 #include <raft/linalg/cusolver_wrappers.h>
-#include <raft/sparse/cusparse_macros.h>
+#include <raft/sparse/detail/cusparse_macros.h>
 #include <rmm/cuda_stream_pool.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h
index ecd0abf485..080083006d 100644
--- a/cpp/include/raft/sparse/detail/cusparse_macros.h
+++ b/cpp/include/raft/sparse/detail/cusparse_macros.h
@@ -1,3 +1,21 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
 #include <cusparse_v2.h>
 #include <raft/error.hpp>
 ///@todo: enable this once logging is enabled
diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
similarity index 95%
rename from cpp/include/raft/sparse/cusparse_wrappers.h
rename to cpp/include/raft/sparse/detail/cusparse_wrappers.h
index a28d218960..02df917f99 100644
--- a/cpp/include/raft/sparse/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -773,23 +773,19 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_32F);
+                    csrValA);
 
   cusparseDnVecDescr_t vecX;
-  cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
+    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-  cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
+    cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV_bufferSize(handle,
                                                     transA,
@@ -828,23 +824,19 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_64F);
+                    csrValA);
 
   cusparseDnVecDescr_t vecX;
-  cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
+    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-  cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+    cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV_bufferSize(handle,
                                                     transA,
@@ -901,23 +893,19 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_32F);
+                    csrValA);
 
   cusparseDnVecDescr_t vecX;
-  cusparseCreateDnVec(&vecX, n, x, CUDA_R_32F);
+    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-  cusparseCreateDnVec(&vecY, n, y, CUDA_R_32F);
+    cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV(
     handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
@@ -949,23 +937,19 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_64F);
+                    csrValA);
 
   cusparseDnVecDescr_t vecX;
-  cusparseCreateDnVec(&vecX, n, x, CUDA_R_64F);
+    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-  cusparseCreateDnVec(&vecY, n, y, CUDA_R_64F);
+  cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV(
     handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
@@ -1514,8 +1498,8 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                               T* A,
                                               int lda,
                                               size_t* buffer_size,
-                                              bool row_major = false,
-                                              cudaStream_t stream);
+                                              cudaStream_t stream,
+                                              bool row_major = false);
 
 template <>
 cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
@@ -1529,31 +1513,29 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                               float* A,
                                               int lda,
                                               size_t* buffer_size,
-                                              bool row_major = false,
-                                              cudaStream_t stream)
+                                              cudaStream_t stream,
+                                              bool row_major)
 {
   cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+
+  cusparseSpMatDescr_t matA;
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_32F);
+                    csrValA);
 
   cusparseDnMatDescr_t matB;
-  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
+    cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result = cusparseSparseToDense_bufferSize(
     handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }
@@ -1570,31 +1552,28 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                               double* A,
                                               int lda,
                                               size_t* buffer_size,
-                                              bool row_major = false,
-                                              cudaStream_t stream)
+                                              cudaStream_t stream,
+                                              bool row_major)
 {
   cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseSpMatDescr_t matA;
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_64F);
+                    csrValA);
 
   cusparseDnMatDescr_t matB;
-  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
+    cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result = cusparseSparseToDense_bufferSize(
     handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+    RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }
@@ -1627,32 +1606,29 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
                                           int lda,
                                           void* buffer,
                                           cudaStream_t stream,
-                                          bool row_major = false)
+                                          bool row_major)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseSpMatDescr_t matA;
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_32F);
+                    csrValA);
 
   cusparseDnMatDescr_t matB;
-  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_32F, CUSPARSE_ORDER_COL);
+    cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result =
     cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+    RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }
@@ -1669,31 +1645,28 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
                                           int lda,
                                           void* buffer,
                                           cudaStream_t stream,
-                                          bool row_major = false)
+                                          bool row_major )
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL cusparseSpMatDescr_t matA;
-  cusparseCreateCsr(&matA,
+    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseSpMatDescr_t matA;
+    cusparsecreatecsr(&matA,
                     m,
                     n,
                     nnz,
                     csrRowPtrA,
                     csrColIndA,
-                    csrValA,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_32I,
-                    CUSPARSE_INDEX_BASE_ZERO,
-                    CUDA_R_64F);
+                    csrValA);
 
   cusparseDnMatDescr_t matB;
-  cusparseCreateDnMat(handle, m, n, lda, A, CUDA_R_64F, CUSPARSE_ORDER_COL);
+  cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result =
     cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(vecB));
+    RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }

From ebd40325d6c6354315d224de5a653c0235ece314 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 16:41:04 -0500
Subject: [PATCH 06/16] Moving cusparse wrappers to detail and adjusting all
 includes

---
 cpp/doxygen/Doxyfile.in                                  | 2 +-
 cpp/include/raft/sparse/convert/detail/coo.cuh           | 2 +-
 cpp/include/raft/sparse/convert/detail/csr.cuh           | 2 +-
 cpp/include/raft/sparse/convert/detail/dense.cuh         | 2 +-
 cpp/include/raft/sparse/detail/csr.cuh                   | 2 +-
 cpp/include/raft/sparse/detail/cusparse_macros.h         | 2 +-
 cpp/include/raft/sparse/detail/cusparse_wrappers.h       | 1 +
 cpp/include/raft/sparse/distance/detail/bin_distance.cuh | 2 +-
 cpp/include/raft/sparse/distance/detail/coo_spmv.cuh     | 2 +-
 cpp/include/raft/sparse/distance/detail/ip_distance.cuh  | 2 +-
 cpp/include/raft/sparse/distance/detail/l2_distance.cuh  | 2 +-
 cpp/include/raft/sparse/distance/detail/lp_distance.cuh  | 2 +-
 cpp/include/raft/sparse/distance/distance.hpp            | 2 +-
 cpp/include/raft/sparse/linalg/detail/add.cuh            | 2 +-
 cpp/include/raft/sparse/linalg/detail/norm.cuh           | 2 +-
 cpp/include/raft/sparse/linalg/detail/spectral.cuh       | 2 +-
 cpp/include/raft/sparse/linalg/detail/symmetrize.cuh     | 2 +-
 cpp/include/raft/sparse/linalg/detail/transpose.h        | 2 +-
 cpp/include/raft/sparse/op/detail/filter.cuh             | 2 +-
 cpp/include/raft/sparse/op/detail/reduce.cuh             | 2 +-
 cpp/include/raft/sparse/op/detail/row_op.cuh             | 2 +-
 cpp/include/raft/sparse/op/detail/slice.h                | 2 +-
 cpp/include/raft/sparse/op/detail/sort.h                 | 2 +-
 cpp/include/raft/spectral/matrix_wrappers.hpp            | 2 +-
 cpp/test/sparse/csr_row_slice.cu                         | 2 +-
 cpp/test/sparse/csr_to_dense.cu                          | 2 +-
 cpp/test/sparse/csr_transpose.cu                         | 2 +-
 cpp/test/sparse/dist_coo_spmv.cu                         | 2 +-
 cpp/test/sparse/distance.cu                              | 2 +-
 cpp/test/sparse/knn.cu                                   | 4 ++--
 30 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/cpp/doxygen/Doxyfile.in b/cpp/doxygen/Doxyfile.in
index eb27b2d02c..c83224050e 100644
--- a/cpp/doxygen/Doxyfile.in
+++ b/cpp/doxygen/Doxyfile.in
@@ -815,7 +815,7 @@ RECURSIVE              = YES
 
 EXCLUDE                = @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/linalg/symmetrize.hpp \ # Contains device code
                          @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/csr.hpp \    # Contains device code
-                         @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/cusparse_wrappers.h
+                         @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/detail/cusparse_wrappers.h
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
diff --git a/cpp/include/raft/sparse/convert/detail/coo.cuh b/cpp/include/raft/sparse/convert/detail/coo.cuh
index fd300dcdba..c37087789c 100644
--- a/cpp/include/raft/sparse/convert/detail/coo.cuh
+++ b/cpp/include/raft/sparse/convert/detail/coo.cuh
@@ -19,7 +19,7 @@
 #include <cusparse_v2.h>
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh
index 0f4dc4976c..b787a9d588 100644
--- a/cpp/include/raft/sparse/convert/detail/csr.cuh
+++ b/cpp/include/raft/sparse/convert/detail/csr.cuh
@@ -21,7 +21,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/device_ptr.h>
diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh
index 3c18a40526..0f3a4fb5e5 100644
--- a/cpp/include/raft/sparse/convert/detail/dense.cuh
+++ b/cpp/include/raft/sparse/convert/detail/dense.cuh
@@ -19,7 +19,7 @@
 #include <cusparse_v2.h>
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
diff --git a/cpp/include/raft/sparse/detail/csr.cuh b/cpp/include/raft/sparse/detail/csr.cuh
index cb39f34ba4..a256ac402b 100644
--- a/cpp/include/raft/sparse/detail/csr.cuh
+++ b/cpp/include/raft/sparse/detail/csr.cuh
@@ -19,7 +19,7 @@
 #include <cusparse_v2.h>
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h
index 080083006d..7e89014b61 100644
--- a/cpp/include/raft/sparse/detail/cusparse_macros.h
+++ b/cpp/include/raft/sparse/detail/cusparse_macros.h
@@ -29,7 +29,7 @@
 //(2.) to enforce a lower version,
 //
 //`#define CUDA_ENFORCE_LOWER
-// #include <raft/sparse/cusparse_wrappers.h>`
+// #include <raft/sparse/detail/cusparse_wrappers.h>`
 //
 // (i.e., before including this header)
 //
diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index 02df917f99..922425221f 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cusparse_v2.h>
 #include <raft/error.hpp>
 #include <raft/sparse/detail/cusparse_macros.h>
 
diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
index 07bf251f14..5ba48cf9ca 100644
--- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
@@ -21,7 +21,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/linalg/distance_type.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/detail/utils.h>
 #include <raft/sparse/distance/common.h>
 #include <raft/sparse/distance/detail/ip_distance.cuh>
diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
index c23a2b1537..046b65a0f0 100644
--- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
+++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
@@ -22,7 +22,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/mr/device/buffer.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include "../../csr.hpp"
 #include "../../detail/utils.h"
diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
index 00054a8e96..1c5c51b654 100644
--- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
@@ -20,7 +20,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/linalg/distance_type.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <raft/sparse/convert/csr.hpp>
 #include <raft/sparse/convert/dense.hpp>
diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh
index 7f63a7fec8..edbc50cc2b 100644
--- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh
@@ -23,7 +23,7 @@
 #include <raft/linalg/distance_type.h>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/sparse/csr.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/detail/utils.h>
 #include <raft/sparse/distance/common.h>
 #include <raft/sparse/distance/detail/ip_distance.cuh>
diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
index 1e907c98eb..142a0d6093 100644
--- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
@@ -21,7 +21,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/linalg/distance_type.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp
index 2f121dce33..428883c3df 100644
--- a/cpp/include/raft/sparse/distance/distance.hpp
+++ b/cpp/include/raft/sparse/distance/distance.hpp
@@ -22,7 +22,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/linalg/distance_type.h>
 #include <raft/mr/device/buffer.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <raft/sparse/convert/coo.hpp>
 #include <raft/sparse/convert/csr.hpp>
diff --git a/cpp/include/raft/sparse/linalg/detail/add.cuh b/cpp/include/raft/sparse/linalg/detail/add.cuh
index 769c7e523f..b288d0a603 100644
--- a/cpp/include/raft/sparse/linalg/detail/add.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/add.cuh
@@ -20,7 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh
index f4b4f65f7e..b7420a55e7 100644
--- a/cpp/include/raft/sparse/linalg/detail/norm.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh
@@ -19,7 +19,7 @@
 #include <cusparse_v2.h>
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
index de62f25ffa..782ce2b909 100644
--- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
@@ -17,7 +17,7 @@
 #include <raft/cudart_utils.h>
 
 #include <raft/cuda_utils.cuh>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/spectral/partition.hpp>
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
index 045f0e14bc..4384f2ba55 100644
--- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
@@ -20,7 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h
index be74a72817..699c67544e 100644
--- a/cpp/include/raft/sparse/linalg/detail/transpose.h
+++ b/cpp/include/raft/sparse/linalg/detail/transpose.h
@@ -20,7 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/device_ptr.h>
diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh
index 6e5d518619..80a6584251 100644
--- a/cpp/include/raft/sparse/op/detail/filter.cuh
+++ b/cpp/include/raft/sparse/op/detail/filter.cuh
@@ -20,7 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh
index 074a139ba9..697e9cce7f 100644
--- a/cpp/include/raft/sparse/op/detail/reduce.cuh
+++ b/cpp/include/raft/sparse/op/detail/reduce.cuh
@@ -21,7 +21,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/mr/device/buffer.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <raft/device_atomics.cuh>
 #include <raft/sparse/op/sort.hpp>
diff --git a/cpp/include/raft/sparse/op/detail/row_op.cuh b/cpp/include/raft/sparse/op/detail/row_op.cuh
index b8803d4926..4754f753d4 100644
--- a/cpp/include/raft/sparse/op/detail/row_op.cuh
+++ b/cpp/include/raft/sparse/op/detail/row_op.cuh
@@ -20,7 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
diff --git a/cpp/include/raft/sparse/op/detail/slice.h b/cpp/include/raft/sparse/op/detail/slice.h
index 3c47d19a0b..662ba55f96 100644
--- a/cpp/include/raft/sparse/op/detail/slice.h
+++ b/cpp/include/raft/sparse/op/detail/slice.h
@@ -21,7 +21,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/linalg/unary_op.cuh>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h
index 94feda1e76..9fc7cac5e3 100644
--- a/cpp/include/raft/sparse/op/detail/sort.h
+++ b/cpp/include/raft/sparse/op/detail/sort.h
@@ -19,7 +19,7 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/sparse/coo.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/detail/utils.h>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp
index a260e75505..bd1866a4f0 100644
--- a/cpp/include/raft/spectral/matrix_wrappers.hpp
+++ b/cpp/include/raft/spectral/matrix_wrappers.hpp
@@ -18,7 +18,7 @@
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
 #include <raft/linalg/cublas_wrappers.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/fill.h>
diff --git a/cpp/test/sparse/csr_row_slice.cu b/cpp/test/sparse/csr_row_slice.cu
index e37827d18d..cdcc2fdd7f 100644
--- a/cpp/test/sparse/csr_row_slice.cu
+++ b/cpp/test/sparse/csr_row_slice.cu
@@ -19,7 +19,7 @@
 #include <raft/handle.hpp>
 
 #include <gtest/gtest.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/op/slice.hpp>
 
 #include <rmm/device_uvector.hpp>
diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu
index 8dec9492bb..f3291f3bb9 100644
--- a/cpp/test/sparse/csr_to_dense.cu
+++ b/cpp/test/sparse/csr_to_dense.cu
@@ -20,7 +20,7 @@
 
 #include <gtest/gtest.h>
 #include <raft/sparse/convert/dense.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/test/sparse/csr_transpose.cu
index b1a432422e..e6df4cada2 100644
--- a/cpp/test/sparse/csr_transpose.cu
+++ b/cpp/test/sparse/csr_transpose.cu
@@ -20,7 +20,7 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/linalg/transpose.hpp>
 
 #include "../test_utils.h"
diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu
index dc136d6f18..67a61438d1 100644
--- a/cpp/test/sparse/dist_coo_spmv.cu
+++ b/cpp/test/sparse/dist_coo_spmv.cu
@@ -21,7 +21,7 @@
 #include <raft/cudart_utils.h>
 #include <raft/linalg/distance_type.h>
 #include <raft/linalg/unary_op.cuh>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 
 #include <raft/sparse/convert/coo.hpp>
diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu
index f4f346561c..a9781567ab 100644
--- a/cpp/test/sparse/distance.cu
+++ b/cpp/test/sparse/distance.cu
@@ -20,7 +20,7 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/linalg/distance_type.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <raft/sparse/distance/distance.hpp>
 
diff --git a/cpp/test/sparse/knn.cu b/cpp/test/sparse/knn.cu
index bcfa796931..8da9d1e443 100644
--- a/cpp/test/sparse/knn.cu
+++ b/cpp/test/sparse/knn.cu
@@ -19,11 +19,11 @@
 
 #include "../test_utils.h"
 #include <raft/linalg/distance_type.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/selection/knn.hpp>
 
 #include <raft/cudart_utils.h>
-#include <raft/sparse/cusparse_wrappers.h>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 namespace raft {
 namespace sparse {

From 47d5176d769422d85483854fb08813c478df371b Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 16:41:51 -0500
Subject: [PATCH 07/16] Updating style

---
 .../raft/sparse/detail/cusparse_wrappers.h    | 110 +++++-------------
 1 file changed, 31 insertions(+), 79 deletions(-)

diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index 922425221f..5a512fea56 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -774,19 +774,13 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnVecDescr_t vecX;
-    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-    cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
+  cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV_bufferSize(handle,
                                                     transA,
@@ -825,19 +819,13 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnVecDescr_t vecX;
-    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-    cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
+  cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV_bufferSize(handle,
                                                     transA,
@@ -894,19 +882,13 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnVecDescr_t vecX;
-    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
-    cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
+  cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
 
   cusparseStatus_t result = cusparseSpMV(
     handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer);
@@ -938,16 +920,10 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnVecDescr_t vecX;
-    cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
 
   cusparseDnVecDescr_t vecY;
   cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
@@ -1517,20 +1493,14 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                               cudaStream_t stream,
                                               bool row_major)
 {
-  cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
 
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnMatDescr_t matB;
-    cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(
+    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result = cusparseSparseToDense_bufferSize(
     handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
@@ -1556,25 +1526,19 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                               cudaStream_t stream,
                                               bool row_major)
 {
-  cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnMatDescr_t matB;
-    cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(
+    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result = cusparseSparseToDense_bufferSize(
     handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-    RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }
@@ -1611,25 +1575,19 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
-  cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnMatDescr_t matB;
-    cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(
+    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result =
     cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-    RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }
@@ -1646,28 +1604,22 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
                                           int lda,
                                           void* buffer,
                                           cudaStream_t stream,
-                                          bool row_major )
+                                          bool row_major)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
-  cusparseOrder_t order =
-    row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
-    cusparsecreatecsr(&matA,
-                    m,
-                    n,
-                    nnz,
-                    csrRowPtrA,
-                    csrColIndA,
-                    csrValA);
+  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
 
   cusparseDnMatDescr_t matB;
-  cusparsecreatednmat(&matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(
+    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
 
   cusparseStatus_t result =
     cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
-    RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
   return result;
 }

From d9e8eec15dec156a0c762e6b60321a4a0e86287a Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 17:55:30 -0500
Subject: [PATCH 08/16] Fixing definitions.

---
 cpp/include/raft/sparse/convert/dense.hpp     |   3 +-
 .../raft/sparse/convert/detail/dense.cuh      |   1 +
 .../raft/sparse/detail/cusparse_wrappers.h    | 156 ++++++++++++------
 cpp/test/sparse/csr_to_dense.cu               |   3 +
 4 files changed, 116 insertions(+), 47 deletions(-)

diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp
index c8d3b46d03..ac8598033d 100644
--- a/cpp/include/raft/sparse/convert/dense.hpp
+++ b/cpp/include/raft/sparse/convert/dense.hpp
@@ -44,6 +44,7 @@ template <typename value_idx, typename value_t>
 void csr_to_dense(cusparseHandle_t handle,
                   value_idx nrows,
                   value_idx ncols,
+                  value_idx nnz,
                   const value_idx* csr_indptr,
                   const value_idx* csr_indices,
                   const value_t* csr_data,
@@ -53,7 +54,7 @@ void csr_to_dense(cusparseHandle_t handle,
                   bool row_major = true)
 {
   detail::csr_to_dense<value_idx, value_t>(
-    handle, nrows, ncols, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major);
+    handle, nrows, ncols, nnz, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major);
 }
 
 };  // end NAMESPACE convert
diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh
index 0f3a4fb5e5..15f2f8518e 100644
--- a/cpp/include/raft/sparse/convert/detail/dense.cuh
+++ b/cpp/include/raft/sparse/convert/detail/dense.cuh
@@ -31,6 +31,7 @@
 #include <iostream>
 
 #include <raft/sparse/detail/utils.h>
+#include <rmm/device_uvector.hpp>
 
 namespace raft {
 namespace sparse {
diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index 5a512fea56..e76fba945e 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -774,10 +774,16 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<float*>(csrValA));
 
   cusparseDnVecDescr_t vecX;
-  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), const_cast<float*>(x));
 
   cusparseDnVecDescr_t vecY;
   cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
@@ -819,10 +825,16 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<double*>(csrValA));
 
   cusparseDnVecDescr_t vecX;
-  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), const_cast<double*>(x));
 
   cusparseDnVecDescr_t vecY;
   cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
@@ -882,10 +894,16 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<float*>(csrValA));
 
   cusparseDnVecDescr_t vecX;
-  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), const_cast<float*>(x));
 
   cusparseDnVecDescr_t vecY;
   cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
@@ -920,10 +938,16 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<double*>(csrValA));
 
   cusparseDnVecDescr_t vecX;
-  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), x);
+  cusparsecreatednvec(&vecX, static_cast<int64_t>(n), const_cast<double*>(x));
 
   cusparseDnVecDescr_t vecY;
   cusparsecreatednvec(&vecY, static_cast<int64_t>(n), y);
@@ -1479,28 +1503,38 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                               bool row_major = false);
 
 template <>
-cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              const float* csrValA,
-                                              const int* csrRowPtrA,
-                                              const int* csrColIndA,
-                                              float* A,
-                                              int lda,
-                                              size_t* buffer_size,
-                                              cudaStream_t stream,
-                                              bool row_major)
+inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                                     int m,
+                                                     int n,
+                                                     int nnz,
+                                                     const cusparseMatDescr_t descrA,
+                                                     const float* csrValA,
+                                                     const int* csrRowPtrA,
+                                                     const int* csrColIndA,
+                                                     float* A,
+                                                     int lda,
+                                                     size_t* buffer_size,
+                                                     cudaStream_t stream,
+                                                     bool row_major)
 {
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
 
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<float*>(csrValA));
 
   cusparseDnMatDescr_t matB;
-  cusparsecreatednmat(
-    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(&matB,
+                      static_cast<int64_t>(m),
+                      static_cast<int64_t>(n),
+                      static_cast<int64_t>(lda),
+                      const_cast<float*>(A),
+                      order);
 
   cusparseStatus_t result = cusparseSparseToDense_bufferSize(
     handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
@@ -1512,27 +1546,37 @@ cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
 }
 
 template <>
-cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
-                                              int m,
-                                              int n,
-                                              int nnz,
-                                              const cusparseMatDescr_t descrA,
-                                              const double* csrValA,
-                                              const int* csrRowPtrA,
-                                              const int* csrColIndA,
-                                              double* A,
-                                              int lda,
-                                              size_t* buffer_size,
-                                              cudaStream_t stream,
-                                              bool row_major)
+inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
+                                                     int m,
+                                                     int n,
+                                                     int nnz,
+                                                     const cusparseMatDescr_t descrA,
+                                                     const double* csrValA,
+                                                     const int* csrRowPtrA,
+                                                     const int* csrColIndA,
+                                                     double* A,
+                                                     int lda,
+                                                     size_t* buffer_size,
+                                                     cudaStream_t stream,
+                                                     bool row_major)
 {
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<double*>(csrValA));
 
   cusparseDnMatDescr_t matB;
-  cusparsecreatednmat(
-    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(&matB,
+                      static_cast<int64_t>(m),
+                      static_cast<int64_t>(n),
+                      static_cast<int64_t>(lda),
+                      const_cast<double*>(A),
+                      order);
 
   cusparseStatus_t result = cusparseSparseToDense_bufferSize(
     handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size);
@@ -1577,11 +1621,21 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
 
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<float*>(csrValA));
 
   cusparseDnMatDescr_t matB;
-  cusparsecreatednmat(
-    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(&matB,
+                      static_cast<int64_t>(m),
+                      static_cast<int64_t>(n),
+                      static_cast<int64_t>(lda),
+                      const_cast<float*>(A),
+                      order);
 
   cusparseStatus_t result =
     cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
@@ -1609,11 +1663,21 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
-  cusparsecreatecsr(&matA, m, n, nnz, csrRowPtrA, csrColIndA, csrValA);
+  cusparsecreatecsr(&matA,
+                    m,
+                    n,
+                    nnz,
+                    const_cast<int*>(csrRowPtrA),
+                    const_cast<int*>(csrColIndA),
+                    const_cast<double*>(csrValA));
 
   cusparseDnMatDescr_t matB;
-  cusparsecreatednmat(
-    &matB, static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(lda), A, order);
+  cusparsecreatednmat(&matB,
+                      static_cast<int64_t>(m),
+                      static_cast<int64_t>(n),
+                      static_cast<int64_t>(lda),
+                      const_cast<double*>(A),
+                      order);
 
   cusparseStatus_t result =
     cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer);
diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu
index f3291f3bb9..60447e3a81 100644
--- a/cpp/test/sparse/csr_to_dense.cu
+++ b/cpp/test/sparse/csr_to_dense.cu
@@ -36,6 +36,7 @@ template <typename value_idx, typename value_t>
 struct CSRToDenseInputs {
   value_idx nrows;
   value_idx ncols;
+  value_idx nnz;
 
   std::vector<value_idx> indptr_h;
   std::vector<value_idx> indices_h;
@@ -95,6 +96,7 @@ class CSRToDenseTest : public ::testing::TestWithParam<CSRToDenseInputs<value_id
     convert::csr_to_dense(handle,
                           params.nrows,
                           params.ncols,
+                          params.nnz,
                           indptr.data(),
                           indices.data(),
                           data.data(),
@@ -135,6 +137,7 @@ class CSRToDenseTest : public ::testing::TestWithParam<CSRToDenseInputs<value_id
 const std::vector<CSRToDenseInputs<int, float>> inputs_i32_f = {
   {4,
    4,
+   8,
    {0, 2, 4, 6, 8},
    {0, 1, 2, 3, 0, 1, 2, 3},  // indices
    {1.0f, 3.0f, 1.0f, 5.0f, 50.0f, 28.0f, 16.0f, 2.0f},

From 9a30ac561adbdf50c03f97d8e81c26f1a4c3a3cf Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 18:11:23 -0500
Subject: [PATCH 09/16] Changing cusparse include

---
 cpp/include/raft/sparse/detail/cusparse_macros.h   | 2 +-
 cpp/include/raft/sparse/detail/cusparse_wrappers.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h
index 7e89014b61..1f9f0e5175 100644
--- a/cpp/include/raft/sparse/detail/cusparse_macros.h
+++ b/cpp/include/raft/sparse/detail/cusparse_macros.h
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <cusparse_v2.h>
+#include <cusparse.h>
 #include <raft/error.hpp>
 ///@todo: enable this once logging is enabled
 //#include <cuml/common/logger.hpp>
diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index e76fba945e..c1b6934bee 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <cusparse_v2.h>
+#include <cusparse.h>
 #include <raft/error.hpp>
 #include <raft/sparse/detail/cusparse_macros.h>
 

From 8dd3564301414547e14ca9872ea85c73a5408ce4 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 31 Jan 2022 19:10:43 -0500
Subject: [PATCH 10/16] Adding doxygen for missing param

---
 cpp/include/raft/sparse/convert/detail/dense.cuh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh
index 15f2f8518e..bc6828d471 100644
--- a/cpp/include/raft/sparse/convert/detail/dense.cuh
+++ b/cpp/include/raft/sparse/convert/detail/dense.cuh
@@ -68,6 +68,7 @@ __global__ void csr_to_dense_warp_per_row_kernel(
  * @param[in] handle : cusparse handle for conversion
  * @param[in] nrows : number of rows in CSR
  * @param[in] ncols : number of columns in CSR
+ * @param[in] nnz : the number of nonzeros in CSR
  * @param[in] csr_indptr : CSR row index pointer array
  * @param[in] csr_indices : CSR column indices array
  * @param[in] csr_data : CSR data array

From 75c095cf0e4b1516e1c289fa270041296ed67c98 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 1 Feb 2022 10:32:38 -0500
Subject: [PATCH 11/16] Fixing cusparse API compatibility for CUDA versions <
 11.2

---
 .../raft/sparse/detail/cusparse_wrappers.h    | 131 +++++++++++++++++-
 1 file changed, 127 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index c1b6934bee..febe1c0e78 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -773,6 +773,7 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
+#if CUDART_VERSION >= 11020
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
                     m,
@@ -802,8 +803,32 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
   return result;
+
+#else
+
+  return cusparseCsrmvEx_bufferSize(handle,
+                                    alg,
+                                    transA,
+                                    m,
+                                    n,
+                                    nnz,
+                                    alpha,
+                                    CUDA_R_32F,
+                                    descrA,
+                                    csrValA,
+                                    CUDA_R_32F,
+                                    csrRowPtrA,
+                                    csrColIndA,
+                                    x,
+                                    CUDA_R_32F,
+                                    beta,
+                                    CUDA_R_32F,
+                                    y,
+                                    CUDA_R_32F,
+                                    CUDA_R_32F,
+                                    bufferSizeInBytes);
+#endif
 }
 template <>
 inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
@@ -824,6 +849,8 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
                                                    cudaStream_t stream)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+#if CUDART_VERSION >= 11020
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
                     m,
@@ -853,8 +880,30 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
   return result;
+#else
+  return cusparseCsrmvEx_bufferSize(handle,
+                                    alg,
+                                    transA,
+                                    m,
+                                    n,
+                                    nnz,
+                                    alpha,
+                                    CUDA_R_64F,
+                                    descrA,
+                                    csrValA,
+                                    CUDA_R_64F,
+                                    csrRowPtrA,
+                                    csrColIndA,
+                                    x,
+                                    CUDA_R_64F,
+                                    beta,
+                                    CUDA_R_64F,
+                                    y,
+                                    CUDA_R_64F,
+                                    CUDA_R_64F,
+                                    bufferSizeInBytes);
+#endif
 }
 
 template <typename T>
@@ -893,6 +942,8 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
                                         cudaStream_t stream)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+#if CUDART_VERSION >= 11020
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
                     m,
@@ -914,8 +965,30 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
   return result;
+#else
+  return cusparseCsrmvEx(handle,
+                         alg,
+                         transA,
+                         m,
+                         n,
+                         nnz,
+                         alpha,
+                         CUDA_R_32F,
+                         descrA,
+                         csrValA,
+                         CUDA_R_32F,
+                         csrRowPtrA,
+                         csrColIndA,
+                         x,
+                         CUDA_R_32F,
+                         beta,
+                         CUDA_R_32F,
+                         y,
+                         CUDA_R_32F,
+                         CUDA_R_32F,
+                         buffer);
+#endif
 }
 template <>
 inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
@@ -937,6 +1010,7 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
+#if CUDART_VERSION >= 11020
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
                     m,
@@ -958,8 +1032,32 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY));
-
   return result;
+
+#else
+
+  return cusparseCsrmvEx(handle,
+                         alg,
+                         transA,
+                         m,
+                         n,
+                         nnz,
+                         alpha,
+                         CUDA_R_64F,
+                         descrA,
+                         csrValA,
+                         CUDA_R_64F,
+                         csrRowPtrA,
+                         csrColIndA,
+                         x,
+                         CUDA_R_64F,
+                         beta,
+                         CUDA_R_64F,
+                         y,
+                         CUDA_R_64F,
+                         CUDA_R_64F,
+                         buffer);
+#endif
 }
 
 /** @} */
@@ -1517,6 +1615,7 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                                      cudaStream_t stream,
                                                      bool row_major)
 {
+#if CUDART_VERSION >= 11020
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
 
   cusparseSpMatDescr_t matA;
@@ -1542,6 +1641,12 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
+#else
+
+  cusparseStatus_t result = CUSPARSE_STATUS_SUCCESS;
+  buffer_size[0]          = 0;
+
+#endif
   return result;
 }
 
@@ -1560,6 +1665,7 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
                                                      cudaStream_t stream,
                                                      bool row_major)
 {
+#if CUDART_VERSION >= 11020
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
@@ -1584,6 +1690,12 @@ inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
+#else
+  cusparseStatus_t result = CUSPARSE_STATUS_SUCCESS;
+  buffer_size[0]          = 0;
+
+#endif
+
   return result;
 }
 
@@ -1619,6 +1731,7 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
 
+#if CUDART_VERSION >= 11020
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
@@ -1643,6 +1756,10 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
+#else
+  return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
+#endif
+
   return result;
 }
 template <>
@@ -1661,6 +1778,8 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
                                           bool row_major)
 {
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
+
+#if CUDART_VERSION >= 11020
   cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
   cusparseSpMatDescr_t matA;
   cusparsecreatecsr(&matA,
@@ -1684,6 +1803,10 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
+#else
+
+  return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
+#endif
 
   return result;
 }

From 0ce2d3a6285b78bc7886a132e65e935b2783b7af Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 1 Feb 2022 10:33:49 -0500
Subject: [PATCH 12/16] Fixing doc

---
 cpp/include/raft/sparse/convert/dense.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp
index ac8598033d..e28cfb4d2c 100644
--- a/cpp/include/raft/sparse/convert/dense.hpp
+++ b/cpp/include/raft/sparse/convert/dense.hpp
@@ -32,6 +32,7 @@ namespace convert {
  * @param[in] handle : cusparse handle for conversion
  * @param[in] nrows : number of rows in CSR
  * @param[in] ncols : number of columns in CSR
+ * @param[in] nnz : number of nonzeros in CSR 
  * @param[in] csr_indptr : CSR row index pointer array
  * @param[in] csr_indices : CSR column indices array
  * @param[in] csr_data : CSR data array

From 65cfd0401e08da7c29479881ca35467f08eb462e Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 1 Feb 2022 10:37:03 -0500
Subject: [PATCH 13/16] Fixing doc and style

---
 cpp/include/raft/sparse/convert/dense.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp
index e28cfb4d2c..2570d7ae65 100644
--- a/cpp/include/raft/sparse/convert/dense.hpp
+++ b/cpp/include/raft/sparse/convert/dense.hpp
@@ -32,7 +32,7 @@ namespace convert {
  * @param[in] handle : cusparse handle for conversion
  * @param[in] nrows : number of rows in CSR
  * @param[in] ncols : number of columns in CSR
- * @param[in] nnz : number of nonzeros in CSR 
+ * @param[in] nnz : number of nonzeros in CSR
  * @param[in] csr_indptr : CSR row index pointer array
  * @param[in] csr_indices : CSR column indices array
  * @param[in] csr_data : CSR data array

From 213b314b125efd76e8a059497e136011e00165d1 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 1 Feb 2022 11:15:43 -0500
Subject: [PATCH 14/16] Fixing bad return

---
 cpp/include/raft/sparse/detail/cusparse_wrappers.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index febe1c0e78..8a1bf24dd2 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -1756,11 +1756,10 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
 
+  return result;
 #else
   return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
 #endif
-
-  return result;
 }
 template <>
 inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
@@ -1803,12 +1802,12 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
 
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA));
   RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB));
+
+  return result;
 #else
 
   return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
 #endif
-
-  return result;
 }
 
 /** @} */

From a0aa3f6ff0da80763476fa0a29b4e432c98b3e6d Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 1 Feb 2022 12:14:44 -0500
Subject: [PATCH 15/16] Moving cusparse wrappers to detail namespace

---
 .../raft/sparse/convert/detail/csr.cuh        |  9 +--
 .../raft/sparse/convert/detail/dense.cuh      | 48 +++++++--------
 .../raft/sparse/detail/cusparse_wrappers.h    |  2 +
 .../raft/sparse/linalg/detail/transpose.h     | 60 +++++++++----------
 cpp/include/raft/spectral/matrix_wrappers.hpp | 53 ++++++++--------
 5 files changed, 88 insertions(+), 84 deletions(-)

diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh
index b787a9d588..751335dfca 100644
--- a/cpp/include/raft/sparse/convert/detail/csr.cuh
+++ b/cpp/include/raft/sparse/convert/detail/csr.cuh
@@ -61,15 +61,16 @@ void coo_to_csr(const raft::handle_t& handle,
     cudaMemcpyAsync(dstRows.data(), srcRows, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream));
   RAFT_CUDA_TRY(
     cudaMemcpyAsync(dstCols, srcCols, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream));
-  auto buffSize = raft::sparse::cusparsecoosort_bufferSizeExt(
+  auto buffSize = raft::sparse::detail::cusparsecoosort_bufferSizeExt(
     cusparseHandle, m, m, nnz, srcRows, srcCols, stream);
   rmm::device_uvector<char> pBuffer(buffSize, stream);
   rmm::device_uvector<int> P(nnz, stream);
   RAFT_CUSPARSE_TRY(cusparseCreateIdentityPermutation(cusparseHandle, nnz, P.data()));
-  raft::sparse::cusparsecoosortByRow(
+  raft::sparse::detail::cusparsecoosortByRow(
     cusparseHandle, m, m, nnz, dstRows.data(), dstCols, P.data(), pBuffer.data(), stream);
-  raft::sparse::cusparsegthr(cusparseHandle, nnz, srcVals, dstVals, P.data(), stream);
-  raft::sparse::cusparsecoo2csr(cusparseHandle, dstRows.data(), nnz, m, dst_offsets, stream);
+  raft::sparse::detail::cusparsegthr(cusparseHandle, nnz, srcVals, dstVals, P.data(), stream);
+  raft::sparse::detail::cusparsecoo2csr(
+    cusparseHandle, dstRows.data(), nnz, m, dst_offsets, stream);
   RAFT_CUDA_TRY(cudaDeviceSynchronize());
 }
 
diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh
index bc6828d471..b2756b81c9 100644
--- a/cpp/include/raft/sparse/convert/detail/dense.cuh
+++ b/cpp/include/raft/sparse/convert/detail/dense.cuh
@@ -100,33 +100,33 @@ void csr_to_dense(cusparseHandle_t handle,
     RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL));
 
     size_t buffer_size;
-    RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense_buffersize(handle,
-                                                                 nrows,
-                                                                 ncols,
-                                                                 nnz,
-                                                                 out_mat,
-                                                                 csr_data,
-                                                                 csr_indptr,
-                                                                 csr_indices,
-                                                                 out,
-                                                                 lda,
-                                                                 &buffer_size,
-                                                                 stream));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2dense_buffersize(handle,
+                                                                         nrows,
+                                                                         ncols,
+                                                                         nnz,
+                                                                         out_mat,
+                                                                         csr_data,
+                                                                         csr_indptr,
+                                                                         csr_indices,
+                                                                         out,
+                                                                         lda,
+                                                                         &buffer_size,
+                                                                         stream));
 
     rmm::device_uvector<char> buffer(buffer_size, stream);
 
-    RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense(handle,
-                                                      nrows,
-                                                      ncols,
-                                                      nnz,
-                                                      out_mat,
-                                                      csr_data,
-                                                      csr_indptr,
-                                                      csr_indices,
-                                                      out,
-                                                      lda,
-                                                      buffer.data(),
-                                                      stream));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2dense(handle,
+                                                              nrows,
+                                                              ncols,
+                                                              nnz,
+                                                              out_mat,
+                                                              csr_data,
+                                                              csr_indptr,
+                                                              csr_indices,
+                                                              out,
+                                                              lda,
+                                                              buffer.data(),
+                                                              stream));
 
     RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat));
 
diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index 8a1bf24dd2..aef3976294 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -22,6 +22,7 @@
 
 namespace raft {
 namespace sparse {
+namespace detail {
 
 /**
  * @defgroup gthr cusparse gather methods
@@ -1812,5 +1813,6 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle,
 
 /** @} */
 
+}  // namespace detail
 }  // namespace sparse
 }  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h
index 699c67544e..398877eaab 100644
--- a/cpp/include/raft/sparse/linalg/detail/transpose.h
+++ b/cpp/include/raft/sparse/linalg/detail/transpose.h
@@ -70,39 +70,39 @@ void csr_transpose(cusparseHandle_t handle,
 {
   size_t convert_csc_workspace_size = 0;
 
-  RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2csc_bufferSize(handle,
-                                                             csr_nrows,
-                                                             csr_ncols,
-                                                             nnz,
-                                                             csr_data,
-                                                             csr_indptr,
-                                                             csr_indices,
-                                                             csc_data,
-                                                             csc_indptr,
-                                                             csc_indices,
-                                                             CUSPARSE_ACTION_NUMERIC,
-                                                             CUSPARSE_INDEX_BASE_ZERO,
-                                                             CUSPARSE_CSR2CSC_ALG1,
-                                                             &convert_csc_workspace_size,
-                                                             stream));
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2csc_bufferSize(handle,
+                                                                     csr_nrows,
+                                                                     csr_ncols,
+                                                                     nnz,
+                                                                     csr_data,
+                                                                     csr_indptr,
+                                                                     csr_indices,
+                                                                     csc_data,
+                                                                     csc_indptr,
+                                                                     csc_indices,
+                                                                     CUSPARSE_ACTION_NUMERIC,
+                                                                     CUSPARSE_INDEX_BASE_ZERO,
+                                                                     CUSPARSE_CSR2CSC_ALG1,
+                                                                     &convert_csc_workspace_size,
+                                                                     stream));
 
   rmm::device_uvector<char> convert_csc_workspace(convert_csc_workspace_size, stream);
 
-  RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2csc(handle,
-                                                  csr_nrows,
-                                                  csr_ncols,
-                                                  nnz,
-                                                  csr_data,
-                                                  csr_indptr,
-                                                  csr_indices,
-                                                  csc_data,
-                                                  csc_indptr,
-                                                  csc_indices,
-                                                  CUSPARSE_ACTION_NUMERIC,
-                                                  CUSPARSE_INDEX_BASE_ZERO,
-                                                  CUSPARSE_CSR2CSC_ALG1,
-                                                  convert_csc_workspace.data(),
-                                                  stream));
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2csc(handle,
+                                                          csr_nrows,
+                                                          csr_ncols,
+                                                          nnz,
+                                                          csr_data,
+                                                          csr_indptr,
+                                                          csr_indices,
+                                                          csc_data,
+                                                          csc_indptr,
+                                                          csc_indices,
+                                                          CUSPARSE_ACTION_NUMERIC,
+                                                          CUSPARSE_INDEX_BASE_ZERO,
+                                                          CUSPARSE_CSR2CSC_ALG1,
+                                                          convert_csc_workspace.data(),
+                                                          stream));
 }
 
 };  // end NAMESPACE detail
diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp
index bd1866a4f0..378cebcb4a 100644
--- a/cpp/include/raft/spectral/matrix_wrappers.hpp
+++ b/cpp/include/raft/spectral/matrix_wrappers.hpp
@@ -208,24 +208,24 @@ struct sparse_matrix_t {
     // void*; the casts should be harmless)
     //
     cusparseSpMatDescr_t matA;
-    RAFT_CUSPARSE_TRY(cusparsecreatecsr(&matA,
-                                        nrows_,
-                                        ncols_,
-                                        nnz_,
-                                        const_cast<index_type*>(row_offsets_),
-                                        const_cast<index_type*>(col_indices_),
-                                        const_cast<value_type*>(values_)));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA,
+                                                              nrows_,
+                                                              ncols_,
+                                                              nnz_,
+                                                              const_cast<index_type*>(row_offsets_),
+                                                              const_cast<index_type*>(col_indices_),
+                                                              const_cast<value_type*>(values_)));
 
     cusparseDnVecDescr_t vecX;
-    RAFT_CUSPARSE_TRY(cusparsecreatednvec(&vecX, size_x, x));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x));
 
     cusparseDnVecDescr_t vecY;
-    RAFT_CUSPARSE_TRY(cusparsecreatednvec(&vecY, size_y, y));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y));
 
     // get (scratch) external device buffer size:
     //
     size_t bufferSize;
-    RAFT_CUSPARSE_TRY(cusparsespmv_buffersize(
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize(
       cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream));
 
     // allocate external buffer:
@@ -234,7 +234,7 @@ struct sparse_matrix_t {
 
     // finally perform SpMV:
     //
-    RAFT_CUSPARSE_TRY(cusparsespmv(
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(
       cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream));
 
     // free descriptors:
@@ -244,7 +244,8 @@ struct sparse_matrix_t {
     RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX));
     RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA));
 #else
-    RAFT_CUSPARSE_TRY(cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream));
+    RAFT_CUSPARSE_TRY(
+      raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream));
     cusparseMatDescr_t descr = 0;
     RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr));
     if (symmetric) {
@@ -253,20 +254,20 @@ struct sparse_matrix_t {
       RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL));
     }
     RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO));
-    RAFT_CUSPARSE_TRY(cusparsecsrmv(cusparse_h,
-                                    trans,
-                                    nrows_,
-                                    ncols_,
-                                    nnz_,
-                                    &alpha,
-                                    descr,
-                                    values_,
-                                    row_offsets_,
-                                    col_indices_,
-                                    x,
-                                    &beta,
-                                    y,
-                                    stream));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h,
+                                                          trans,
+                                                          nrows_,
+                                                          ncols_,
+                                                          nnz_,
+                                                          &alpha,
+                                                          descr,
+                                                          values_,
+                                                          row_offsets_,
+                                                          col_indices_,
+                                                          x,
+                                                          &beta,
+                                                          y,
+                                                          stream));
     RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr));
 #endif
   }

From 16474fbaaa77cca7c8c35491433cfb8f5b25b046 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 8 Feb 2022 11:32:59 -0500
Subject: [PATCH 16/16] Fixing merge

---
 cpp/include/raft/handle.hpp                        |  8 +-------
 .../raft/sparse/distance/detail/bin_distance.cuh   |  3 +--
 .../raft/sparse/distance/detail/ip_distance.cuh    |  5 ++---
 .../raft/sparse/distance/detail/lp_distance.cuh    |  4 +---
 cpp/include/raft/sparse/distance/distance.hpp      | 14 +-------------
 cpp/include/raft/sparse/op/detail/slice.h          |  2 +-
 cpp/include/raft/spectral/matrix_wrappers.hpp      |  2 +-
 cpp/test/sparse/dist_coo_spmv.cu                   |  6 ++----
 cpp/test/sparse/distance.cu                        |  2 +-
 cpp/test/sparse/knn.cu                             |  2 --
 10 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp
index 1e866ea1d5..7d6a5bfafd 100644
--- a/cpp/include/raft/handle.hpp
+++ b/cpp/include/raft/handle.hpp
@@ -35,16 +35,10 @@
 #include "cudart_utils.h"
 
 #include <raft/comms/comms.hpp>
-<<<<<<< HEAD
-#include <raft/linalg/cublas_wrappers.h>
-#include <raft/linalg/cusolver_wrappers.h>
-#include <raft/sparse/detail/cusparse_macros.h>
-=======
 #include <raft/interruptible.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/detail/cusolver_wrappers.hpp>
-#include <raft/sparse/cusparse_wrappers.h>
->>>>>>> rapidsai/branch-22.04
+#include <raft/sparse/detail/cusparse_macros.h>
 #include <rmm/cuda_stream_pool.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
index 1d302f2fc7..124fa2285d 100644
--- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
@@ -20,8 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/linalg/distance_type.hpp>
-#include <raft/sparse/detail/cusparse_wrappers.h>
+#include <raft/distance/distance_type.hpp>
 #include <raft/sparse/detail/utils.h>
 #include <raft/sparse/distance/common.h>
 #include <raft/sparse/distance/detail/ip_distance.cuh>
diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
index be5f2405c9..6e717e9920 100644
--- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
@@ -19,11 +19,10 @@
 #include <limits.h>
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/linalg/distance_type.hpp>
+#include <raft/distance/distance_type.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 
-#include <raft/sparse/convert/csr.hpp>
-#include <raft/sparse/convert/dense.hpp>
+#include <raft/sparse/convert/coo.hpp>
 #include <raft/sparse/detail/utils.h>
 #include <raft/sparse/distance/common.h>
 #include <raft/sparse/distance/detail/coo_spmv.cuh>
diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
index c63bcecee1..de9049ced7 100644
--- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
@@ -20,9 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/linalg/distance_type.hpp>
-#include <raft/sparse/detail/cusparse_wrappers.h>
-
+#include <raft/distance/distance_type.hpp>
 #include <rmm/device_uvector.hpp>
 
 #include <raft/sparse/csr.hpp>
diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp
index 028b8f8531..dc9837ab43 100644
--- a/cpp/include/raft/sparse/distance/distance.hpp
+++ b/cpp/include/raft/sparse/distance/distance.hpp
@@ -16,28 +16,16 @@
 
 #pragma once
 
-#include <raft/cudart_utils.h>
+#include <raft/sparse/distance/common.h>
 #include <unordered_set>
 
-#include <raft/cuda_utils.cuh>
 #include <raft/distance/distance_type.hpp>
-#include <raft/mr/device/buffer.hpp>
-#include <raft/sparse/detail/cusparse_wrappers.h>
-
-#include <raft/sparse/convert/coo.hpp>
-#include <raft/sparse/convert/csr.hpp>
-#include <raft/sparse/convert/dense.hpp>
-#include <raft/sparse/csr.hpp>
-#include <raft/sparse/detail/utils.h>
-#include <raft/sparse/linalg/transpose.hpp>
 
 #include <raft/sparse/distance/detail/bin_distance.cuh>
 #include <raft/sparse/distance/detail/ip_distance.cuh>
 #include <raft/sparse/distance/detail/l2_distance.cuh>
 #include <raft/sparse/distance/detail/lp_distance.cuh>
 
-#include <cusparse_v2.h>
-
 namespace raft {
 namespace sparse {
 namespace distance {
diff --git a/cpp/include/raft/sparse/op/detail/slice.h b/cpp/include/raft/sparse/op/detail/slice.h
index c1a2fb7121..e3c0f09e14 100644
--- a/cpp/include/raft/sparse/op/detail/slice.h
+++ b/cpp/include/raft/sparse/op/detail/slice.h
@@ -20,7 +20,7 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/linalg/unary_op.cuh>
+#include <raft/linalg/unary_op.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <thrust/device_ptr.h>
diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp
index cd33d9c28e..d86dc21135 100644
--- a/cpp/include/raft/spectral/matrix_wrappers.hpp
+++ b/cpp/include/raft/spectral/matrix_wrappers.hpp
@@ -17,8 +17,8 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/fill.h>
diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu
index 778295bda6..e2288daed9 100644
--- a/cpp/test/sparse/dist_coo_spmv.cu
+++ b/cpp/test/sparse/dist_coo_spmv.cu
@@ -16,12 +16,10 @@
 
 #include <gtest/gtest.h>
 
-#include <cusparse_v2.h>
-
 #include <raft/cudart_utils.h>
-#include <raft/linalg/distance_type.hpp>
-#include <raft/sparse/detail/cusparse_wrappers.h>
+#include <raft/distance/distance_type.hpp>
 #include <raft/linalg/unary_op.hpp>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <rmm/device_uvector.hpp>
 
 #include <raft/sparse/convert/coo.hpp>
diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu
index 4a3c4133f9..7c61f2ed1c 100644
--- a/cpp/test/sparse/distance.cu
+++ b/cpp/test/sparse/distance.cu
@@ -19,8 +19,8 @@
 #include <cusparse_v2.h>
 
 #include <raft/cudart_utils.h>
-#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/distance/distance_type.hpp>
+#include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <raft/sparse/distance/distance.hpp>
 
diff --git a/cpp/test/sparse/knn.cu b/cpp/test/sparse/knn.cu
index a480d1be6a..5a066c2c28 100644
--- a/cpp/test/sparse/knn.cu
+++ b/cpp/test/sparse/knn.cu
@@ -18,12 +18,10 @@
 #include <gtest/gtest.h>
 
 #include "../test_utils.h"
-#include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/distance/distance_type.hpp>
 #include <raft/sparse/selection/knn.hpp>
 
 #include <raft/cudart_utils.h>
-#include <raft/sparse/detail/cusparse_wrappers.h>
 
 namespace raft {
 namespace sparse {