diff --git a/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml b/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml
new file mode 100644
index 000000000..b9cc01791
--- /dev/null
+++ b/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml
@@ -0,0 +1,14 @@
+
+
+
+ test
+
+
+
+
+
+
+
+
+
+
diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu
new file mode 100644
index 000000000..bc239b9bd
--- /dev/null
+++ b/features/feature_case/cusparse/cusparse_4.cu
@@ -0,0 +1,375 @@
+// ===------- cusparse_4.cu -------------------------------- *- CUDA -* ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ===----------------------------------------------------------------------===//
+
+#include "cusparse.h"
+
+#include
+#include
+#include
+#include
+
+template
+struct Data {
+ float *h_data;
+ d_data_t *d_data;
+ int element_num;
+ Data(int element_num) : element_num(element_num) {
+ h_data = (float *)malloc(sizeof(float) * element_num);
+ memset(h_data, 0, sizeof(float) * element_num);
+ cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
+ cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
+ }
+ Data(float *input_data, int element_num) : element_num(element_num) {
+ h_data = (float *)malloc(sizeof(float) * element_num);
+ cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
+ cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
+ memcpy(h_data, input_data, sizeof(float) * element_num);
+ }
+ ~Data() {
+ free(h_data);
+ cudaFree(d_data);
+ }
+ void H2D() {
+ d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
+ memset(h_temp, 0, sizeof(d_data_t) * element_num);
+ from_float_convert(h_data, h_temp);
+ cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num,
+ cudaMemcpyHostToDevice);
+ free(h_temp);
+ }
+ void D2H() {
+ d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
+ memset(h_temp, 0, sizeof(d_data_t) * element_num);
+ cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num,
+ cudaMemcpyDeviceToHost);
+ to_float_convert(h_temp, h_data);
+ free(h_temp);
+ }
+
+private:
+ inline void from_float_convert(float *in, d_data_t *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i];
+ }
+ inline void to_float_convert(d_data_t *in, float *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i];
+ }
+};
+template <>
+inline void Data::from_float_convert(float *in, float2 *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i].x = in[i];
+}
+template <>
+inline void Data::from_float_convert(float *in, double2 *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i].x = in[i];
+}
+
+template <>
+inline void Data::to_float_convert(float2 *in, float *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i].x;
+}
+template <>
+inline void Data::to_float_convert(double2 *in, float *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i].x;
+}
+
+bool compare_result(float *expect, float *result, int element_num) {
+ for (int i = 0; i < element_num; i++) {
+ if (std::abs(result[i] - expect[i]) >= 0.05) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool compare_result(float *expect, float *result, std::vector indices) {
+ for (int i = 0; i < indices.size(); i++) {
+ if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool test_passed = true;
+
+const bool run_complex_datatype = true;
+
+// A * B = C
+//
+// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 |
+// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 |
+// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 |
+void test_cusparseSpGEMM() {
+ std::vector a_val_vec = {1, 2, 3, 4};
+ Data a_s_val(a_val_vec.data(), 4);
+ Data a_d_val(a_val_vec.data(), 4);
+ Data a_c_val(a_val_vec.data(), 4);
+ Data a_z_val(a_val_vec.data(), 4);
+ std::vector a_row_ptr_vec = {0, 2, 3, 4};
+ Data a_row_ptr(a_row_ptr_vec.data(), 4);
+ std::vector a_col_ind_vec = {1, 2, 2, 0};
+ Data a_col_ind(a_col_ind_vec.data(), 4);
+
+ std::vector b_val_vec = {1, 2, 3, 5, 6};
+ Data b_s_val(b_val_vec.data(), 5);
+ Data b_d_val(b_val_vec.data(), 5);
+ Data b_c_val(b_val_vec.data(), 5);
+ Data b_z_val(b_val_vec.data(), 5);
+ std::vector b_row_ptr_vec = {0, 1, 3, 5};
+ Data b_row_ptr(b_row_ptr_vec.data(), 4);
+ std::vector b_col_ind_vec = {0, 0, 1, 2, 3};
+ Data b_col_ind(b_col_ind_vec.data(), 5);
+
+ float alpha = 1;
+ Data alpha_s(&alpha, 1);
+ Data alpha_d(&alpha, 1);
+ Data alpha_c(&alpha, 1);
+ Data alpha_z(&alpha, 1);
+
+ float beta = 0;
+ Data beta_s(&beta, 1);
+ Data beta_d(&beta, 1);
+ Data beta_c(&beta, 1);
+ Data beta_z(&beta, 1);
+
+ cusparseHandle_t handle;
+ cusparseCreate(&handle);
+
+ cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE);
+
+ a_s_val.H2D();
+ a_d_val.H2D();
+ a_c_val.H2D();
+ a_z_val.H2D();
+ a_row_ptr.H2D();
+ a_col_ind.H2D();
+ b_s_val.H2D();
+ b_d_val.H2D();
+ b_c_val.H2D();
+ b_z_val.H2D();
+ b_row_ptr.H2D();
+ b_col_ind.H2D();
+ alpha_s.H2D();
+ alpha_d.H2D();
+ alpha_c.H2D();
+ alpha_z.H2D();
+ beta_s.H2D();
+ beta_d.H2D();
+ beta_c.H2D();
+ beta_z.H2D();
+
+ cusparseSpMatDescr_t a_descr_s;
+ cusparseSpMatDescr_t a_descr_d;
+ cusparseSpMatDescr_t a_descr_c;
+ cusparseSpMatDescr_t a_descr_z;
+ cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
+ cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
+ cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
+ cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
+
+ cusparseSpMatDescr_t b_descr_s;
+ cusparseSpMatDescr_t b_descr_d;
+ cusparseSpMatDescr_t b_descr_c;
+ cusparseSpMatDescr_t b_descr_z;
+ cusparseCreateCsr(&b_descr_s, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
+ cusparseCreateCsr(&b_descr_d, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
+ cusparseCreateCsr(&b_descr_c, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
+ cusparseCreateCsr(&b_descr_z, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
+
+ Data c_s_row_ptr(4);
+ Data c_d_row_ptr(4);
+ Data c_c_row_ptr(4);
+ Data c_z_row_ptr(4);
+
+ cusparseSpMatDescr_t c_descr_s;
+ cusparseSpMatDescr_t c_descr_d;
+ cusparseSpMatDescr_t c_descr_c;
+ cusparseSpMatDescr_t c_descr_z;
+ cusparseCreateCsr(&c_descr_s, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
+ cusparseCreateCsr(&c_descr_d, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
+ cusparseCreateCsr(&c_descr_c, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
+ cusparseCreateCsr(&c_descr_z, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
+
+ cusparseSpGEMMDescr_t SpGEMMDescr_s;
+ cusparseSpGEMMDescr_t SpGEMMDescr_d;
+ cusparseSpGEMMDescr_t SpGEMMDescr_c;
+ cusparseSpGEMMDescr_t SpGEMMDescr_z;
+ cusparseSpGEMM_createDescr(&SpGEMMDescr_s);
+ cusparseSpGEMM_createDescr(&SpGEMMDescr_d);
+ cusparseSpGEMM_createDescr(&SpGEMMDescr_c);
+ cusparseSpGEMM_createDescr(&SpGEMMDescr_z);
+
+ size_t ws_1_size_s = 0;
+ size_t ws_1_size_d = 0;
+ size_t ws_1_size_c = 0;
+ size_t ws_1_size_z = 0;
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_1_size_s, NULL);
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_1_size_d, NULL);
+ if (run_complex_datatype) {
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_1_size_c, NULL);
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_1_size_z, NULL);
+ }
+
+ void *ws_1_s;
+ void *ws_1_d;
+ void *ws_1_c;
+ void *ws_1_z;
+ cudaMalloc(&ws_1_s, ws_1_size_s);
+ cudaMalloc(&ws_1_d, ws_1_size_d);
+ cudaMalloc(&ws_1_c, ws_1_size_c);
+ cudaMalloc(&ws_1_z, ws_1_size_z);
+
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_1_size_s, ws_1_s);
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_1_size_d, ws_1_d);
+ if (run_complex_datatype) {
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_1_size_c, ws_1_c);
+ cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_1_size_z, ws_1_z);
+ }
+
+ size_t ws_2_size_s = 0;
+ size_t ws_2_size_d = 0;
+ size_t ws_2_size_c = 0;
+ size_t ws_2_size_z = 0;
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_2_size_s, NULL);
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_2_size_d, NULL);
+ if (run_complex_datatype) {
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_2_size_c, NULL);
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_2_size_z, NULL);
+ }
+
+ void *ws_2_s;
+ void *ws_2_d;
+ void *ws_2_c;
+ void *ws_2_z;
+ cudaMalloc(&ws_2_s, ws_2_size_s);
+ cudaMalloc(&ws_2_d, ws_2_size_d);
+ cudaMalloc(&ws_2_c, ws_2_size_c);
+ cudaMalloc(&ws_2_z, ws_2_size_z);
+
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_2_size_s, ws_2_s);
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_2_size_d, ws_2_d);
+ if (run_complex_datatype) {
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_2_size_c, ws_2_c);
+ cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_2_size_z, ws_2_z);
+ }
+
+ int64_t c_row_s;
+ int64_t c_row_d;
+ int64_t c_row_c;
+ int64_t c_row_z;
+ int64_t c_col_s;
+ int64_t c_col_d;
+ int64_t c_col_c;
+ int64_t c_col_z;
+ int64_t c_nnz_s;
+ int64_t c_nnz_d;
+ int64_t c_nnz_c;
+ int64_t c_nnz_z;
+ cusparseSpMatGetSize(c_descr_s, &c_row_s, &c_col_s, &c_nnz_s);
+ cusparseSpMatGetSize(c_descr_d, &c_row_d, &c_col_d, &c_nnz_d);
+ cusparseSpMatGetSize(c_descr_c, &c_row_c, &c_col_c, &c_nnz_c);
+ cusparseSpMatGetSize(c_descr_z, &c_row_z, &c_col_z, &c_nnz_z);
+
+ Data c_s_val(c_nnz_s);
+ Data c_d_val(c_nnz_d);
+ Data c_c_val(c_nnz_c);
+ Data c_z_val(c_nnz_z);
+ Data c_s_col_ind(c_nnz_s);
+ Data c_d_col_ind(c_nnz_d);
+ Data c_c_col_ind(c_nnz_c);
+ Data c_z_col_ind(c_nnz_z);
+
+ cusparseCsrSetPointers(c_descr_s, c_s_row_ptr.d_data, c_s_col_ind.d_data, c_s_val.d_data);
+ cusparseCsrSetPointers(c_descr_d, c_d_row_ptr.d_data, c_d_col_ind.d_data, c_d_val.d_data);
+ cusparseCsrSetPointers(c_descr_c, c_c_row_ptr.d_data, c_c_col_ind.d_data, c_c_val.d_data);
+ cusparseCsrSetPointers(c_descr_z, c_z_row_ptr.d_data, c_z_col_ind.d_data, c_z_val.d_data);
+
+ cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s);
+ cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d);
+ if (run_complex_datatype) {
+ cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c);
+ cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z);
+ }
+
+ cudaStreamSynchronize(0);
+
+ cudaFree(ws_1_s);
+ cudaFree(ws_1_d);
+ cudaFree(ws_1_c);
+ cudaFree(ws_1_z);
+ cudaFree(ws_2_s);
+ cudaFree(ws_2_d);
+ cudaFree(ws_2_c);
+ cudaFree(ws_2_z);
+ cusparseDestroySpMat(a_descr_s);
+ cusparseDestroySpMat(a_descr_d);
+ cusparseDestroySpMat(a_descr_c);
+ cusparseDestroySpMat(a_descr_z);
+ cusparseDestroySpMat(b_descr_s);
+ cusparseDestroySpMat(b_descr_d);
+ cusparseDestroySpMat(b_descr_c);
+ cusparseDestroySpMat(b_descr_z);
+ cusparseDestroySpMat(c_descr_s);
+ cusparseDestroySpMat(c_descr_d);
+ cusparseDestroySpMat(c_descr_c);
+ cusparseDestroySpMat(c_descr_z);
+ cusparseSpGEMM_destroyDescr(SpGEMMDescr_s);
+ cusparseSpGEMM_destroyDescr(SpGEMMDescr_d);
+ cusparseSpGEMM_destroyDescr(SpGEMMDescr_c);
+ cusparseSpGEMM_destroyDescr(SpGEMMDescr_z);
+ cusparseDestroy(handle);
+
+ c_s_val.D2H();
+ c_d_val.D2H();
+ c_c_val.D2H();
+ c_z_val.D2H();
+ c_s_row_ptr.D2H();
+ c_d_row_ptr.D2H();
+ c_c_row_ptr.D2H();
+ c_z_row_ptr.D2H();
+ c_s_col_ind.D2H();
+ c_d_col_ind.D2H();
+ c_c_col_ind.D2H();
+ c_z_col_ind.D2H();
+
+ float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000};
+ float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000};
+ float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000};
+ if (compare_result(expect_c_val, c_s_val.h_data, 7) &&
+ compare_result(expect_c_val, c_d_val.h_data, 7) &&
+ compare_result(expect_c_val, c_c_val.h_data, 7) &&
+ compare_result(expect_c_val, c_z_val.h_data, 7) &&
+ compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&
+ compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)
+ )
+ printf("SpGEMM pass\n");
+ else {
+ printf("SpGEMM fail\n");
+ test_passed = false;
+ }
+}
+
+int main() {
+ test_cusparseSpGEMM();
+
+ if (test_passed)
+ return 0;
+ return -1;
+}
diff --git a/features/feature_case/cusparse/cusparse_5.cu b/features/feature_case/cusparse/cusparse_5.cu
new file mode 100644
index 000000000..bb47948ec
--- /dev/null
+++ b/features/feature_case/cusparse/cusparse_5.cu
@@ -0,0 +1,289 @@
+// ===------- cusparse_3.cu -------------------------------- *- CUDA -* ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ===----------------------------------------------------------------------===//
+
+#include "cusparse.h"
+
+#include
+#include
+#include
+#include
+
+template
+struct Data {
+ float *h_data;
+ d_data_t *d_data;
+ int element_num;
+ Data(int element_num) : element_num(element_num) {
+ h_data = (float *)malloc(sizeof(float) * element_num);
+ memset(h_data, 0, sizeof(float) * element_num);
+ cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
+ cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
+ }
+ Data(float *input_data, int element_num) : element_num(element_num) {
+ h_data = (float *)malloc(sizeof(float) * element_num);
+ cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
+ cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
+ memcpy(h_data, input_data, sizeof(float) * element_num);
+ }
+ ~Data() {
+ free(h_data);
+ cudaFree(d_data);
+ }
+ void H2D() {
+ d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
+ memset(h_temp, 0, sizeof(d_data_t) * element_num);
+ from_float_convert(h_data, h_temp);
+ cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num,
+ cudaMemcpyHostToDevice);
+ free(h_temp);
+ }
+ void D2H() {
+ d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
+ memset(h_temp, 0, sizeof(d_data_t) * element_num);
+ cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num,
+ cudaMemcpyDeviceToHost);
+ to_float_convert(h_temp, h_data);
+ free(h_temp);
+ }
+
+private:
+ inline void from_float_convert(float *in, d_data_t *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i];
+ }
+ inline void to_float_convert(d_data_t *in, float *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i];
+ }
+};
+template <>
+inline void Data::from_float_convert(float *in, float2 *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i].x = in[i];
+}
+template <>
+inline void Data::from_float_convert(float *in, double2 *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i].x = in[i];
+}
+
+template <>
+inline void Data::to_float_convert(float2 *in, float *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i].x;
+}
+template <>
+inline void Data::to_float_convert(double2 *in, float *out) {
+ for (int i = 0; i < element_num; i++)
+ out[i] = in[i].x;
+}
+
+bool compare_result(float *expect, float *result, int element_num) {
+ for (int i = 0; i < element_num; i++) {
+ if (std::abs(result[i] - expect[i]) >= 0.05) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool compare_result(float *expect, float *result, std::vector indices) {
+ for (int i = 0; i < indices.size(); i++) {
+ if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool test_passed = true;
+
+const bool run_complex_datatype = true;
+
+// A * C = B
+//
+// | 1 1 2 | | 1 | | 9 |
+// | 0 1 3 | * | 2 | = | 11 |
+// | 0 0 1 | | 3 | | 3 |
+void test_cusparseSpSV() {
+ std::vector a_val_vec = {1, 1, 2, 1, 3, 1};
+ Data a_s_val(a_val_vec.data(), 6);
+ Data a_d_val(a_val_vec.data(), 6);
+ Data a_c_val(a_val_vec.data(), 6);
+ Data a_z_val(a_val_vec.data(), 6);
+ std::vector a_row_ptr_vec = {0, 3, 5, 6};
+ Data a_row_ptr(a_row_ptr_vec.data(), 4);
+ std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3};
+ Data a_col_ind(a_col_ind_vec.data(), 6);
+
+ std::vector b_vec = {9, 11, 3};
+ Data b_s(b_vec.data(), 3);
+ Data b_d(b_vec.data(), 3);
+ Data b_c(b_vec.data(), 3);
+ Data b_z(b_vec.data(), 3);
+
+ Data c_s(3);
+ Data c_d(3);
+ Data c_c(3);
+ Data c_z(3);
+
+ float alpha = 1;
+ Data alpha_s(&alpha, 1);
+ Data alpha_d(&alpha, 1);
+ Data alpha_c(&alpha, 1);
+ Data alpha_z(&alpha, 1);
+
+ cusparseHandle_t handle;
+ cusparseCreate(&handle);
+
+ cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE);
+
+ a_s_val.H2D();
+ a_d_val.H2D();
+ a_c_val.H2D();
+ a_z_val.H2D();
+ a_row_ptr.H2D();
+ a_col_ind.H2D();
+ b_s.H2D();
+ b_d.H2D();
+ b_c.H2D();
+ b_z.H2D();
+ alpha_s.H2D();
+ alpha_d.H2D();
+ alpha_c.H2D();
+ alpha_z.H2D();
+
+ cusparseSpMatDescr_t a_descr_s;
+ cusparseSpMatDescr_t a_descr_d;
+ cusparseSpMatDescr_t a_descr_c;
+ cusparseSpMatDescr_t a_descr_z;
+ cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
+ cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
+ cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
+ cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
+
+ cusparseDnVecDescr_t b_descr_s;
+ cusparseDnVecDescr_t b_descr_d;
+ cusparseDnVecDescr_t b_descr_c;
+ cusparseDnVecDescr_t b_descr_z;
+ cusparseCreateDnVec(&b_descr_s, 3, b_s.d_data, CUDA_R_32F);
+ cusparseCreateDnVec(&b_descr_d, 3, b_d.d_data, CUDA_R_64F);
+ cusparseCreateDnVec(&b_descr_c, 3, b_c.d_data, CUDA_C_32F);
+ cusparseCreateDnVec(&b_descr_z, 3, b_z.d_data, CUDA_C_64F);
+
+ cusparseDnVecDescr_t c_descr_s;
+ cusparseDnVecDescr_t c_descr_d;
+ cusparseDnVecDescr_t c_descr_c;
+ cusparseDnVecDescr_t c_descr_z;
+ cusparseCreateDnVec(&c_descr_s, 3, c_s.d_data, CUDA_R_32F);
+ cusparseCreateDnVec(&c_descr_d, 3, c_d.d_data, CUDA_R_64F);
+ cusparseCreateDnVec(&c_descr_c, 3, c_c.d_data, CUDA_C_32F);
+ cusparseCreateDnVec(&c_descr_z, 3, c_z.d_data, CUDA_C_64F);
+
+ cusparseFillMode_t uplo = CUSPARSE_FILL_MODE_UPPER;
+ cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
+ cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
+ cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
+ cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
+ cusparseDiagType_t diag = CUSPARSE_DIAG_TYPE_UNIT;
+ cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
+ cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
+ cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
+ cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
+
+ cusparseSpSVDescr_t SpSVDescr_s;
+ cusparseSpSVDescr_t SpSVDescr_d;
+ cusparseSpSVDescr_t SpSVDescr_c;
+ cusparseSpSVDescr_t SpSVDescr_z;
+ cusparseSpSV_createDescr(&SpSVDescr_s);
+ cusparseSpSV_createDescr(&SpSVDescr_d);
+ cusparseSpSV_createDescr(&SpSVDescr_c);
+ cusparseSpSV_createDescr(&SpSVDescr_z);
+
+ size_t ws_size_s = 0;
+ size_t ws_size_d = 0;
+ size_t ws_size_c = 0;
+ size_t ws_size_z = 0;
+ cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s);
+ cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d);
+ if (run_complex_datatype) {
+ cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c);
+ cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z);
+ }
+
+ void *ws_s;
+ void *ws_d;
+ void *ws_c;
+ void *ws_z;
+ cudaMalloc(&ws_s, ws_size_s);
+ cudaMalloc(&ws_d, ws_size_d);
+ cudaMalloc(&ws_c, ws_size_c);
+ cudaMalloc(&ws_z, ws_size_z);
+
+ cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s);
+ cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d);
+ if (run_complex_datatype) {
+ cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c);
+ cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z);
+ }
+
+ cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s);
+ cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d);
+ if (run_complex_datatype) {
+ cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c);
+ cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z);
+ }
+
+ c_s.D2H();
+ c_d.D2H();
+ c_c.D2H();
+ c_z.D2H();
+
+ cudaStreamSynchronize(0);
+
+ cudaFree(ws_s);
+ cudaFree(ws_d);
+ cudaFree(ws_c);
+ cudaFree(ws_z);
+ cusparseDestroySpMat(a_descr_s);
+ cusparseDestroySpMat(a_descr_d);
+ cusparseDestroySpMat(a_descr_c);
+ cusparseDestroySpMat(a_descr_z);
+ cusparseDestroyDnVec(b_descr_s);
+ cusparseDestroyDnVec(b_descr_d);
+ cusparseDestroyDnVec(b_descr_c);
+ cusparseDestroyDnVec(b_descr_z);
+ cusparseDestroyDnVec(c_descr_s);
+ cusparseDestroyDnVec(c_descr_d);
+ cusparseDestroyDnVec(c_descr_c);
+ cusparseDestroyDnVec(c_descr_z);
+ cusparseSpSV_destroyDescr(SpSVDescr_s);
+ cusparseSpSV_destroyDescr(SpSVDescr_d);
+ cusparseSpSV_destroyDescr(SpSVDescr_c);
+ cusparseSpSV_destroyDescr(SpSVDescr_z);
+ cusparseDestroy(handle);
+
+ float expect_c[4] = {1, 2, 3};
+ if (compare_result(expect_c, c_s.h_data, 3) &&
+ compare_result(expect_c, c_d.h_data, 3) &&
+ compare_result(expect_c, c_c.h_data, 3) &&
+ compare_result(expect_c, c_z.h_data, 3))
+ printf("SpSV pass\n");
+ else {
+ printf("SpSV fail\n");
+ test_passed = false;
+ }
+}
+
+int main() {
+ test_cusparseSpSV();
+
+ if (test_passed)
+ return 0;
+ return -1;
+}
diff --git a/features/features.xml b/features/features.xml
index 34fb3b016..ec0c8673e 100644
--- a/features/features.xml
+++ b/features/features.xml
@@ -254,6 +254,8 @@
+
+
diff --git a/features/test_feature.py b/features/test_feature.py
index cefff8909..5bc05aa2e 100644
--- a/features/test_feature.py
+++ b/features/test_feature.py
@@ -38,7 +38,7 @@
'math-bf16-conv', 'math-half-conv',
'math-bfloat16', 'libcu_atomic', 'test_shared_memory', 'cudnn-reduction', 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3',
'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6',
- 'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3",
+ 'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5",
'cudnn-GetErrorString', 'cub_device_histgram',
'cudnn-types', 'cudnn-version', 'cudnn-dropout',
'constant_attr', 'sync_warp_p2', 'occupancy_calculation',
diff --git a/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml b/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml
deleted file mode 100644
index 6da1f7005..000000000
--- a/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
- WARNING: DON'T UPDATE THIS FILE MANUALLY!!!
- This is auto-generated accessors configuration file which affects all tests in ported gcc suites
- If you have any issue with this file please contact Compiler QA team
-
-
-
-
-
-
-
-
-
-
diff --git a/help_function/config/TEMPLATE_help_function_sparse_buffer.xml b/help_function/config/TEMPLATE_help_function_sparse_buffer.xml
new file mode 100644
index 000000000..9e20a921a
--- /dev/null
+++ b/help_function/config/TEMPLATE_help_function_sparse_buffer.xml
@@ -0,0 +1,14 @@
+
+
+
+ test help function
+
+
+
+
+
+
+
+
+
+
diff --git a/help_function/config/TEMPLATE_help_function_sparse_usm.xml b/help_function/config/TEMPLATE_help_function_sparse_usm.xml
new file mode 100644
index 000000000..7dbc611de
--- /dev/null
+++ b/help_function/config/TEMPLATE_help_function_sparse_usm.xml
@@ -0,0 +1,14 @@
+
+
+
+ test help function
+
+
+
+
+
+
+
+
+
+
diff --git a/help_function/help_function.xml b/help_function/help_function.xml
index 38cd52ecc..5eca8e704 100644
--- a/help_function/help_function.xml
+++ b/help_function/help_function.xml
@@ -185,8 +185,8 @@
-
-
+
+
diff --git a/help_function/src/sparse_utils_2_buffer.cpp b/help_function/src/sparse_utils_2_buffer.cpp
index 9748c5d00..1cdc6f3c8 100644
--- a/help_function/src/sparse_utils_2_buffer.cpp
+++ b/help_function/src/sparse_utils_2_buffer.cpp
@@ -1438,6 +1438,644 @@ void test_cusparseCsrmvEx() {
}
}
+// A * B = C
+//
+// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 |
+// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 |
+// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 |
+void test_cusparseSpGEMM() {
+ dpct::device_ext &dev_ct1 = dpct::get_current_device();
+ sycl::queue &q_ct1 = dev_ct1.out_of_order_queue();
+ std::vector a_val_vec = {1, 2, 3, 4};
+ Data a_s_val(a_val_vec.data(), 4);
+ Data a_d_val(a_val_vec.data(), 4);
+ Data a_c_val(a_val_vec.data(), 4);
+ Data a_z_val(a_val_vec.data(), 4);
+ std::vector a_row_ptr_vec = {0, 2, 3, 4};
+ Data a_row_ptr(a_row_ptr_vec.data(), 4);
+ std::vector a_col_ind_vec = {1, 2, 2, 0};
+ Data a_col_ind(a_col_ind_vec.data(), 4);
+
+ std::vector b_val_vec = {1, 2, 3, 5, 6};
+ Data b_s_val(b_val_vec.data(), 5);
+ Data b_d_val(b_val_vec.data(), 5);
+ Data b_c_val(b_val_vec.data(), 5);
+ Data b_z_val(b_val_vec.data(), 5);
+ std::vector b_row_ptr_vec = {0, 1, 3, 5};
+ Data b_row_ptr(b_row_ptr_vec.data(), 4);
+ std::vector b_col_ind_vec = {0, 0, 1, 2, 3};
+ Data b_col_ind(b_col_ind_vec.data(), 5);
+
+ float alpha = 1;
+ Data alpha_s(&alpha, 1);
+ Data alpha_d(&alpha, 1);
+ Data alpha_c(&alpha, 1);
+ Data alpha_z(&alpha, 1);
+
+ float beta = 0;
+ Data beta_s(&beta, 1);
+ Data beta_d(&beta, 1);
+ Data beta_c(&beta, 1);
+ Data beta_z(&beta, 1);
+
+ sycl::queue *handle;
+ handle = &q_ct1;
+
+ /*
+ DPCT1026:0: The call to cusparseSetPointerMode was removed because this call
+ is redundant in SYCL.
+ */
+
+ a_s_val.H2D();
+ a_d_val.H2D();
+ a_c_val.H2D();
+ a_z_val.H2D();
+ a_row_ptr.H2D();
+ a_col_ind.H2D();
+ b_s_val.H2D();
+ b_d_val.H2D();
+ b_c_val.H2D();
+ b_z_val.H2D();
+ b_row_ptr.H2D();
+ b_col_ind.H2D();
+ alpha_s.H2D();
+ alpha_d.H2D();
+ alpha_c.H2D();
+ alpha_z.H2D();
+ beta_s.H2D();
+ beta_d.H2D();
+ beta_c.H2D();
+ beta_z.H2D();
+
+ dpct::sparse::sparse_matrix_desc_t a_descr_s;
+ dpct::sparse::sparse_matrix_desc_t a_descr_d;
+ dpct::sparse::sparse_matrix_desc_t a_descr_c;
+ dpct::sparse::sparse_matrix_desc_t a_descr_z;
+ a_descr_s = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_d = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ a_descr_c = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_z = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ dpct::sparse::sparse_matrix_desc_t b_descr_s;
+ dpct::sparse::sparse_matrix_desc_t b_descr_d;
+ dpct::sparse::sparse_matrix_desc_t b_descr_c;
+ dpct::sparse::sparse_matrix_desc_t b_descr_z;
+ b_descr_s = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ b_descr_d = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ b_descr_c = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ b_descr_z = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ Data c_s_row_ptr(4);
+ Data c_d_row_ptr(4);
+ Data c_c_row_ptr(4);
+ Data c_z_row_ptr(4);
+
+ dpct::sparse::sparse_matrix_desc_t c_descr_s;
+ dpct::sparse::sparse_matrix_desc_t c_descr_d;
+ dpct::sparse::sparse_matrix_desc_t c_descr_c;
+ dpct::sparse::sparse_matrix_desc_t c_descr_z;
+ c_descr_s = std::make_shared(
+ 3, 4, 0, c_s_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ c_descr_d = std::make_shared(
+ 3, 4, 0, c_d_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ c_descr_c = std::make_shared(
+ 3, 4, 0, c_c_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ c_descr_z = std::make_shared(
+ 3, 4, 0, c_z_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s;
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d;
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_c;
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_z;
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_s);
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_d);
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_c);
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_z);
+
+ size_t ws_1_size_s = 0;
+ size_t ws_1_size_d = 0;
+ size_t ws_1_size_c = 0;
+ size_t ws_1_size_z = 0;
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s,
+ beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, NULL);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d,
+ beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, NULL);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, NULL);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, NULL);
+ }
+
+ void *ws_1_s;
+ void *ws_1_d;
+ void *ws_1_c;
+ void *ws_1_z;
+ ws_1_s = dpct::dpct_malloc(ws_1_size_s);
+ ws_1_d = dpct::dpct_malloc(ws_1_size_d);
+ ws_1_c = dpct::dpct_malloc(ws_1_size_c);
+ ws_1_z = dpct::dpct_malloc(ws_1_size_z);
+
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s,
+ beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, ws_1_s);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d,
+ beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, ws_1_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, ws_1_c);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, ws_1_z);
+ }
+
+ size_t ws_2_size_s = 0;
+ size_t ws_2_size_d = 0;
+ size_t ws_2_size_c = 0;
+ size_t ws_2_size_z = 0;
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data,
+ a_descr_s, b_descr_s, beta_s.d_data, c_descr_s,
+ SpGEMMDescr_s, &ws_2_size_s, NULL);
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data,
+ a_descr_d, b_descr_d, beta_d.d_data, c_descr_d,
+ SpGEMMDescr_d, &ws_2_size_d, NULL);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, NULL);
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, NULL);
+ }
+
+ void *ws_2_s;
+ void *ws_2_d;
+ void *ws_2_c;
+ void *ws_2_z;
+ ws_2_s = dpct::dpct_malloc(ws_2_size_s);
+ ws_2_d = dpct::dpct_malloc(ws_2_size_d);
+ ws_2_c = dpct::dpct_malloc(ws_2_size_c);
+ ws_2_z = dpct::dpct_malloc(ws_2_size_z);
+
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data,
+ a_descr_s, b_descr_s, beta_s.d_data, c_descr_s,
+ SpGEMMDescr_s, &ws_2_size_s, ws_2_s);
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data,
+ a_descr_d, b_descr_d, beta_d.d_data, c_descr_d,
+ SpGEMMDescr_d, &ws_2_size_d, ws_2_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, ws_2_c);
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, ws_2_z);
+ }
+
+ int64_t c_row_s;
+ int64_t c_row_d;
+ int64_t c_row_c;
+ int64_t c_row_z;
+ int64_t c_col_s;
+ int64_t c_col_d;
+ int64_t c_col_c;
+ int64_t c_col_z;
+ int64_t c_nnz_s;
+ int64_t c_nnz_d;
+ int64_t c_nnz_c;
+ int64_t c_nnz_z;
+ c_descr_s->get_size(&c_row_s, &c_col_s, &c_nnz_s);
+ c_descr_d->get_size(&c_row_d, &c_col_d, &c_nnz_d);
+ c_descr_c->get_size(&c_row_c, &c_col_c, &c_nnz_c);
+ c_descr_z->get_size(&c_row_z, &c_col_z, &c_nnz_z);
+
+ Data c_s_val(c_nnz_s);
+ Data c_d_val(c_nnz_d);
+ Data c_c_val(c_nnz_c);
+ Data c_z_val(c_nnz_z);
+ Data c_s_col_ind(c_nnz_s);
+ Data c_d_col_ind(c_nnz_d);
+ Data c_c_col_ind(c_nnz_c);
+ Data c_z_col_ind(c_nnz_z);
+
+ c_descr_s->set_pointers(c_s_row_ptr.d_data, c_s_col_ind.d_data,
+ c_s_val.d_data);
+ c_descr_d->set_pointers(c_d_row_ptr.d_data, c_d_col_ind.d_data,
+ c_d_val.d_data);
+ c_descr_c->set_pointers(c_c_row_ptr.d_data, c_c_col_ind.d_data,
+ c_c_val.d_data);
+ c_descr_z->set_pointers(c_z_row_ptr.d_data, c_z_col_ind.d_data,
+ c_z_val.d_data);
+
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_s.d_data, a_descr_s, b_descr_s,
+ beta_s.d_data, c_descr_s, SpGEMMDescr_s);
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_d.d_data, a_descr_d, b_descr_d,
+ beta_d.d_data, c_descr_d, SpGEMMDescr_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c);
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z);
+ }
+
+ q_ct1.wait();
+
+ dpct::dpct_free(ws_1_s);
+ dpct::dpct_free(ws_1_d);
+ dpct::dpct_free(ws_1_c);
+ dpct::dpct_free(ws_1_z);
+ dpct::dpct_free(ws_2_s);
+ dpct::dpct_free(ws_2_d);
+ dpct::dpct_free(ws_2_c);
+ dpct::dpct_free(ws_2_z);
+ a_descr_s.reset();
+ a_descr_d.reset();
+ a_descr_c.reset();
+ a_descr_z.reset();
+ b_descr_s.reset();
+ b_descr_d.reset();
+ b_descr_c.reset();
+ b_descr_z.reset();
+ c_descr_s.reset();
+ c_descr_d.reset();
+ c_descr_c.reset();
+ c_descr_z.reset();
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_s);
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_d);
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_c);
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_z);
+ handle = nullptr;
+
+ c_s_val.D2H();
+ c_d_val.D2H();
+ c_c_val.D2H();
+ c_z_val.D2H();
+ c_s_row_ptr.D2H();
+ c_d_row_ptr.D2H();
+ c_c_row_ptr.D2H();
+ c_z_row_ptr.D2H();
+ c_s_col_ind.D2H();
+ c_d_col_ind.D2H();
+ c_c_col_ind.D2H();
+ c_z_col_ind.D2H();
+
+ float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000};
+ float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000};
+ float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000};
+ if (compare_result(expect_c_val, c_s_val.h_data, 7) &&
+ compare_result(expect_c_val, c_d_val.h_data, 7) &&
+ compare_result(expect_c_val, c_c_val.h_data, 7) &&
+ compare_result(expect_c_val, c_z_val.h_data, 7) &&
+ compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&
+ compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)
+ )
+ printf("SpGEMM pass\n");
+ else {
+ printf("SpGEMM fail\n");
+ test_passed = false;
+ }
+}
+
+// A * C = B
+//
+// | 1 1 2 | | 1 | | 9 |
+// | 0 1 3 | * | 2 | = | 11 |
+// | 0 0 1 | | 3 | | 3 |
+void test_cusparseSpSV() {
+ dpct::device_ext &dev_ct1 = dpct::get_current_device();
+ sycl::queue &q_ct1 = dev_ct1.default_queue();
+ std::vector a_val_vec = {1, 1, 2, 1, 3, 1};
+ Data a_s_val(a_val_vec.data(), 6);
+ Data a_d_val(a_val_vec.data(), 6);
+ Data a_c_val(a_val_vec.data(), 6);
+ Data a_z_val(a_val_vec.data(), 6);
+ std::vector a_row_ptr_vec = {0, 3, 5, 6};
+ Data a_row_ptr(a_row_ptr_vec.data(), 4);
+ std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3};
+ Data a_col_ind(a_col_ind_vec.data(), 6);
+
+ std::vector b_vec = {9, 11, 3};
+ Data b_s(b_vec.data(), 3);
+ Data b_d(b_vec.data(), 3);
+ Data b_c(b_vec.data(), 3);
+ Data b_z(b_vec.data(), 3);
+
+ Data c_s(3);
+ Data c_d(3);
+ Data c_c(3);
+ Data c_z(3);
+
+ float alpha = 1;
+ Data alpha_s(&alpha, 1);
+ Data alpha_d(&alpha, 1);
+ Data alpha_c(&alpha, 1);
+ Data alpha_z(&alpha, 1);
+
+ sycl::queue *handle;
+ handle = &q_ct1;
+
+ /*
+ DPCT1026:1: The call to cusparseSetPointerMode was removed because this call
+ is redundant in SYCL.
+ */
+
+ a_s_val.H2D();
+ a_d_val.H2D();
+ a_c_val.H2D();
+ a_z_val.H2D();
+ a_row_ptr.H2D();
+ a_col_ind.H2D();
+ b_s.H2D();
+ b_d.H2D();
+ b_c.H2D();
+ b_z.H2D();
+ alpha_s.H2D();
+ alpha_d.H2D();
+ alpha_c.H2D();
+ alpha_z.H2D();
+
+ dpct::sparse::sparse_matrix_desc_t a_descr_s;
+ dpct::sparse::sparse_matrix_desc_t a_descr_d;
+ dpct::sparse::sparse_matrix_desc_t a_descr_c;
+ dpct::sparse::sparse_matrix_desc_t a_descr_z;
+ a_descr_s = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_d = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ a_descr_c = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_z = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ std::shared_ptr b_descr_s;
+ std::shared_ptr b_descr_d;
+ std::shared_ptr b_descr_c;
+ std::shared_ptr b_descr_z;
+ b_descr_s = std::make_shared(
+ 3, b_s.d_data, dpct::library_data_t::real_float);
+ b_descr_d = std::make_shared(
+ 3, b_d.d_data, dpct::library_data_t::real_double);
+ b_descr_c = std::make_shared(
+ 3, b_c.d_data, dpct::library_data_t::complex_float);
+ b_descr_z = std::make_shared(
+ 3, b_z.d_data, dpct::library_data_t::complex_double);
+
+ std::shared_ptr c_descr_s;
+ std::shared_ptr c_descr_d;
+ std::shared_ptr c_descr_c;
+ std::shared_ptr c_descr_z;
+ c_descr_s = std::make_shared(
+ 3, c_s.d_data, dpct::library_data_t::real_float);
+ c_descr_d = std::make_shared(
+ 3, c_d.d_data, dpct::library_data_t::real_double);
+ c_descr_c = std::make_shared(
+ 3, c_c.d_data, dpct::library_data_t::complex_float);
+ c_descr_z = std::make_shared(
+ 3, c_z.d_data, dpct::library_data_t::complex_double);
+
+ oneapi::mkl::uplo uplo = oneapi::mkl::uplo::upper;
+ a_descr_s->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo,
+ sizeof(uplo));
+ a_descr_d->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo,
+ sizeof(uplo));
+ a_descr_c->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo,
+ sizeof(uplo));
+ a_descr_z->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo,
+ sizeof(uplo));
+ oneapi::mkl::diag diag = oneapi::mkl::diag::unit;
+ a_descr_s->set_attribute(dpct::sparse::matrix_attribute::diag, &diag,
+ sizeof(diag));
+ a_descr_d->set_attribute(dpct::sparse::matrix_attribute::diag, &diag,
+ sizeof(diag));
+ a_descr_c->set_attribute(dpct::sparse::matrix_attribute::diag, &diag,
+ sizeof(diag));
+ a_descr_z->set_attribute(dpct::sparse::matrix_attribute::diag, &diag,
+ sizeof(diag));
+
+ int SpSVDescr_s;
+ int SpSVDescr_d;
+ int SpSVDescr_c;
+ int SpSVDescr_z;
+ /*
+ DPCT1026:2: The call to cusparseSpSV_createDescr was removed because this call
+ is redundant in SYCL.
+ */
+ /*
+ DPCT1026:3: The call to cusparseSpSV_createDescr was removed because this call
+ is redundant in SYCL.
+ */
+ /*
+ DPCT1026:4: The call to cusparseSpSV_createDescr was removed because this call
+ is redundant in SYCL.
+ */
+ /*
+ DPCT1026:5: The call to cusparseSpSV_createDescr was removed because this call
+ is redundant in SYCL.
+ */
+
+ size_t ws_size_s = 0;
+ size_t ws_size_d = 0;
+ size_t ws_size_c = 0;
+ size_t ws_size_z = 0;
+ /*
+ DPCT1026:6: The call to cusparseSpSV_bufferSize was removed because this call
+ is redundant in SYCL.
+ */
+ /*
+ DPCT1026:7: The call to cusparseSpSV_bufferSize was removed because this call
+ is redundant in SYCL.
+ */
+ if (run_complex_datatype) {
+ /*
+ DPCT1026:12: The call to cusparseSpSV_bufferSize was removed because this
+ call is redundant in SYCL.
+ */
+ /*
+ DPCT1026:13: The call to cusparseSpSV_bufferSize was removed because this
+ call is redundant in SYCL.
+ */
+ }
+
+ void *ws_s;
+ void *ws_d;
+ void *ws_c;
+ void *ws_z;
+ ws_s = dpct::dpct_malloc(ws_size_s);
+ ws_d = dpct::dpct_malloc(ws_size_d);
+ ws_c = dpct::dpct_malloc(ws_size_c);
+ ws_z = dpct::dpct_malloc(ws_size_z);
+
+ dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans,
+ a_descr_s);
+ dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans,
+ a_descr_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans,
+ a_descr_c);
+ dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans,
+ a_descr_z);
+ }
+
+ dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_s.d_data,
+ a_descr_s, b_descr_s, c_descr_s,
+ dpct::library_data_t::real_float);
+ dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_d.d_data,
+ a_descr_d, b_descr_d, c_descr_d,
+ dpct::library_data_t::real_double);
+ if (run_complex_datatype) {
+ dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans,
+ alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c,
+ dpct::library_data_t::complex_float);
+ dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans,
+ alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z,
+ dpct::library_data_t::complex_double);
+ }
+
+ c_s.D2H();
+ c_d.D2H();
+ c_c.D2H();
+ c_z.D2H();
+
+ q_ct1.wait();
+
+ dpct::dpct_free(ws_s);
+ dpct::dpct_free(ws_d);
+ dpct::dpct_free(ws_c);
+ dpct::dpct_free(ws_z);
+ a_descr_s.reset();
+ a_descr_d.reset();
+ a_descr_c.reset();
+ a_descr_z.reset();
+ b_descr_s.reset();
+ b_descr_d.reset();
+ b_descr_c.reset();
+ b_descr_z.reset();
+ c_descr_s.reset();
+ c_descr_d.reset();
+ c_descr_c.reset();
+ c_descr_z.reset();
+ /*
+ DPCT1026:8: The call to cusparseSpSV_destroyDescr was removed because this
+ call is redundant in SYCL.
+ */
+ /*
+ DPCT1026:9: The call to cusparseSpSV_destroyDescr was removed because this
+ call is redundant in SYCL.
+ */
+ /*
+ DPCT1026:10: The call to cusparseSpSV_destroyDescr was removed because this
+ call is redundant in SYCL.
+ */
+ /*
+ DPCT1026:11: The call to cusparseSpSV_destroyDescr was removed because this
+ call is redundant in SYCL.
+ */
+ handle = nullptr;
+
+ float expect_c[4] = {1, 2, 3};
+ if (compare_result(expect_c, c_s.h_data, 3) &&
+ compare_result(expect_c, c_d.h_data, 3) &&
+ compare_result(expect_c, c_c.h_data, 3) &&
+ compare_result(expect_c, c_z.h_data, 3))
+ printf("SpSV pass\n");
+ else {
+ printf("SpSV fail\n");
+ test_passed = false;
+ }
+}
+
int main() {
test_cusparseSetGetStream();
test_cusparseTcsrmv_ge();
@@ -1449,6 +2087,8 @@ int main() {
test_cusparseSpMM();
test_cusparseTcsrmv_mp();
test_cusparseCsrmvEx();
+ test_cusparseSpGEMM();
+ test_cusparseSpSV();
if (test_passed)
return 0;
diff --git a/help_function/src/sparse_utils_2_usm.cpp b/help_function/src/sparse_utils_2_usm.cpp
index c90217533..c0f07c123 100644
--- a/help_function/src/sparse_utils_2_usm.cpp
+++ b/help_function/src/sparse_utils_2_usm.cpp
@@ -1445,6 +1445,644 @@ void test_cusparseCsrmvEx() {
}
}
+// A * B = C
+//
+// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 |
+// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 |
+// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 |
+void test_cusparseSpGEMM() {
+ dpct::device_ext &dev_ct1 = dpct::get_current_device();
+ sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+ std::vector a_val_vec = {1, 2, 3, 4};
+ Data a_s_val(a_val_vec.data(), 4);
+ Data a_d_val(a_val_vec.data(), 4);
+ Data a_c_val(a_val_vec.data(), 4);
+ Data a_z_val(a_val_vec.data(), 4);
+ std::vector a_row_ptr_vec = {0, 2, 3, 4};
+ Data a_row_ptr(a_row_ptr_vec.data(), 4);
+ std::vector a_col_ind_vec = {1, 2, 2, 0};
+ Data a_col_ind(a_col_ind_vec.data(), 4);
+
+ std::vector b_val_vec = {1, 2, 3, 5, 6};
+ Data b_s_val(b_val_vec.data(), 5);
+ Data b_d_val(b_val_vec.data(), 5);
+ Data b_c_val(b_val_vec.data(), 5);
+ Data b_z_val(b_val_vec.data(), 5);
+ std::vector b_row_ptr_vec = {0, 1, 3, 5};
+ Data b_row_ptr(b_row_ptr_vec.data(), 4);
+ std::vector b_col_ind_vec = {0, 0, 1, 2, 3};
+ Data b_col_ind(b_col_ind_vec.data(), 5);
+
+ float alpha = 1;
+ Data alpha_s(&alpha, 1);
+ Data alpha_d(&alpha, 1);
+ Data alpha_c(&alpha, 1);
+ Data alpha_z(&alpha, 1);
+
+ float beta = 0;
+ Data beta_s(&beta, 1);
+ Data beta_d(&beta, 1);
+ Data beta_c(&beta, 1);
+ Data beta_z(&beta, 1);
+
+ sycl::queue *handle;
+ handle = &q_ct1;
+
+ /*
+ DPCT1026:0: The call to cusparseSetPointerMode was removed because this call
+ is redundant in SYCL.
+ */
+
+ a_s_val.H2D();
+ a_d_val.H2D();
+ a_c_val.H2D();
+ a_z_val.H2D();
+ a_row_ptr.H2D();
+ a_col_ind.H2D();
+ b_s_val.H2D();
+ b_d_val.H2D();
+ b_c_val.H2D();
+ b_z_val.H2D();
+ b_row_ptr.H2D();
+ b_col_ind.H2D();
+ alpha_s.H2D();
+ alpha_d.H2D();
+ alpha_c.H2D();
+ alpha_z.H2D();
+ beta_s.H2D();
+ beta_d.H2D();
+ beta_c.H2D();
+ beta_z.H2D();
+
+ dpct::sparse::sparse_matrix_desc_t a_descr_s;
+ dpct::sparse::sparse_matrix_desc_t a_descr_d;
+ dpct::sparse::sparse_matrix_desc_t a_descr_c;
+ dpct::sparse::sparse_matrix_desc_t a_descr_z;
+ a_descr_s = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_d = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ a_descr_c = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_z = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ dpct::sparse::sparse_matrix_desc_t b_descr_s;
+ dpct::sparse::sparse_matrix_desc_t b_descr_d;
+ dpct::sparse::sparse_matrix_desc_t b_descr_c;
+ dpct::sparse::sparse_matrix_desc_t b_descr_z;
+ b_descr_s = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ b_descr_d = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ b_descr_c = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ b_descr_z = std::make_shared(
+ 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ Data c_s_row_ptr(4);
+ Data c_d_row_ptr(4);
+ Data c_c_row_ptr(4);
+ Data c_z_row_ptr(4);
+
+ dpct::sparse::sparse_matrix_desc_t c_descr_s;
+ dpct::sparse::sparse_matrix_desc_t c_descr_d;
+ dpct::sparse::sparse_matrix_desc_t c_descr_c;
+ dpct::sparse::sparse_matrix_desc_t c_descr_z;
+ c_descr_s = std::make_shared(
+ 3, 4, 0, c_s_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ c_descr_d = std::make_shared(
+ 3, 4, 0, c_d_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ c_descr_c = std::make_shared(
+ 3, 4, 0, c_c_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ c_descr_z = std::make_shared(
+ 3, 4, 0, c_z_row_ptr.d_data, nullptr, nullptr,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s;
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d;
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_c;
+ oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_z;
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_s);
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_d);
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_c);
+ oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_z);
+
+ size_t ws_1_size_s = 0;
+ size_t ws_1_size_d = 0;
+ size_t ws_1_size_c = 0;
+ size_t ws_1_size_z = 0;
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s,
+ beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, NULL);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d,
+ beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, NULL);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, NULL);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, NULL);
+ }
+
+ void *ws_1_s;
+ void *ws_1_d;
+ void *ws_1_c;
+ void *ws_1_z;
+ ws_1_s = (void *)sycl::malloc_device(ws_1_size_s, q_ct1);
+ ws_1_d = (void *)sycl::malloc_device(ws_1_size_d, q_ct1);
+ ws_1_c = (void *)sycl::malloc_device(ws_1_size_c, q_ct1);
+ ws_1_z = (void *)sycl::malloc_device(ws_1_size_z, q_ct1);
+
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s,
+ beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, ws_1_s);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d,
+ beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, ws_1_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, ws_1_c);
+ dpct::sparse::spgemm_work_estimation(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, ws_1_z);
+ }
+
+ size_t ws_2_size_s = 0;
+ size_t ws_2_size_d = 0;
+ size_t ws_2_size_c = 0;
+ size_t ws_2_size_z = 0;
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data,
+ a_descr_s, b_descr_s, beta_s.d_data, c_descr_s,
+ SpGEMMDescr_s, &ws_2_size_s, NULL);
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data,
+ a_descr_d, b_descr_d, beta_d.d_data, c_descr_d,
+ SpGEMMDescr_d, &ws_2_size_d, NULL);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, NULL);
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, NULL);
+ }
+
+ void *ws_2_s;
+ void *ws_2_d;
+ void *ws_2_c;
+ void *ws_2_z;
+ ws_2_s = (void *)sycl::malloc_device(ws_2_size_s, q_ct1);
+ ws_2_d = (void *)sycl::malloc_device(ws_2_size_d, q_ct1);
+ ws_2_c = (void *)sycl::malloc_device(ws_2_size_c, q_ct1);
+ ws_2_z = (void *)sycl::malloc_device(ws_2_size_z, q_ct1);
+
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_s.d_data,
+ a_descr_s, b_descr_s, beta_s.d_data, c_descr_s,
+ SpGEMMDescr_s, &ws_2_size_s, ws_2_s);
+ dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_d.d_data,
+ a_descr_d, b_descr_d, beta_d.d_data, c_descr_d,
+ SpGEMMDescr_d, &ws_2_size_d, ws_2_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, ws_2_c);
+ dpct::sparse::spgemm_compute(
+ *handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, ws_2_z);
+ }
+
+ int64_t c_row_s;
+ int64_t c_row_d;
+ int64_t c_row_c;
+ int64_t c_row_z;
+ int64_t c_col_s;
+ int64_t c_col_d;
+ int64_t c_col_c;
+ int64_t c_col_z;
+ int64_t c_nnz_s;
+ int64_t c_nnz_d;
+ int64_t c_nnz_c;
+ int64_t c_nnz_z;
+ c_descr_s->get_size(&c_row_s, &c_col_s, &c_nnz_s);
+ c_descr_d->get_size(&c_row_d, &c_col_d, &c_nnz_d);
+ c_descr_c->get_size(&c_row_c, &c_col_c, &c_nnz_c);
+ c_descr_z->get_size(&c_row_z, &c_col_z, &c_nnz_z);
+
+ Data c_s_val(c_nnz_s);
+ Data c_d_val(c_nnz_d);
+ Data c_c_val(c_nnz_c);
+ Data c_z_val(c_nnz_z);
+ Data c_s_col_ind(c_nnz_s);
+ Data c_d_col_ind(c_nnz_d);
+ Data c_c_col_ind(c_nnz_c);
+ Data c_z_col_ind(c_nnz_z);
+
+ c_descr_s->set_pointers(c_s_row_ptr.d_data, c_s_col_ind.d_data,
+ c_s_val.d_data);
+ c_descr_d->set_pointers(c_d_row_ptr.d_data, c_d_col_ind.d_data,
+ c_d_val.d_data);
+ c_descr_c->set_pointers(c_c_row_ptr.d_data, c_c_col_ind.d_data,
+ c_c_val.d_data);
+ c_descr_z->set_pointers(c_z_row_ptr.d_data, c_z_col_ind.d_data,
+ c_z_val.d_data);
+
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_s.d_data, a_descr_s, b_descr_s,
+ beta_s.d_data, c_descr_s, SpGEMMDescr_s);
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_d.d_data, a_descr_d, b_descr_d,
+ beta_d.d_data, c_descr_d, SpGEMMDescr_d);
+ if (run_complex_datatype) {
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_c.d_data, a_descr_c, b_descr_c,
+ beta_c.d_data, c_descr_c, SpGEMMDescr_c);
+ dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans,
+ oneapi::mkl::transpose::nontrans,
+ alpha_z.d_data, a_descr_z, b_descr_z,
+ beta_z.d_data, c_descr_z, SpGEMMDescr_z);
+ }
+
+ q_ct1.wait();
+
+ sycl::free(ws_1_s, q_ct1);
+ sycl::free(ws_1_d, q_ct1);
+ sycl::free(ws_1_c, q_ct1);
+ sycl::free(ws_1_z, q_ct1);
+ sycl::free(ws_2_s, q_ct1);
+ sycl::free(ws_2_d, q_ct1);
+ sycl::free(ws_2_c, q_ct1);
+ sycl::free(ws_2_z, q_ct1);
+ a_descr_s.reset();
+ a_descr_d.reset();
+ a_descr_c.reset();
+ a_descr_z.reset();
+ b_descr_s.reset();
+ b_descr_d.reset();
+ b_descr_c.reset();
+ b_descr_z.reset();
+ c_descr_s.reset();
+ c_descr_d.reset();
+ c_descr_c.reset();
+ c_descr_z.reset();
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_s);
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_d);
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_c);
+ oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_z);
+ handle = nullptr;
+
+ c_s_val.D2H();
+ c_d_val.D2H();
+ c_c_val.D2H();
+ c_z_val.D2H();
+ c_s_row_ptr.D2H();
+ c_d_row_ptr.D2H();
+ c_c_row_ptr.D2H();
+ c_z_row_ptr.D2H();
+ c_s_col_ind.D2H();
+ c_d_col_ind.D2H();
+ c_c_col_ind.D2H();
+ c_z_col_ind.D2H();
+
+ float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000};
+ float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000};
+ float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000};
+ if (compare_result(expect_c_val, c_s_val.h_data, 7) &&
+ compare_result(expect_c_val, c_d_val.h_data, 7) &&
+ compare_result(expect_c_val, c_c_val.h_data, 7) &&
+ compare_result(expect_c_val, c_z_val.h_data, 7) &&
+ compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) &&
+ compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&
+ compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) &&
+ compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)
+ )
+ printf("SpGEMM pass\n");
+ else {
+ printf("SpGEMM fail\n");
+ test_passed = false;
+ }
+}
+
+// A * C = B
+//
+// | 1 1 2 | | 1 | | 9 |
+// | 0 1 3 | * | 2 | = | 11 |
+// | 0 0 1 | | 3 | | 3 |
+void test_cusparseSpSV() {
+ dpct::device_ext &dev_ct1 = dpct::get_current_device();
+ sycl::queue &q_ct1 = dev_ct1.default_queue();
+ std::vector a_val_vec = {1, 1, 2, 1, 3, 1};
+ Data a_s_val(a_val_vec.data(), 6);
+ Data a_d_val(a_val_vec.data(), 6);
+ Data a_c_val(a_val_vec.data(), 6);
+ Data a_z_val(a_val_vec.data(), 6);
+ std::vector a_row_ptr_vec = {0, 3, 5, 6};
+ Data a_row_ptr(a_row_ptr_vec.data(), 4);
+ std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3};
+ Data a_col_ind(a_col_ind_vec.data(), 6);
+
+ std::vector b_vec = {9, 11, 3};
+ Data b_s(b_vec.data(), 3);
+ Data b_d(b_vec.data(), 3);
+ Data b_c(b_vec.data(), 3);
+ Data b_z(b_vec.data(), 3);
+
+ Data c_s(3);
+ Data c_d(3);
+ Data c_c(3);
+ Data c_z(3);
+
+ float alpha = 1;
+ Data alpha_s(&alpha, 1);
+ Data alpha_d(&alpha, 1);
+ Data alpha_c(&alpha, 1);
+ Data alpha_z(&alpha, 1);
+
+ sycl::queue *handle;
+ handle = &q_ct1;
+
+ /*
+ DPCT1026:1: The call to cusparseSetPointerMode was removed because this call
+ is redundant in SYCL.
+ */
+
+ a_s_val.H2D();
+ a_d_val.H2D();
+ a_c_val.H2D();
+ a_z_val.H2D();
+ a_row_ptr.H2D();
+ a_col_ind.H2D();
+ b_s.H2D();
+ b_d.H2D();
+ b_c.H2D();
+ b_z.H2D();
+ alpha_s.H2D();
+ alpha_d.H2D();
+ alpha_c.H2D();
+ alpha_z.H2D();
+
+ dpct::sparse::sparse_matrix_desc_t a_descr_s;
+ dpct::sparse::sparse_matrix_desc_t a_descr_d;
+ dpct::sparse::sparse_matrix_desc_t a_descr_c;
+ dpct::sparse::sparse_matrix_desc_t a_descr_z;
+ a_descr_s = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_d = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::real_double,
+ dpct::sparse::matrix_format::csr);
+ a_descr_c = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float,
+ dpct::sparse::matrix_format::csr);
+ a_descr_z = std::make_shared(
+ 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data,
+ dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
+ oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double,
+ dpct::sparse::matrix_format::csr);
+
+ std::shared_ptr b_descr_s;
+ std::shared_ptr b_descr_d;
+ std::shared_ptr b_descr_c;
+ std::shared_ptr b_descr_z;
+ b_descr_s = std::make_shared(
+ 3, b_s.d_data, dpct::library_data_t::real_float);
+ b_descr_d = std::make_shared(
+ 3, b_d.d_data, dpct::library_data_t::real_double);
+ b_descr_c = std::make_shared