diff --git a/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml b/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml new file mode 100644 index 000000000..b9cc01791 --- /dev/null +++ b/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml @@ -0,0 +1,14 @@ + + + + test + + + + + + + + + + diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu new file mode 100644 index 000000000..bc239b9bd --- /dev/null +++ b/features/feature_case/cusparse/cusparse_4.cu @@ -0,0 +1,375 @@ +// ===------- cusparse_4.cu -------------------------------- *- CUDA -* ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===----------------------------------------------------------------------===// + +#include "cusparse.h" + +#include +#include +#include +#include + +template +struct Data { + float *h_data; + d_data_t *d_data; + int element_num; + Data(int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + memset(h_data, 0, sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + } + Data(float *input_data, int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + memcpy(h_data, input_data, sizeof(float) * element_num); + } + ~Data() { + free(h_data); + cudaFree(d_data); + } + void H2D() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + from_float_convert(h_data, h_temp); + cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num, + cudaMemcpyHostToDevice); + free(h_temp); + } + void D2H() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num, + cudaMemcpyDeviceToHost); + to_float_convert(h_temp, h_data); + free(h_temp); + } + +private: + inline void from_float_convert(float *in, d_data_t *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } + inline void to_float_convert(d_data_t *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } +}; +template <> +inline void Data::from_float_convert(float *in, float2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} +template <> +inline void Data::from_float_convert(float *in, double2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} + +template <> +inline void Data::to_float_convert(float2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} +template <> +inline void Data::to_float_convert(double2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} + +bool compare_result(float *expect, float *result, int element_num) { + for (int i = 0; i < element_num; i++) { + if (std::abs(result[i] - expect[i]) >= 0.05) { + return false; + } + } + return true; +} + +bool compare_result(float *expect, float *result, std::vector indices) { + for (int i = 0; i < indices.size(); i++) { + if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) { + return false; + } + } + return true; +} + +bool test_passed = true; + +const bool run_complex_datatype = true; + +// A * B = C +// +// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 | +// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 | +// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | +void test_cusparseSpGEMM() { + std::vector a_val_vec = {1, 2, 3, 4}; + Data a_s_val(a_val_vec.data(), 4); + Data a_d_val(a_val_vec.data(), 4); + Data a_c_val(a_val_vec.data(), 4); + Data a_z_val(a_val_vec.data(), 4); + std::vector a_row_ptr_vec = {0, 2, 3, 4}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {1, 2, 2, 0}; + Data a_col_ind(a_col_ind_vec.data(), 4); + + std::vector b_val_vec = {1, 2, 3, 5, 6}; + Data b_s_val(b_val_vec.data(), 5); + Data b_d_val(b_val_vec.data(), 5); + Data b_c_val(b_val_vec.data(), 5); + Data b_z_val(b_val_vec.data(), 5); + std::vector b_row_ptr_vec = {0, 1, 3, 5}; + Data b_row_ptr(b_row_ptr_vec.data(), 4); + std::vector b_col_ind_vec = {0, 0, 1, 2, 3}; + Data b_col_ind(b_col_ind_vec.data(), 5); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + float beta = 0; + Data beta_s(&beta, 1); + Data beta_d(&beta, 1); + Data beta_c(&beta, 1); + Data beta_z(&beta, 1); + + cusparseHandle_t handle; + cusparseCreate(&handle); + + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE); + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s_val.H2D(); + b_d_val.H2D(); + b_c_val.H2D(); + b_z_val.H2D(); + b_row_ptr.H2D(); + b_col_ind.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + beta_s.H2D(); + beta_d.H2D(); + beta_c.H2D(); + beta_z.H2D(); + + cusparseSpMatDescr_t a_descr_s; + cusparseSpMatDescr_t a_descr_d; + cusparseSpMatDescr_t a_descr_c; + cusparseSpMatDescr_t a_descr_z; + cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseSpMatDescr_t b_descr_s; + cusparseSpMatDescr_t b_descr_d; + cusparseSpMatDescr_t b_descr_c; + cusparseSpMatDescr_t b_descr_z; + cusparseCreateCsr(&b_descr_s, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&b_descr_d, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&b_descr_c, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&b_descr_z, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + + cusparseSpMatDescr_t c_descr_s; + cusparseSpMatDescr_t c_descr_d; + cusparseSpMatDescr_t c_descr_c; + cusparseSpMatDescr_t c_descr_z; + cusparseCreateCsr(&c_descr_s, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&c_descr_d, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&c_descr_c, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&c_descr_z, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseSpGEMMDescr_t SpGEMMDescr_s; + cusparseSpGEMMDescr_t SpGEMMDescr_d; + cusparseSpGEMMDescr_t SpGEMMDescr_c; + cusparseSpGEMMDescr_t SpGEMMDescr_z; + cusparseSpGEMM_createDescr(&SpGEMMDescr_s); + cusparseSpGEMM_createDescr(&SpGEMMDescr_d); + cusparseSpGEMM_createDescr(&SpGEMMDescr_c); + cusparseSpGEMM_createDescr(&SpGEMMDescr_z); + + size_t ws_1_size_s = 0; + size_t ws_1_size_d = 0; + size_t ws_1_size_c = 0; + size_t ws_1_size_z = 0; + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_1_size_s, NULL); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_1_size_d, NULL); + if (run_complex_datatype) { + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_1_size_c, NULL); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_1_size_z, NULL); + } + + void *ws_1_s; + void *ws_1_d; + void *ws_1_c; + void *ws_1_z; + cudaMalloc(&ws_1_s, ws_1_size_s); + cudaMalloc(&ws_1_d, ws_1_size_d); + cudaMalloc(&ws_1_c, ws_1_size_c); + cudaMalloc(&ws_1_z, ws_1_size_z); + + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_1_size_s, ws_1_s); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_1_size_d, ws_1_d); + if (run_complex_datatype) { + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_1_size_c, ws_1_c); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_1_size_z, ws_1_z); + } + + size_t ws_2_size_s = 0; + size_t ws_2_size_d = 0; + size_t ws_2_size_c = 0; + size_t ws_2_size_z = 0; + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_2_size_s, NULL); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_2_size_d, NULL); + if (run_complex_datatype) { + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_2_size_c, NULL); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_2_size_z, NULL); + } + + void *ws_2_s; + void *ws_2_d; + void *ws_2_c; + void *ws_2_z; + cudaMalloc(&ws_2_s, ws_2_size_s); + cudaMalloc(&ws_2_d, ws_2_size_d); + cudaMalloc(&ws_2_c, ws_2_size_c); + cudaMalloc(&ws_2_z, ws_2_size_z); + + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_2_size_s, ws_2_s); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_2_size_d, ws_2_d); + if (run_complex_datatype) { + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_2_size_c, ws_2_c); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_2_size_z, ws_2_z); + } + + int64_t c_row_s; + int64_t c_row_d; + int64_t c_row_c; + int64_t c_row_z; + int64_t c_col_s; + int64_t c_col_d; + int64_t c_col_c; + int64_t c_col_z; + int64_t c_nnz_s; + int64_t c_nnz_d; + int64_t c_nnz_c; + int64_t c_nnz_z; + cusparseSpMatGetSize(c_descr_s, &c_row_s, &c_col_s, &c_nnz_s); + cusparseSpMatGetSize(c_descr_d, &c_row_d, &c_col_d, &c_nnz_d); + cusparseSpMatGetSize(c_descr_c, &c_row_c, &c_col_c, &c_nnz_c); + cusparseSpMatGetSize(c_descr_z, &c_row_z, &c_col_z, &c_nnz_z); + + Data c_s_val(c_nnz_s); + Data c_d_val(c_nnz_d); + Data c_c_val(c_nnz_c); + Data c_z_val(c_nnz_z); + Data c_s_col_ind(c_nnz_s); + Data c_d_col_ind(c_nnz_d); + Data c_c_col_ind(c_nnz_c); + Data c_z_col_ind(c_nnz_z); + + cusparseCsrSetPointers(c_descr_s, c_s_row_ptr.d_data, c_s_col_ind.d_data, c_s_val.d_data); + cusparseCsrSetPointers(c_descr_d, c_d_row_ptr.d_data, c_d_col_ind.d_data, c_d_val.d_data); + cusparseCsrSetPointers(c_descr_c, c_c_row_ptr.d_data, c_c_col_ind.d_data, c_c_val.d_data); + cusparseCsrSetPointers(c_descr_z, c_z_row_ptr.d_data, c_z_col_ind.d_data, c_z_val.d_data); + + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s); + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d); + if (run_complex_datatype) { + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c); + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z); + } + + cudaStreamSynchronize(0); + + cudaFree(ws_1_s); + cudaFree(ws_1_d); + cudaFree(ws_1_c); + cudaFree(ws_1_z); + cudaFree(ws_2_s); + cudaFree(ws_2_d); + cudaFree(ws_2_c); + cudaFree(ws_2_z); + cusparseDestroySpMat(a_descr_s); + cusparseDestroySpMat(a_descr_d); + cusparseDestroySpMat(a_descr_c); + cusparseDestroySpMat(a_descr_z); + cusparseDestroySpMat(b_descr_s); + cusparseDestroySpMat(b_descr_d); + cusparseDestroySpMat(b_descr_c); + cusparseDestroySpMat(b_descr_z); + cusparseDestroySpMat(c_descr_s); + cusparseDestroySpMat(c_descr_d); + cusparseDestroySpMat(c_descr_c); + cusparseDestroySpMat(c_descr_z); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_s); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_d); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_c); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_z); + cusparseDestroy(handle); + + c_s_val.D2H(); + c_d_val.D2H(); + c_c_val.D2H(); + c_z_val.D2H(); + c_s_row_ptr.D2H(); + c_d_row_ptr.D2H(); + c_c_row_ptr.D2H(); + c_z_row_ptr.D2H(); + c_s_col_ind.D2H(); + c_d_col_ind.D2H(); + c_c_col_ind.D2H(); + c_z_col_ind.D2H(); + + float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000}; + float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000}; + float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; + if (compare_result(expect_c_val, c_s_val.h_data, 7) && + compare_result(expect_c_val, c_d_val.h_data, 7) && + compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) && + compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) && + compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7) + ) + printf("SpGEMM pass\n"); + else { + printf("SpGEMM fail\n"); + test_passed = false; + } +} + +int main() { + test_cusparseSpGEMM(); + + if (test_passed) + return 0; + return -1; +} diff --git a/features/feature_case/cusparse/cusparse_5.cu b/features/feature_case/cusparse/cusparse_5.cu new file mode 100644 index 000000000..bb47948ec --- /dev/null +++ b/features/feature_case/cusparse/cusparse_5.cu @@ -0,0 +1,289 @@ +// ===------- cusparse_3.cu -------------------------------- *- CUDA -* ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===----------------------------------------------------------------------===// + +#include "cusparse.h" + +#include +#include +#include +#include + +template +struct Data { + float *h_data; + d_data_t *d_data; + int element_num; + Data(int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + memset(h_data, 0, sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + } + Data(float *input_data, int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + memcpy(h_data, input_data, sizeof(float) * element_num); + } + ~Data() { + free(h_data); + cudaFree(d_data); + } + void H2D() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + from_float_convert(h_data, h_temp); + cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num, + cudaMemcpyHostToDevice); + free(h_temp); + } + void D2H() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num, + cudaMemcpyDeviceToHost); + to_float_convert(h_temp, h_data); + free(h_temp); + } + +private: + inline void from_float_convert(float *in, d_data_t *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } + inline void to_float_convert(d_data_t *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } +}; +template <> +inline void Data::from_float_convert(float *in, float2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} +template <> +inline void Data::from_float_convert(float *in, double2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} + +template <> +inline void Data::to_float_convert(float2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} +template <> +inline void Data::to_float_convert(double2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} + +bool compare_result(float *expect, float *result, int element_num) { + for (int i = 0; i < element_num; i++) { + if (std::abs(result[i] - expect[i]) >= 0.05) { + return false; + } + } + return true; +} + +bool compare_result(float *expect, float *result, std::vector indices) { + for (int i = 0; i < indices.size(); i++) { + if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) { + return false; + } + } + return true; +} + +bool test_passed = true; + +const bool run_complex_datatype = true; + +// A * C = B +// +// | 1 1 2 | | 1 | | 9 | +// | 0 1 3 | * | 2 | = | 11 | +// | 0 0 1 | | 3 | | 3 | +void test_cusparseSpSV() { + std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; + Data a_s_val(a_val_vec.data(), 6); + Data a_d_val(a_val_vec.data(), 6); + Data a_c_val(a_val_vec.data(), 6); + Data a_z_val(a_val_vec.data(), 6); + std::vector a_row_ptr_vec = {0, 3, 5, 6}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3}; + Data a_col_ind(a_col_ind_vec.data(), 6); + + std::vector b_vec = {9, 11, 3}; + Data b_s(b_vec.data(), 3); + Data b_d(b_vec.data(), 3); + Data b_c(b_vec.data(), 3); + Data b_z(b_vec.data(), 3); + + Data c_s(3); + Data c_d(3); + Data c_c(3); + Data c_z(3); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + cusparseHandle_t handle; + cusparseCreate(&handle); + + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE); + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s.H2D(); + b_d.H2D(); + b_c.H2D(); + b_z.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + + cusparseSpMatDescr_t a_descr_s; + cusparseSpMatDescr_t a_descr_d; + cusparseSpMatDescr_t a_descr_c; + cusparseSpMatDescr_t a_descr_z; + cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseDnVecDescr_t b_descr_s; + cusparseDnVecDescr_t b_descr_d; + cusparseDnVecDescr_t b_descr_c; + cusparseDnVecDescr_t b_descr_z; + cusparseCreateDnVec(&b_descr_s, 3, b_s.d_data, CUDA_R_32F); + cusparseCreateDnVec(&b_descr_d, 3, b_d.d_data, CUDA_R_64F); + cusparseCreateDnVec(&b_descr_c, 3, b_c.d_data, CUDA_C_32F); + cusparseCreateDnVec(&b_descr_z, 3, b_z.d_data, CUDA_C_64F); + + cusparseDnVecDescr_t c_descr_s; + cusparseDnVecDescr_t c_descr_d; + cusparseDnVecDescr_t c_descr_c; + cusparseDnVecDescr_t c_descr_z; + cusparseCreateDnVec(&c_descr_s, 3, c_s.d_data, CUDA_R_32F); + cusparseCreateDnVec(&c_descr_d, 3, c_d.d_data, CUDA_R_64F); + cusparseCreateDnVec(&c_descr_c, 3, c_c.d_data, CUDA_C_32F); + cusparseCreateDnVec(&c_descr_z, 3, c_z.d_data, CUDA_C_64F); + + cusparseFillMode_t uplo = CUSPARSE_FILL_MODE_UPPER; + cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseDiagType_t diag = CUSPARSE_DIAG_TYPE_UNIT; + cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + + cusparseSpSVDescr_t SpSVDescr_s; + cusparseSpSVDescr_t SpSVDescr_d; + cusparseSpSVDescr_t SpSVDescr_c; + cusparseSpSVDescr_t SpSVDescr_z; + cusparseSpSV_createDescr(&SpSVDescr_s); + cusparseSpSV_createDescr(&SpSVDescr_d); + cusparseSpSV_createDescr(&SpSVDescr_c); + cusparseSpSV_createDescr(&SpSVDescr_z); + + size_t ws_size_s = 0; + size_t ws_size_d = 0; + size_t ws_size_c = 0; + size_t ws_size_z = 0; + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s); + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d); + if (run_complex_datatype) { + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c); + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z); + } + + void *ws_s; + void *ws_d; + void *ws_c; + void *ws_z; + cudaMalloc(&ws_s, ws_size_s); + cudaMalloc(&ws_d, ws_size_d); + cudaMalloc(&ws_c, ws_size_c); + cudaMalloc(&ws_z, ws_size_z); + + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s); + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d); + if (run_complex_datatype) { + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c); + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z); + } + + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s); + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d); + if (run_complex_datatype) { + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c); + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z); + } + + c_s.D2H(); + c_d.D2H(); + c_c.D2H(); + c_z.D2H(); + + cudaStreamSynchronize(0); + + cudaFree(ws_s); + cudaFree(ws_d); + cudaFree(ws_c); + cudaFree(ws_z); + cusparseDestroySpMat(a_descr_s); + cusparseDestroySpMat(a_descr_d); + cusparseDestroySpMat(a_descr_c); + cusparseDestroySpMat(a_descr_z); + cusparseDestroyDnVec(b_descr_s); + cusparseDestroyDnVec(b_descr_d); + cusparseDestroyDnVec(b_descr_c); + cusparseDestroyDnVec(b_descr_z); + cusparseDestroyDnVec(c_descr_s); + cusparseDestroyDnVec(c_descr_d); + cusparseDestroyDnVec(c_descr_c); + cusparseDestroyDnVec(c_descr_z); + cusparseSpSV_destroyDescr(SpSVDescr_s); + cusparseSpSV_destroyDescr(SpSVDescr_d); + cusparseSpSV_destroyDescr(SpSVDescr_c); + cusparseSpSV_destroyDescr(SpSVDescr_z); + cusparseDestroy(handle); + + float expect_c[4] = {1, 2, 3}; + if (compare_result(expect_c, c_s.h_data, 3) && + compare_result(expect_c, c_d.h_data, 3) && + compare_result(expect_c, c_c.h_data, 3) && + compare_result(expect_c, c_z.h_data, 3)) + printf("SpSV pass\n"); + else { + printf("SpSV fail\n"); + test_passed = false; + } +} + +int main() { + test_cusparseSpSV(); + + if (test_passed) + return 0; + return -1; +} diff --git a/features/features.xml b/features/features.xml index 34fb3b016..ec0c8673e 100644 --- a/features/features.xml +++ b/features/features.xml @@ -254,6 +254,8 @@ + + diff --git a/features/test_feature.py b/features/test_feature.py index cefff8909..5bc05aa2e 100644 --- a/features/test_feature.py +++ b/features/test_feature.py @@ -38,7 +38,7 @@ 'math-bf16-conv', 'math-half-conv', 'math-bfloat16', 'libcu_atomic', 'test_shared_memory', 'cudnn-reduction', 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3', 'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6', - 'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", + 'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5", 'cudnn-GetErrorString', 'cub_device_histgram', 'cudnn-types', 'cudnn-version', 'cudnn-dropout', 'constant_attr', 'sync_warp_p2', 'occupancy_calculation', diff --git a/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml b/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml deleted file mode 100644 index 6da1f7005..000000000 --- a/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - WARNING: DON'T UPDATE THIS FILE MANUALLY!!! - This is auto-generated accessors configuration file which affects all tests in ported gcc suites - If you have any issue with this file please contact Compiler QA team - - - - - - - - - - diff --git a/help_function/config/TEMPLATE_help_function_sparse_buffer.xml b/help_function/config/TEMPLATE_help_function_sparse_buffer.xml new file mode 100644 index 000000000..9e20a921a --- /dev/null +++ b/help_function/config/TEMPLATE_help_function_sparse_buffer.xml @@ -0,0 +1,14 @@ + + + + test help function + + + + + + + + + + diff --git a/help_function/config/TEMPLATE_help_function_sparse_usm.xml b/help_function/config/TEMPLATE_help_function_sparse_usm.xml new file mode 100644 index 000000000..7dbc611de --- /dev/null +++ b/help_function/config/TEMPLATE_help_function_sparse_usm.xml @@ -0,0 +1,14 @@ + + + + test help function + + + + + + + + + + diff --git a/help_function/help_function.xml b/help_function/help_function.xml index 38cd52ecc..5eca8e704 100644 --- a/help_function/help_function.xml +++ b/help_function/help_function.xml @@ -185,8 +185,8 @@ - - + + diff --git a/help_function/src/sparse_utils_2_buffer.cpp b/help_function/src/sparse_utils_2_buffer.cpp index 9748c5d00..1cdc6f3c8 100644 --- a/help_function/src/sparse_utils_2_buffer.cpp +++ b/help_function/src/sparse_utils_2_buffer.cpp @@ -1438,6 +1438,644 @@ void test_cusparseCsrmvEx() { } } +// A * B = C +// +// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 | +// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 | +// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | +void test_cusparseSpGEMM() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.out_of_order_queue(); + std::vector a_val_vec = {1, 2, 3, 4}; + Data a_s_val(a_val_vec.data(), 4); + Data a_d_val(a_val_vec.data(), 4); + Data a_c_val(a_val_vec.data(), 4); + Data a_z_val(a_val_vec.data(), 4); + std::vector a_row_ptr_vec = {0, 2, 3, 4}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {1, 2, 2, 0}; + Data a_col_ind(a_col_ind_vec.data(), 4); + + std::vector b_val_vec = {1, 2, 3, 5, 6}; + Data b_s_val(b_val_vec.data(), 5); + Data b_d_val(b_val_vec.data(), 5); + Data b_c_val(b_val_vec.data(), 5); + Data b_z_val(b_val_vec.data(), 5); + std::vector b_row_ptr_vec = {0, 1, 3, 5}; + Data b_row_ptr(b_row_ptr_vec.data(), 4); + std::vector b_col_ind_vec = {0, 0, 1, 2, 3}; + Data b_col_ind(b_col_ind_vec.data(), 5); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + float beta = 0; + Data beta_s(&beta, 1); + Data beta_d(&beta, 1); + Data beta_c(&beta, 1); + Data beta_z(&beta, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:0: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s_val.H2D(); + b_d_val.H2D(); + b_c_val.H2D(); + b_z_val.H2D(); + b_row_ptr.H2D(); + b_col_ind.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + beta_s.H2D(); + beta_d.H2D(); + beta_c.H2D(); + beta_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + dpct::sparse::sparse_matrix_desc_t b_descr_s; + dpct::sparse::sparse_matrix_desc_t b_descr_d; + dpct::sparse::sparse_matrix_desc_t b_descr_c; + dpct::sparse::sparse_matrix_desc_t b_descr_z; + b_descr_s = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + b_descr_d = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + b_descr_c = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + b_descr_z = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + + dpct::sparse::sparse_matrix_desc_t c_descr_s; + dpct::sparse::sparse_matrix_desc_t c_descr_d; + dpct::sparse::sparse_matrix_desc_t c_descr_c; + dpct::sparse::sparse_matrix_desc_t c_descr_z; + c_descr_s = std::make_shared( + 3, 4, 0, c_s_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + c_descr_d = std::make_shared( + 3, 4, 0, c_d_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + c_descr_c = std::make_shared( + 3, 4, 0, c_c_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + c_descr_z = std::make_shared( + 3, 4, 0, c_z_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_c; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_z; + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_z); + + size_t ws_1_size_s = 0; + size_t ws_1_size_d = 0; + size_t ws_1_size_c = 0; + size_t ws_1_size_z = 0; + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, NULL); + } + + void *ws_1_s; + void *ws_1_d; + void *ws_1_c; + void *ws_1_z; + ws_1_s = dpct::dpct_malloc(ws_1_size_s); + ws_1_d = dpct::dpct_malloc(ws_1_size_d); + ws_1_c = dpct::dpct_malloc(ws_1_size_c); + ws_1_z = dpct::dpct_malloc(ws_1_size_z); + + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, ws_1_s); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, ws_1_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, ws_1_c); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, ws_1_z); + } + + size_t ws_2_size_s = 0; + size_t ws_2_size_d = 0; + size_t ws_2_size_c = 0; + size_t ws_2_size_z = 0; + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, NULL); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, NULL); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, NULL); + } + + void *ws_2_s; + void *ws_2_d; + void *ws_2_c; + void *ws_2_z; + ws_2_s = dpct::dpct_malloc(ws_2_size_s); + ws_2_d = dpct::dpct_malloc(ws_2_size_d); + ws_2_c = dpct::dpct_malloc(ws_2_size_c); + ws_2_z = dpct::dpct_malloc(ws_2_size_z); + + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, ws_2_s); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, ws_2_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, ws_2_c); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, ws_2_z); + } + + int64_t c_row_s; + int64_t c_row_d; + int64_t c_row_c; + int64_t c_row_z; + int64_t c_col_s; + int64_t c_col_d; + int64_t c_col_c; + int64_t c_col_z; + int64_t c_nnz_s; + int64_t c_nnz_d; + int64_t c_nnz_c; + int64_t c_nnz_z; + c_descr_s->get_size(&c_row_s, &c_col_s, &c_nnz_s); + c_descr_d->get_size(&c_row_d, &c_col_d, &c_nnz_d); + c_descr_c->get_size(&c_row_c, &c_col_c, &c_nnz_c); + c_descr_z->get_size(&c_row_z, &c_col_z, &c_nnz_z); + + Data c_s_val(c_nnz_s); + Data c_d_val(c_nnz_d); + Data c_c_val(c_nnz_c); + Data c_z_val(c_nnz_z); + Data c_s_col_ind(c_nnz_s); + Data c_d_col_ind(c_nnz_d); + Data c_c_col_ind(c_nnz_c); + Data c_z_col_ind(c_nnz_z); + + c_descr_s->set_pointers(c_s_row_ptr.d_data, c_s_col_ind.d_data, + c_s_val.d_data); + c_descr_d->set_pointers(c_d_row_ptr.d_data, c_d_col_ind.d_data, + c_d_val.d_data); + c_descr_c->set_pointers(c_c_row_ptr.d_data, c_c_col_ind.d_data, + c_c_val.d_data); + c_descr_z->set_pointers(c_z_row_ptr.d_data, c_z_col_ind.d_data, + c_z_val.d_data); + + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z); + } + + q_ct1.wait(); + + dpct::dpct_free(ws_1_s); + dpct::dpct_free(ws_1_d); + dpct::dpct_free(ws_1_c); + dpct::dpct_free(ws_1_z); + dpct::dpct_free(ws_2_s); + dpct::dpct_free(ws_2_d); + dpct::dpct_free(ws_2_c); + dpct::dpct_free(ws_2_z); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_z); + handle = nullptr; + + c_s_val.D2H(); + c_d_val.D2H(); + c_c_val.D2H(); + c_z_val.D2H(); + c_s_row_ptr.D2H(); + c_d_row_ptr.D2H(); + c_c_row_ptr.D2H(); + c_z_row_ptr.D2H(); + c_s_col_ind.D2H(); + c_d_col_ind.D2H(); + c_c_col_ind.D2H(); + c_z_col_ind.D2H(); + + float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000}; + float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000}; + float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; + if (compare_result(expect_c_val, c_s_val.h_data, 7) && + compare_result(expect_c_val, c_d_val.h_data, 7) && + compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) && + compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) && + compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7) + ) + printf("SpGEMM pass\n"); + else { + printf("SpGEMM fail\n"); + test_passed = false; + } +} + +// A * C = B +// +// | 1 1 2 | | 1 | | 9 | +// | 0 1 3 | * | 2 | = | 11 | +// | 0 0 1 | | 3 | | 3 | +void test_cusparseSpSV() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.default_queue(); + std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; + Data a_s_val(a_val_vec.data(), 6); + Data a_d_val(a_val_vec.data(), 6); + Data a_c_val(a_val_vec.data(), 6); + Data a_z_val(a_val_vec.data(), 6); + std::vector a_row_ptr_vec = {0, 3, 5, 6}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3}; + Data a_col_ind(a_col_ind_vec.data(), 6); + + std::vector b_vec = {9, 11, 3}; + Data b_s(b_vec.data(), 3); + Data b_d(b_vec.data(), 3); + Data b_c(b_vec.data(), 3); + Data b_z(b_vec.data(), 3); + + Data c_s(3); + Data c_d(3); + Data c_c(3); + Data c_z(3); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:1: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s.H2D(); + b_d.H2D(); + b_c.H2D(); + b_z.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + std::shared_ptr b_descr_s; + std::shared_ptr b_descr_d; + std::shared_ptr b_descr_c; + std::shared_ptr b_descr_z; + b_descr_s = std::make_shared( + 3, b_s.d_data, dpct::library_data_t::real_float); + b_descr_d = std::make_shared( + 3, b_d.d_data, dpct::library_data_t::real_double); + b_descr_c = std::make_shared( + 3, b_c.d_data, dpct::library_data_t::complex_float); + b_descr_z = std::make_shared( + 3, b_z.d_data, dpct::library_data_t::complex_double); + + std::shared_ptr c_descr_s; + std::shared_ptr c_descr_d; + std::shared_ptr c_descr_c; + std::shared_ptr c_descr_z; + c_descr_s = std::make_shared( + 3, c_s.d_data, dpct::library_data_t::real_float); + c_descr_d = std::make_shared( + 3, c_d.d_data, dpct::library_data_t::real_double); + c_descr_c = std::make_shared( + 3, c_c.d_data, dpct::library_data_t::complex_float); + c_descr_z = std::make_shared( + 3, c_z.d_data, dpct::library_data_t::complex_double); + + oneapi::mkl::uplo uplo = oneapi::mkl::uplo::upper; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + oneapi::mkl::diag diag = oneapi::mkl::diag::unit; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + + int SpSVDescr_s; + int SpSVDescr_d; + int SpSVDescr_c; + int SpSVDescr_z; + /* + DPCT1026:2: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:3: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:4: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:5: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + + size_t ws_size_s = 0; + size_t ws_size_d = 0; + size_t ws_size_c = 0; + size_t ws_size_z = 0; + /* + DPCT1026:6: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:7: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + if (run_complex_datatype) { + /* + DPCT1026:12: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:13: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + } + + void *ws_s; + void *ws_d; + void *ws_c; + void *ws_z; + ws_s = dpct::dpct_malloc(ws_size_s); + ws_d = dpct::dpct_malloc(ws_size_d); + ws_c = dpct::dpct_malloc(ws_size_c); + ws_z = dpct::dpct_malloc(ws_size_z); + + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_s); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_d); + if (run_complex_datatype) { + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_c); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_z); + } + + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, c_descr_s, + dpct::library_data_t::real_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, c_descr_d, + dpct::library_data_t::real_double); + if (run_complex_datatype) { + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, + dpct::library_data_t::complex_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, + dpct::library_data_t::complex_double); + } + + c_s.D2H(); + c_d.D2H(); + c_c.D2H(); + c_z.D2H(); + + q_ct1.wait(); + + dpct::dpct_free(ws_s); + dpct::dpct_free(ws_d); + dpct::dpct_free(ws_c); + dpct::dpct_free(ws_z); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + /* + DPCT1026:8: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:9: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:10: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:11: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + handle = nullptr; + + float expect_c[4] = {1, 2, 3}; + if (compare_result(expect_c, c_s.h_data, 3) && + compare_result(expect_c, c_d.h_data, 3) && + compare_result(expect_c, c_c.h_data, 3) && + compare_result(expect_c, c_z.h_data, 3)) + printf("SpSV pass\n"); + else { + printf("SpSV fail\n"); + test_passed = false; + } +} + int main() { test_cusparseSetGetStream(); test_cusparseTcsrmv_ge(); @@ -1449,6 +2087,8 @@ int main() { test_cusparseSpMM(); test_cusparseTcsrmv_mp(); test_cusparseCsrmvEx(); + test_cusparseSpGEMM(); + test_cusparseSpSV(); if (test_passed) return 0; diff --git a/help_function/src/sparse_utils_2_usm.cpp b/help_function/src/sparse_utils_2_usm.cpp index c90217533..c0f07c123 100644 --- a/help_function/src/sparse_utils_2_usm.cpp +++ b/help_function/src/sparse_utils_2_usm.cpp @@ -1445,6 +1445,644 @@ void test_cusparseCsrmvEx() { } } +// A * B = C +// +// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 | +// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 | +// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | +void test_cusparseSpGEMM() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + std::vector a_val_vec = {1, 2, 3, 4}; + Data a_s_val(a_val_vec.data(), 4); + Data a_d_val(a_val_vec.data(), 4); + Data a_c_val(a_val_vec.data(), 4); + Data a_z_val(a_val_vec.data(), 4); + std::vector a_row_ptr_vec = {0, 2, 3, 4}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {1, 2, 2, 0}; + Data a_col_ind(a_col_ind_vec.data(), 4); + + std::vector b_val_vec = {1, 2, 3, 5, 6}; + Data b_s_val(b_val_vec.data(), 5); + Data b_d_val(b_val_vec.data(), 5); + Data b_c_val(b_val_vec.data(), 5); + Data b_z_val(b_val_vec.data(), 5); + std::vector b_row_ptr_vec = {0, 1, 3, 5}; + Data b_row_ptr(b_row_ptr_vec.data(), 4); + std::vector b_col_ind_vec = {0, 0, 1, 2, 3}; + Data b_col_ind(b_col_ind_vec.data(), 5); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + float beta = 0; + Data beta_s(&beta, 1); + Data beta_d(&beta, 1); + Data beta_c(&beta, 1); + Data beta_z(&beta, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:0: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s_val.H2D(); + b_d_val.H2D(); + b_c_val.H2D(); + b_z_val.H2D(); + b_row_ptr.H2D(); + b_col_ind.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + beta_s.H2D(); + beta_d.H2D(); + beta_c.H2D(); + beta_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + dpct::sparse::sparse_matrix_desc_t b_descr_s; + dpct::sparse::sparse_matrix_desc_t b_descr_d; + dpct::sparse::sparse_matrix_desc_t b_descr_c; + dpct::sparse::sparse_matrix_desc_t b_descr_z; + b_descr_s = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + b_descr_d = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + b_descr_c = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + b_descr_z = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + + dpct::sparse::sparse_matrix_desc_t c_descr_s; + dpct::sparse::sparse_matrix_desc_t c_descr_d; + dpct::sparse::sparse_matrix_desc_t c_descr_c; + dpct::sparse::sparse_matrix_desc_t c_descr_z; + c_descr_s = std::make_shared( + 3, 4, 0, c_s_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + c_descr_d = std::make_shared( + 3, 4, 0, c_d_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + c_descr_c = std::make_shared( + 3, 4, 0, c_c_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + c_descr_z = std::make_shared( + 3, 4, 0, c_z_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_c; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_z; + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_z); + + size_t ws_1_size_s = 0; + size_t ws_1_size_d = 0; + size_t ws_1_size_c = 0; + size_t ws_1_size_z = 0; + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, NULL); + } + + void *ws_1_s; + void *ws_1_d; + void *ws_1_c; + void *ws_1_z; + ws_1_s = (void *)sycl::malloc_device(ws_1_size_s, q_ct1); + ws_1_d = (void *)sycl::malloc_device(ws_1_size_d, q_ct1); + ws_1_c = (void *)sycl::malloc_device(ws_1_size_c, q_ct1); + ws_1_z = (void *)sycl::malloc_device(ws_1_size_z, q_ct1); + + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, ws_1_s); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, ws_1_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, ws_1_c); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, ws_1_z); + } + + size_t ws_2_size_s = 0; + size_t ws_2_size_d = 0; + size_t ws_2_size_c = 0; + size_t ws_2_size_z = 0; + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, NULL); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, NULL); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, NULL); + } + + void *ws_2_s; + void *ws_2_d; + void *ws_2_c; + void *ws_2_z; + ws_2_s = (void *)sycl::malloc_device(ws_2_size_s, q_ct1); + ws_2_d = (void *)sycl::malloc_device(ws_2_size_d, q_ct1); + ws_2_c = (void *)sycl::malloc_device(ws_2_size_c, q_ct1); + ws_2_z = (void *)sycl::malloc_device(ws_2_size_z, q_ct1); + + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, ws_2_s); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, ws_2_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, ws_2_c); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, ws_2_z); + } + + int64_t c_row_s; + int64_t c_row_d; + int64_t c_row_c; + int64_t c_row_z; + int64_t c_col_s; + int64_t c_col_d; + int64_t c_col_c; + int64_t c_col_z; + int64_t c_nnz_s; + int64_t c_nnz_d; + int64_t c_nnz_c; + int64_t c_nnz_z; + c_descr_s->get_size(&c_row_s, &c_col_s, &c_nnz_s); + c_descr_d->get_size(&c_row_d, &c_col_d, &c_nnz_d); + c_descr_c->get_size(&c_row_c, &c_col_c, &c_nnz_c); + c_descr_z->get_size(&c_row_z, &c_col_z, &c_nnz_z); + + Data c_s_val(c_nnz_s); + Data c_d_val(c_nnz_d); + Data c_c_val(c_nnz_c); + Data c_z_val(c_nnz_z); + Data c_s_col_ind(c_nnz_s); + Data c_d_col_ind(c_nnz_d); + Data c_c_col_ind(c_nnz_c); + Data c_z_col_ind(c_nnz_z); + + c_descr_s->set_pointers(c_s_row_ptr.d_data, c_s_col_ind.d_data, + c_s_val.d_data); + c_descr_d->set_pointers(c_d_row_ptr.d_data, c_d_col_ind.d_data, + c_d_val.d_data); + c_descr_c->set_pointers(c_c_row_ptr.d_data, c_c_col_ind.d_data, + c_c_val.d_data); + c_descr_z->set_pointers(c_z_row_ptr.d_data, c_z_col_ind.d_data, + c_z_val.d_data); + + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z); + } + + q_ct1.wait(); + + sycl::free(ws_1_s, q_ct1); + sycl::free(ws_1_d, q_ct1); + sycl::free(ws_1_c, q_ct1); + sycl::free(ws_1_z, q_ct1); + sycl::free(ws_2_s, q_ct1); + sycl::free(ws_2_d, q_ct1); + sycl::free(ws_2_c, q_ct1); + sycl::free(ws_2_z, q_ct1); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_z); + handle = nullptr; + + c_s_val.D2H(); + c_d_val.D2H(); + c_c_val.D2H(); + c_z_val.D2H(); + c_s_row_ptr.D2H(); + c_d_row_ptr.D2H(); + c_c_row_ptr.D2H(); + c_z_row_ptr.D2H(); + c_s_col_ind.D2H(); + c_d_col_ind.D2H(); + c_c_col_ind.D2H(); + c_z_col_ind.D2H(); + + float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000}; + float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000}; + float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; + if (compare_result(expect_c_val, c_s_val.h_data, 7) && + compare_result(expect_c_val, c_d_val.h_data, 7) && + compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) && + compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) && + compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7) + ) + printf("SpGEMM pass\n"); + else { + printf("SpGEMM fail\n"); + test_passed = false; + } +} + +// A * C = B +// +// | 1 1 2 | | 1 | | 9 | +// | 0 1 3 | * | 2 | = | 11 | +// | 0 0 1 | | 3 | | 3 | +void test_cusparseSpSV() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.default_queue(); + std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; + Data a_s_val(a_val_vec.data(), 6); + Data a_d_val(a_val_vec.data(), 6); + Data a_c_val(a_val_vec.data(), 6); + Data a_z_val(a_val_vec.data(), 6); + std::vector a_row_ptr_vec = {0, 3, 5, 6}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3}; + Data a_col_ind(a_col_ind_vec.data(), 6); + + std::vector b_vec = {9, 11, 3}; + Data b_s(b_vec.data(), 3); + Data b_d(b_vec.data(), 3); + Data b_c(b_vec.data(), 3); + Data b_z(b_vec.data(), 3); + + Data c_s(3); + Data c_d(3); + Data c_c(3); + Data c_z(3); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:1: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s.H2D(); + b_d.H2D(); + b_c.H2D(); + b_z.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + std::shared_ptr b_descr_s; + std::shared_ptr b_descr_d; + std::shared_ptr b_descr_c; + std::shared_ptr b_descr_z; + b_descr_s = std::make_shared( + 3, b_s.d_data, dpct::library_data_t::real_float); + b_descr_d = std::make_shared( + 3, b_d.d_data, dpct::library_data_t::real_double); + b_descr_c = std::make_shared( + 3, b_c.d_data, dpct::library_data_t::complex_float); + b_descr_z = std::make_shared( + 3, b_z.d_data, dpct::library_data_t::complex_double); + + std::shared_ptr c_descr_s; + std::shared_ptr c_descr_d; + std::shared_ptr c_descr_c; + std::shared_ptr c_descr_z; + c_descr_s = std::make_shared( + 3, c_s.d_data, dpct::library_data_t::real_float); + c_descr_d = std::make_shared( + 3, c_d.d_data, dpct::library_data_t::real_double); + c_descr_c = std::make_shared( + 3, c_c.d_data, dpct::library_data_t::complex_float); + c_descr_z = std::make_shared( + 3, c_z.d_data, dpct::library_data_t::complex_double); + + oneapi::mkl::uplo uplo = oneapi::mkl::uplo::upper; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + oneapi::mkl::diag diag = oneapi::mkl::diag::unit; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + + int SpSVDescr_s; + int SpSVDescr_d; + int SpSVDescr_c; + int SpSVDescr_z; + /* + DPCT1026:2: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:3: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:4: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:5: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + + size_t ws_size_s = 0; + size_t ws_size_d = 0; + size_t ws_size_c = 0; + size_t ws_size_z = 0; + /* + DPCT1026:6: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:7: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + if (run_complex_datatype) { + /* + DPCT1026:12: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:13: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + } + + void *ws_s; + void *ws_d; + void *ws_c; + void *ws_z; + ws_s = (void *)sycl::malloc_device(ws_size_s, q_ct1); + ws_d = (void *)sycl::malloc_device(ws_size_d, q_ct1); + ws_c = (void *)sycl::malloc_device(ws_size_c, q_ct1); + ws_z = (void *)sycl::malloc_device(ws_size_z, q_ct1); + + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_s); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_d); + if (run_complex_datatype) { + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_c); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_z); + } + + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, c_descr_s, + dpct::library_data_t::real_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, c_descr_d, + dpct::library_data_t::real_double); + if (run_complex_datatype) { + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, + dpct::library_data_t::complex_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, + dpct::library_data_t::complex_double); + } + + c_s.D2H(); + c_d.D2H(); + c_c.D2H(); + c_z.D2H(); + + q_ct1.wait(); + + sycl::free(ws_s, q_ct1); + sycl::free(ws_d, q_ct1); + sycl::free(ws_c, q_ct1); + sycl::free(ws_z, q_ct1); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + /* + DPCT1026:8: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:9: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:10: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:11: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + handle = nullptr; + + float expect_c[4] = {1, 2, 3}; + if (compare_result(expect_c, c_s.h_data, 3) && + compare_result(expect_c, c_d.h_data, 3) && + compare_result(expect_c, c_c.h_data, 3) && + compare_result(expect_c, c_z.h_data, 3)) + printf("SpSV pass\n"); + else { + printf("SpSV fail\n"); + test_passed = false; + } +} + int main() { test_cusparseSetGetStream(); test_cusparseTcsrmv_ge(); @@ -1456,6 +2094,8 @@ int main() { test_cusparseSpMM(); test_cusparseTcsrmv_mp(); test_cusparseCsrmvEx(); + test_cusparseSpGEMM(); + test_cusparseSpSV(); if (test_passed) return 0;