From 1dbfdc96639202dc10027fec188d7383f62128fa Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 25 Aug 2023 10:18:19 +0800 Subject: [PATCH 1/7] Add test for cusparseSpGEMM Signed-off-by: Jiang, Zhiwei --- features/feature_case/cusparse/cusparse_4.cu | 377 +++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 features/feature_case/cusparse/cusparse_4.cu diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu new file mode 100644 index 000000000..52922d46b --- /dev/null +++ b/features/feature_case/cusparse/cusparse_4.cu @@ -0,0 +1,377 @@ +// ===------- cusparse_4.cu -------------------------------- *- CUDA -* ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===----------------------------------------------------------------------===// + +#include "cusparse.h" + +#include +#include +#include +#include + +template +struct Data { + float *h_data; + d_data_t *d_data; + int element_num; + Data(int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + memset(h_data, 0, sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + } + Data(float *input_data, int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + memcpy(h_data, input_data, sizeof(float) * element_num); + } + ~Data() { + free(h_data); + cudaFree(d_data); + } + void H2D() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + from_float_convert(h_data, h_temp); + cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num, + cudaMemcpyHostToDevice); + free(h_temp); + } + void D2H() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num, + cudaMemcpyDeviceToHost); + to_float_convert(h_temp, h_data); + free(h_temp); + } + +private: + inline void from_float_convert(float *in, d_data_t *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } + inline void to_float_convert(d_data_t *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } +}; +template <> +inline void Data::from_float_convert(float *in, float2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} +template <> +inline void Data::from_float_convert(float *in, double2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} + +template <> +inline void Data::to_float_convert(float2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} +template <> +inline void Data::to_float_convert(double2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} + +bool compare_result(float *expect, float *result, int element_num) { + for (int i = 0; i < element_num; i++) { + if (std::abs(result[i] - expect[i]) >= 0.05) { + return false; + } + } + return true; +} + +bool compare_result(float *expect, float *result, std::vector indices) { + for (int i = 0; i < indices.size(); i++) { + if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) { + return false; + } + } + return true; +} + +bool test_passed = true; + +const bool run_complex_datatype = false; + +// A * B = C +// +// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 | +// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 | +// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | +void test_cusparseSpGEMM() { + std::vector a_val_vec = {1, 2, 3, 4}; + Data a_s_val(a_val_vec.data(), 4); + Data a_d_val(a_val_vec.data(), 4); + Data a_c_val(a_val_vec.data(), 4); + Data a_z_val(a_val_vec.data(), 4); + std::vector a_row_ptr_vec = {0, 2, 3, 4}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {1, 2, 2, 0}; + Data a_col_ind(a_col_ind_vec.data(), 4); + + std::vector b_val_vec = {1, 2, 3, 5, 6}; + Data b_s_val(b_val_vec.data(), 5); + Data b_d_val(b_val_vec.data(), 5); + Data b_c_val(b_val_vec.data(), 5); + Data b_z_val(b_val_vec.data(), 5); + std::vector b_row_ptr_vec = {0, 1, 3, 5}; + Data b_row_ptr(b_row_ptr_vec.data(), 4); + std::vector b_col_ind_vec = {0, 0, 1, 2, 3}; + Data b_col_ind(b_col_ind_vec.data(), 5); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + float beta = 0; + Data beta_s(&beta, 1); + Data beta_d(&beta, 1); + Data beta_c(&beta, 1); + Data beta_z(&beta, 1); + + cusparseHandle_t handle; + cusparseCreate(&handle); + + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE); + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s_val.H2D(); + b_d_val.H2D(); + b_c_val.H2D(); + b_z_val.H2D(); + b_row_ptr.H2D(); + b_col_ind.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + beta_s.H2D(); + beta_d.H2D(); + beta_c.H2D(); + beta_z.H2D(); + + cusparseSpMatDescr_t a_descr_s; + cusparseSpMatDescr_t a_descr_d; + cusparseSpMatDescr_t a_descr_c; + cusparseSpMatDescr_t a_descr_z; + cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseSpMatDescr_t b_descr_s; + cusparseSpMatDescr_t b_descr_d; + cusparseSpMatDescr_t b_descr_c; + cusparseSpMatDescr_t b_descr_z; + cusparseCreateCsr(&b_descr_s, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&b_descr_d, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&b_descr_c, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&b_descr_z, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseSpMatDescr_t c_descr_s; + cusparseSpMatDescr_t c_descr_d; + cusparseSpMatDescr_t c_descr_c; + cusparseSpMatDescr_t c_descr_z; + cusparseCreateCsr(&c_descr_s, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&c_descr_d, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&c_descr_c, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&c_descr_z, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseSpGEMMDescr_t SpGEMMDescr_s; + cusparseSpGEMMDescr_t SpGEMMDescr_d; + cusparseSpGEMMDescr_t SpGEMMDescr_c; + cusparseSpGEMMDescr_t SpGEMMDescr_z; + cusparseSpGEMM_createDescr(&SpGEMMDescr_s); + cusparseSpGEMM_createDescr(&SpGEMMDescr_d); + cusparseSpGEMM_createDescr(&SpGEMMDescr_c); + cusparseSpGEMM_createDescr(&SpGEMMDescr_z); + + size_t ws_1_size_s = 0; + size_t ws_1_size_d = 0; + size_t ws_1_size_c = 0; + size_t ws_1_size_z = 0; + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_1_size_s, NULL); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_1_size_d, NULL); + if (run_complex_datatype) { + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_1_size_c, NULL); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_1_size_z, NULL); + } + + void *ws_1_s; + void *ws_1_d; + void *ws_1_c; + void *ws_1_z; + cudaMalloc(&ws_1_s, ws_1_size_s); + cudaMalloc(&ws_1_d, ws_1_size_d); + cudaMalloc(&ws_1_c, ws_1_size_c); + cudaMalloc(&ws_1_z, ws_1_size_z); + + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_1_size_s, ws_1_s); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_1_size_d, ws_1_d); + if (run_complex_datatype) { + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_1_size_c, ws_1_c); + cusparseSpGEMM_workEstimation(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_1_size_z, ws_1_z); + } + + size_t ws_2_size_s = 0; + size_t ws_2_size_d = 0; + size_t ws_2_size_c = 0; + size_t ws_2_size_z = 0; + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_2_size_s, NULL); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_2_size_d, NULL); + if (run_complex_datatype) { + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_2_size_c, NULL); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_2_size_z, NULL); + } + + void *ws_2_s; + void *ws_2_d; + void *ws_2_c; + void *ws_2_z; + cudaMalloc(&ws_2_s, ws_2_size_s); + cudaMalloc(&ws_2_d, ws_2_size_d); + cudaMalloc(&ws_2_c, ws_2_size_c); + cudaMalloc(&ws_2_z, ws_2_size_z); + + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s, &ws_2_size_s, ws_2_s); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d, &ws_2_size_d, ws_2_d); + if (run_complex_datatype) { + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c, &ws_2_size_c, ws_2_c); + cusparseSpGEMM_compute(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z, &ws_2_size_z, ws_2_z); + } + + int64_t c_row_s; + int64_t c_row_d; + int64_t c_row_c; + int64_t c_row_z; + int64_t c_col_s; + int64_t c_col_d; + int64_t c_col_c; + int64_t c_col_z; + int64_t c_nnz_s; + int64_t c_nnz_d; + int64_t c_nnz_c; + int64_t c_nnz_z; + cusparseSpMatGetSize(c_descr_s, &c_row_s, &c_col_s, &c_nnz_s); + cusparseSpMatGetSize(c_descr_d, &c_row_d, &c_col_d, &c_nnz_d); + cusparseSpMatGetSize(c_descr_c, &c_row_c, &c_col_c, &c_nnz_c); + cusparseSpMatGetSize(c_descr_z, &c_row_z, &c_col_z, &c_nnz_z); + + Data c_s_val(c_nnz_s); + Data c_d_val(c_nnz_d); + Data c_c_val(c_nnz_c); + Data c_z_val(c_nnz_z); + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + Data c_s_col_ind(c_nnz_s); + Data c_d_col_ind(c_nnz_d); + Data c_c_col_ind(c_nnz_c); + Data c_z_col_ind(c_nnz_z); + + cusparseCsrSetPointers(c_descr_s, c_s_row_ptr.d_data, c_s_col_ind.d_data, c_s_val.d_data); + cusparseCsrSetPointers(c_descr_d, c_d_row_ptr.d_data, c_d_col_ind.d_data, c_d_val.d_data); + cusparseCsrSetPointers(c_descr_c, c_c_row_ptr.d_data, c_c_col_ind.d_data, c_c_val.d_data); + cusparseCsrSetPointers(c_descr_z, c_z_row_ptr.d_data, c_z_col_ind.d_data, c_z_val.d_data); + + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, CUDA_R_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_s); + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, CUDA_R_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_d); + if (run_complex_datatype) { + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, beta_c.d_data, c_descr_c, CUDA_C_32F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_c); + cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, beta_z.d_data, c_descr_z, CUDA_C_64F, CUSPARSE_SPGEMM_DEFAULT, SpGEMMDescr_z); + } + + cudaStreamSynchronize(0); + + cudaFree(ws_1_s); + cudaFree(ws_1_d); + cudaFree(ws_1_c); + cudaFree(ws_1_z); + cudaFree(ws_2_s); + cudaFree(ws_2_d); + cudaFree(ws_2_c); + cudaFree(ws_2_z); + cusparseDestroySpMat(a_descr_s); + cusparseDestroySpMat(a_descr_d); + cusparseDestroySpMat(a_descr_c); + cusparseDestroySpMat(a_descr_z); + cusparseDestroySpMat(b_descr_s); + cusparseDestroySpMat(b_descr_d); + cusparseDestroySpMat(b_descr_c); + cusparseDestroySpMat(b_descr_z); + cusparseDestroySpMat(c_descr_s); + cusparseDestroySpMat(c_descr_d); + cusparseDestroySpMat(c_descr_c); + cusparseDestroySpMat(c_descr_z); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_s); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_d); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_c); + cusparseSpGEMM_destroyDescr(SpGEMMDescr_z); + cusparseDestroy(handle); + + c_s_val.D2H(); + c_d_val.D2H(); + c_c_val.D2H(); + c_z_val.D2H(); + c_s_row_ptr.D2H(); + c_d_row_ptr.D2H(); + c_c_row_ptr.D2H(); + c_z_row_ptr.D2H(); + c_s_col_ind.D2H(); + c_d_col_ind.D2H(); + c_c_col_ind.D2H(); + c_z_col_ind.D2H(); + + float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000}; + float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000}; + float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; + if (compare_result(expect_c_val, c_s_val.h_data, 7) && + compare_result(expect_c_val, c_d_val.h_data, 7) && + /*compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) &&*/ + compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && + /*compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&*/ + compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) /*&& + compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)*/ + ) + printf("SpGEMM pass\n"); + else { + printf("SpGEMM fail\n"); + test_passed = false; + } +} + +int main() { + // Re-enable below two tests until MKL issue fixed +#ifndef DPCT_USM_LEVEL_NONE + test_cusparseSpGEMM(); +#endif + + if (test_passed) + return 0; + return -1; +} From 2a699678776bb75b2aab05c81ed7cbac7df76cc2 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 25 Aug 2023 13:40:09 +0800 Subject: [PATCH 2/7] Add test for SpSV Signed-off-by: Jiang, Zhiwei --- features/feature_case/cusparse/cusparse_5.cu | 293 +++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 features/feature_case/cusparse/cusparse_5.cu diff --git a/features/feature_case/cusparse/cusparse_5.cu b/features/feature_case/cusparse/cusparse_5.cu new file mode 100644 index 000000000..d55192505 --- /dev/null +++ b/features/feature_case/cusparse/cusparse_5.cu @@ -0,0 +1,293 @@ +// ===------- cusparse_3.cu -------------------------------- *- CUDA -* ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===----------------------------------------------------------------------===// + +#include "cusparse.h" + +#include +#include +#include +#include + +template +struct Data { + float *h_data; + d_data_t *d_data; + int element_num; + Data(int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + memset(h_data, 0, sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + } + Data(float *input_data, int element_num) : element_num(element_num) { + h_data = (float *)malloc(sizeof(float) * element_num); + cudaMalloc(&d_data, sizeof(d_data_t) * element_num); + cudaMemset(d_data, 0, sizeof(d_data_t) * element_num); + memcpy(h_data, input_data, sizeof(float) * element_num); + } + ~Data() { + free(h_data); + cudaFree(d_data); + } + void H2D() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + from_float_convert(h_data, h_temp); + cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num, + cudaMemcpyHostToDevice); + free(h_temp); + } + void D2H() { + d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num); + memset(h_temp, 0, sizeof(d_data_t) * element_num); + cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num, + cudaMemcpyDeviceToHost); + to_float_convert(h_temp, h_data); + free(h_temp); + } + +private: + inline void from_float_convert(float *in, d_data_t *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } + inline void to_float_convert(d_data_t *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i]; + } +}; +template <> +inline void Data::from_float_convert(float *in, float2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} +template <> +inline void Data::from_float_convert(float *in, double2 *out) { + for (int i = 0; i < element_num; i++) + out[i].x = in[i]; +} + +template <> +inline void Data::to_float_convert(float2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} +template <> +inline void Data::to_float_convert(double2 *in, float *out) { + for (int i = 0; i < element_num; i++) + out[i] = in[i].x; +} + +bool compare_result(float *expect, float *result, int element_num) { + for (int i = 0; i < element_num; i++) { + if (std::abs(result[i] - expect[i]) >= 0.05) { + return false; + } + } + return true; +} + +bool compare_result(float *expect, float *result, std::vector indices) { + for (int i = 0; i < indices.size(); i++) { + if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) { + return false; + } + } + return true; +} + +bool test_passed = true; + +const bool run_complex_datatype = false; + +// A * C = B +// +// | 1 1 2 | | 1 | | 9 | +// | 0 1 3 | * | 2 | = | 11 | +// | 0 0 1 | | 3 | | 3 | + +void test_cusparseSpSV() { + std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; + Data a_s_val(a_val_vec.data(), 6); + Data a_d_val(a_val_vec.data(), 6); + Data a_c_val(a_val_vec.data(), 6); + Data a_z_val(a_val_vec.data(), 6); + std::vector a_row_ptr_vec = {0, 3, 5, 6}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3}; + Data a_col_ind(a_col_ind_vec.data(), 6); + + std::vector b_vec = {9, 11, 3}; + Data b_s(b_vec.data(), 3); + Data b_d(b_vec.data(), 3); + Data b_c(b_vec.data(), 3); + Data b_z(b_vec.data(), 3); + + Data c_s(3); + Data c_d(3); + Data c_c(3); + Data c_z(3); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + cusparseHandle_t handle; + cusparseCreate(&handle); + + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE); + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s.H2D(); + b_d.H2D(); + b_c.H2D(); + b_z.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + + cusparseSpMatDescr_t a_descr_s; + cusparseSpMatDescr_t a_descr_d; + cusparseSpMatDescr_t a_descr_c; + cusparseSpMatDescr_t a_descr_z; + cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + + cusparseDnVecDescr_t b_descr_s; + cusparseDnVecDescr_t b_descr_d; + cusparseDnVecDescr_t b_descr_c; + cusparseDnVecDescr_t b_descr_z; + cusparseCreateDnVec(&b_descr_s, 3, b_s.d_data, CUDA_R_32F); + cusparseCreateDnVec(&b_descr_d, 3, b_d.d_data, CUDA_R_64F); + cusparseCreateDnVec(&b_descr_c, 3, b_c.d_data, CUDA_C_32F); + cusparseCreateDnVec(&b_descr_z, 3, b_z.d_data, CUDA_C_64F); + + cusparseDnVecDescr_t c_descr_s; + cusparseDnVecDescr_t c_descr_d; + cusparseDnVecDescr_t c_descr_c; + cusparseDnVecDescr_t c_descr_z; + cusparseCreateDnVec(&c_descr_s, 3, c_s.d_data, CUDA_R_32F); + cusparseCreateDnVec(&c_descr_d, 3, c_d.d_data, CUDA_R_64F); + cusparseCreateDnVec(&c_descr_c, 3, c_c.d_data, CUDA_C_32F); + cusparseCreateDnVec(&c_descr_z, 3, c_z.d_data, CUDA_C_64F); + + cusparseFillMode_t uplo = CUSPARSE_FILL_MODE_UPPER; + cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo)); + cusparseDiagType_t diag = CUSPARSE_DIAG_TYPE_UNIT; + cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag)); + + cusparseSpSVDescr_t SpSVDescr_s; + cusparseSpSVDescr_t SpSVDescr_d; + cusparseSpSVDescr_t SpSVDescr_c; + cusparseSpSVDescr_t SpSVDescr_z; + cusparseSpSV_createDescr(&SpSVDescr_s); + cusparseSpSV_createDescr(&SpSVDescr_d); + cusparseSpSV_createDescr(&SpSVDescr_c); + cusparseSpSV_createDescr(&SpSVDescr_z); + + size_t ws_size_s = 0; + size_t ws_size_d = 0; + size_t ws_size_c = 0; + size_t ws_size_z = 0; + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s); + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d); + if (run_complex_datatype) { + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c); + cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z); + } + + void *ws_s; + void *ws_d; + void *ws_c; + void *ws_z; + cudaMalloc(&ws_s, ws_size_s); + cudaMalloc(&ws_d, ws_size_d); + cudaMalloc(&ws_c, ws_size_c); + cudaMalloc(&ws_z, ws_size_z); + + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s); + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d); + if (run_complex_datatype) { + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c); + cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z); + } + + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s); + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d); + if (run_complex_datatype) { + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c); + cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z); + } + + c_s.D2H(); + c_d.D2H(); + c_c.D2H(); + c_z.D2H(); + + cudaStreamSynchronize(0); + + cudaFree(ws_s); + cudaFree(ws_d); + cudaFree(ws_c); + cudaFree(ws_z); + cusparseDestroySpMat(a_descr_s); + cusparseDestroySpMat(a_descr_d); + cusparseDestroySpMat(a_descr_c); + cusparseDestroySpMat(a_descr_z); + cusparseDestroyDnVec(b_descr_s); + cusparseDestroyDnVec(b_descr_d); + cusparseDestroyDnVec(b_descr_c); + cusparseDestroyDnVec(b_descr_z); + cusparseDestroyDnVec(c_descr_s); + cusparseDestroyDnVec(c_descr_d); + cusparseDestroyDnVec(c_descr_c); + cusparseDestroyDnVec(c_descr_z); + cusparseSpSV_destroyDescr(SpSVDescr_s); + cusparseSpSV_destroyDescr(SpSVDescr_d); + cusparseSpSV_destroyDescr(SpSVDescr_c); + cusparseSpSV_destroyDescr(SpSVDescr_z); + cusparseDestroy(handle); + + float expect_c[4] = {1, 2, 3}; + if (compare_result(expect_c, c_s.h_data, 3) && + compare_result(expect_c, c_d.h_data, 3)/*&& + compare_result(expect_c, c_c.h_data, 3) && + compare_result(expect_c, c_z.h_data, 3)*/) + printf("SpSV pass\n"); + else { + printf("SpSV fail\n"); + test_passed = false; + } +} + +int main() { + // Re-enable below two tests until MKL issue fixed +#ifndef DPCT_USM_LEVEL_NONE + test_cusparseSpSV(); +#endif + + if (test_passed) + return 0; + return -1; +} From 6d7be31b624a0da627553080d437ebe6fdf3d941 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 28 Aug 2023 08:48:55 +0800 Subject: [PATCH 3/7] Add more tests Signed-off-by: Jiang, Zhiwei --- ...ATE_cusparse_after_112_skip_double_usm.xml | 14 + features/feature_case/cusparse/cusparse_4.cu | 2 +- features/feature_case/cusparse/cusparse_5.cu | 3 +- features/features.xml | 2 + features/test_feature.py | 2 +- help_function/src/sparse_utils_2_buffer.cpp | 635 ++++++++++++++++++ help_function/src/sparse_utils_2_usm.cpp | 635 ++++++++++++++++++ 7 files changed, 1289 insertions(+), 4 deletions(-) create mode 100644 features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml diff --git a/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml b/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml new file mode 100644 index 000000000..b9cc01791 --- /dev/null +++ b/features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml @@ -0,0 +1,14 @@ + + + + test + + + + + + + + + + diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu index 52922d46b..9ccc90564 100644 --- a/features/feature_case/cusparse/cusparse_4.cu +++ b/features/feature_case/cusparse/cusparse_4.cu @@ -366,7 +366,7 @@ void test_cusparseSpGEMM() { } int main() { - // Re-enable below two tests until MKL issue fixed + // Re-enable below test until MKL issue fixed #ifndef DPCT_USM_LEVEL_NONE test_cusparseSpGEMM(); #endif diff --git a/features/feature_case/cusparse/cusparse_5.cu b/features/feature_case/cusparse/cusparse_5.cu index d55192505..c6fac7be1 100644 --- a/features/feature_case/cusparse/cusparse_5.cu +++ b/features/feature_case/cusparse/cusparse_5.cu @@ -110,7 +110,6 @@ const bool run_complex_datatype = false; // | 1 1 2 | | 1 | | 9 | // | 0 1 3 | * | 2 | = | 11 | // | 0 0 1 | | 3 | | 3 | - void test_cusparseSpSV() { std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; Data a_s_val(a_val_vec.data(), 6); @@ -282,7 +281,7 @@ void test_cusparseSpSV() { } int main() { - // Re-enable below two tests until MKL issue fixed + // Re-enable below test until MKL issue fixed #ifndef DPCT_USM_LEVEL_NONE test_cusparseSpSV(); #endif diff --git a/features/features.xml b/features/features.xml index 5a3641a52..432adf62f 100644 --- a/features/features.xml +++ b/features/features.xml @@ -247,6 +247,8 @@ + + diff --git a/features/test_feature.py b/features/test_feature.py index 4c65e54ea..d9525316a 100644 --- a/features/test_feature.py +++ b/features/test_feature.py @@ -38,7 +38,7 @@ 'math-bf16-conv', 'math-half-conv', 'math-bfloat16', 'libcu_atomic', 'test_shared_memory', 'cudnn-reduction', 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3', 'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6', - 'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", + 'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5", 'cudnn-GetErrorString', 'cudnn-types', 'cudnn-version', 'cudnn-dropout', 'constant_attr', 'sync_warp_p2', 'occupancy_calculation', diff --git a/help_function/src/sparse_utils_2_buffer.cpp b/help_function/src/sparse_utils_2_buffer.cpp index 80d65c6ad..c5c8c4a60 100644 --- a/help_function/src/sparse_utils_2_buffer.cpp +++ b/help_function/src/sparse_utils_2_buffer.cpp @@ -1138,6 +1138,639 @@ void test_cusparseSpMM() { } } +// A * B = C +// +// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 | +// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 | +// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | +void test_cusparseSpGEMM() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.default_queue(); + std::vector a_val_vec = {1, 2, 3, 4}; + Data a_s_val(a_val_vec.data(), 4); + Data a_d_val(a_val_vec.data(), 4); + Data a_c_val(a_val_vec.data(), 4); + Data a_z_val(a_val_vec.data(), 4); + std::vector a_row_ptr_vec = {0, 2, 3, 4}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {1, 2, 2, 0}; + Data a_col_ind(a_col_ind_vec.data(), 4); + + std::vector b_val_vec = {1, 2, 3, 5, 6}; + Data b_s_val(b_val_vec.data(), 5); + Data b_d_val(b_val_vec.data(), 5); + Data b_c_val(b_val_vec.data(), 5); + Data b_z_val(b_val_vec.data(), 5); + std::vector b_row_ptr_vec = {0, 1, 3, 5}; + Data b_row_ptr(b_row_ptr_vec.data(), 4); + std::vector b_col_ind_vec = {0, 0, 1, 2, 3}; + Data b_col_ind(b_col_ind_vec.data(), 5); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + float beta = 0; + Data beta_s(&beta, 1); + Data beta_d(&beta, 1); + Data beta_c(&beta, 1); + Data beta_z(&beta, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:0: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s_val.H2D(); + b_d_val.H2D(); + b_c_val.H2D(); + b_z_val.H2D(); + b_row_ptr.H2D(); + b_col_ind.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + beta_s.H2D(); + beta_d.H2D(); + beta_c.H2D(); + beta_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + dpct::sparse::sparse_matrix_desc_t b_descr_s; + dpct::sparse::sparse_matrix_desc_t b_descr_d; + dpct::sparse::sparse_matrix_desc_t b_descr_c; + dpct::sparse::sparse_matrix_desc_t b_descr_z; + b_descr_s = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + b_descr_d = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + b_descr_c = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + b_descr_z = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + dpct::sparse::sparse_matrix_desc_t c_descr_s; + dpct::sparse::sparse_matrix_desc_t c_descr_d; + dpct::sparse::sparse_matrix_desc_t c_descr_c; + dpct::sparse::sparse_matrix_desc_t c_descr_z; + c_descr_s = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::real_float, dpct::sparse::matrix_format::csr); + c_descr_d = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::real_double, dpct::sparse::matrix_format::csr); + c_descr_c = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::complex_float, dpct::sparse::matrix_format::csr); + c_descr_z = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::complex_double, dpct::sparse::matrix_format::csr); + + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_c; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_z; + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_z); + + size_t ws_1_size_s = 0; + size_t ws_1_size_d = 0; + size_t ws_1_size_c = 0; + size_t ws_1_size_z = 0; + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, NULL); + } + + void *ws_1_s; + void *ws_1_d; + void *ws_1_c; + void *ws_1_z; + ws_1_s = dpct::dpct_malloc(ws_1_size_s); + ws_1_d = dpct::dpct_malloc(ws_1_size_d); + ws_1_c = dpct::dpct_malloc(ws_1_size_c); + ws_1_z = dpct::dpct_malloc(ws_1_size_z); + + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, ws_1_s); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, ws_1_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, ws_1_c); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, ws_1_z); + } + + size_t ws_2_size_s = 0; + size_t ws_2_size_d = 0; + size_t ws_2_size_c = 0; + size_t ws_2_size_z = 0; + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, NULL); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, NULL); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, NULL); + } + + void *ws_2_s; + void *ws_2_d; + void *ws_2_c; + void *ws_2_z; + ws_2_s = dpct::dpct_malloc(ws_2_size_s); + ws_2_d = dpct::dpct_malloc(ws_2_size_d); + ws_2_c = dpct::dpct_malloc(ws_2_size_c); + ws_2_z = dpct::dpct_malloc(ws_2_size_z); + + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, ws_2_s); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, ws_2_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, ws_2_c); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, ws_2_z); + } + + int64_t c_row_s; + int64_t c_row_d; + int64_t c_row_c; + int64_t c_row_z; + int64_t c_col_s; + int64_t c_col_d; + int64_t c_col_c; + int64_t c_col_z; + int64_t c_nnz_s; + int64_t c_nnz_d; + int64_t c_nnz_c; + int64_t c_nnz_z; + c_descr_s->get_size(&c_row_s, &c_col_s, &c_nnz_s); + c_descr_d->get_size(&c_row_d, &c_col_d, &c_nnz_d); + c_descr_c->get_size(&c_row_c, &c_col_c, &c_nnz_c); + c_descr_z->get_size(&c_row_z, &c_col_z, &c_nnz_z); + + Data c_s_val(c_nnz_s); + Data c_d_val(c_nnz_d); + Data c_c_val(c_nnz_c); + Data c_z_val(c_nnz_z); + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + Data c_s_col_ind(c_nnz_s); + Data c_d_col_ind(c_nnz_d); + Data c_c_col_ind(c_nnz_c); + Data c_z_col_ind(c_nnz_z); + + c_descr_s->set_pointers(c_s_row_ptr.d_data, c_s_col_ind.d_data, + c_s_val.d_data); + c_descr_d->set_pointers(c_d_row_ptr.d_data, c_d_col_ind.d_data, + c_d_val.d_data); + c_descr_c->set_pointers(c_c_row_ptr.d_data, c_c_col_ind.d_data, + c_c_val.d_data); + c_descr_z->set_pointers(c_z_row_ptr.d_data, c_z_col_ind.d_data, + c_z_val.d_data); + + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z); + } + + q_ct1.wait(); + + dpct::dpct_free(ws_1_s); + dpct::dpct_free(ws_1_d); + dpct::dpct_free(ws_1_c); + dpct::dpct_free(ws_1_z); + dpct::dpct_free(ws_2_s); + dpct::dpct_free(ws_2_d); + dpct::dpct_free(ws_2_c); + dpct::dpct_free(ws_2_z); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_z); + handle = nullptr; + + c_s_val.D2H(); + c_d_val.D2H(); + c_c_val.D2H(); + c_z_val.D2H(); + c_s_row_ptr.D2H(); + c_d_row_ptr.D2H(); + c_c_row_ptr.D2H(); + c_z_row_ptr.D2H(); + c_s_col_ind.D2H(); + c_d_col_ind.D2H(); + c_c_col_ind.D2H(); + c_z_col_ind.D2H(); + + float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000}; + float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000}; + float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; + if (compare_result(expect_c_val, c_s_val.h_data, 7) && + compare_result(expect_c_val, c_d_val.h_data, 7) && + /*compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) &&*/ + compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && + /*compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&*/ + compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) /*&& + compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)*/ + ) + printf("SpGEMM pass\n"); + else { + printf("SpGEMM fail\n"); + test_passed = false; + } +} + +// A * C = B +// +// | 1 1 2 | | 1 | | 9 | +// | 0 1 3 | * | 2 | = | 11 | +// | 0 0 1 | | 3 | | 3 | +void test_cusparseSpSV() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.default_queue(); + std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; + Data a_s_val(a_val_vec.data(), 6); + Data a_d_val(a_val_vec.data(), 6); + Data a_c_val(a_val_vec.data(), 6); + Data a_z_val(a_val_vec.data(), 6); + std::vector a_row_ptr_vec = {0, 3, 5, 6}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3}; + Data a_col_ind(a_col_ind_vec.data(), 6); + + std::vector b_vec = {9, 11, 3}; + Data b_s(b_vec.data(), 3); + Data b_d(b_vec.data(), 3); + Data b_c(b_vec.data(), 3); + Data b_z(b_vec.data(), 3); + + Data c_s(3); + Data c_d(3); + Data c_c(3); + Data c_z(3); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:1: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s.H2D(); + b_d.H2D(); + b_c.H2D(); + b_z.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + std::shared_ptr b_descr_s; + std::shared_ptr b_descr_d; + std::shared_ptr b_descr_c; + std::shared_ptr b_descr_z; + b_descr_s = std::make_shared( + 3, b_s.d_data, dpct::library_data_t::real_float); + b_descr_d = std::make_shared( + 3, b_d.d_data, dpct::library_data_t::real_double); + b_descr_c = std::make_shared( + 3, b_c.d_data, dpct::library_data_t::complex_float); + b_descr_z = std::make_shared( + 3, b_z.d_data, dpct::library_data_t::complex_double); + + std::shared_ptr c_descr_s; + std::shared_ptr c_descr_d; + std::shared_ptr c_descr_c; + std::shared_ptr c_descr_z; + c_descr_s = std::make_shared( + 3, c_s.d_data, dpct::library_data_t::real_float); + c_descr_d = std::make_shared( + 3, c_d.d_data, dpct::library_data_t::real_double); + c_descr_c = std::make_shared( + 3, c_c.d_data, dpct::library_data_t::complex_float); + c_descr_z = std::make_shared( + 3, c_z.d_data, dpct::library_data_t::complex_double); + + oneapi::mkl::uplo uplo = oneapi::mkl::uplo::upper; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + oneapi::mkl::diag diag = oneapi::mkl::diag::unit; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + + int SpSVDescr_s; + int SpSVDescr_d; + int SpSVDescr_c; + int SpSVDescr_z; + /* + DPCT1026:2: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:3: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:4: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:5: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + + size_t ws_size_s = 0; + size_t ws_size_d = 0; + size_t ws_size_c = 0; + size_t ws_size_z = 0; + /* + DPCT1026:6: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:7: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + if (run_complex_datatype) { + /* + DPCT1026:12: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:13: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + } + + void *ws_s; + void *ws_d; + void *ws_c; + void *ws_z; + ws_s = dpct::dpct_malloc(ws_size_s); + ws_d = dpct::dpct_malloc(ws_size_d); + ws_c = dpct::dpct_malloc(ws_size_c); + ws_z = dpct::dpct_malloc(ws_size_z); + + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_s); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_d); + if (run_complex_datatype) { + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_c); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_z); + } + + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, c_descr_s, + dpct::library_data_t::real_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, c_descr_d, + dpct::library_data_t::real_double); + if (run_complex_datatype) { + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, + dpct::library_data_t::complex_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, + dpct::library_data_t::complex_double); + } + + c_s.D2H(); + c_d.D2H(); + c_c.D2H(); + c_z.D2H(); + + q_ct1.wait(); + + dpct::dpct_free(ws_s); + dpct::dpct_free(ws_d); + dpct::dpct_free(ws_c); + dpct::dpct_free(ws_z); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + /* + DPCT1026:8: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:9: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:10: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:11: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + handle = nullptr; + + float expect_c[4] = {1, 2, 3}; + if (compare_result(expect_c, c_s.h_data, 3) && + compare_result(expect_c, c_d.h_data, 3)/*&& + compare_result(expect_c, c_c.h_data, 3) && + compare_result(expect_c, c_z.h_data, 3)*/) + printf("SpSV pass\n"); + else { + printf("SpSV fail\n"); + test_passed = false; + } +} + int main() { test_cusparseSetGetStream(); test_cusparseTcsrmv_ge(); @@ -1147,6 +1780,8 @@ int main() { test_cusparseTcsrsv(); // test_cusparseSpMV(); // Re-enable this test until MKL issue fixed // test_cusparseSpMM(); // Re-enable this test until MKL issue fixed + // test_cusparseSpGEMM(); // Re-enable this test until MKL issue fixed + // test_cusparseSpSV() // Re-enable this test until MKL issue fixed if (test_passed) return 0; diff --git a/help_function/src/sparse_utils_2_usm.cpp b/help_function/src/sparse_utils_2_usm.cpp index c72fe6fe3..079f29f59 100644 --- a/help_function/src/sparse_utils_2_usm.cpp +++ b/help_function/src/sparse_utils_2_usm.cpp @@ -1145,6 +1145,639 @@ void test_cusparseSpMM() { } } +// A * B = C +// +// | 0 1 2 | | 1 0 0 0 | | 2 3 10 12 | +// | 0 0 3 | * | 2 3 0 0 | = | 0 0 15 18 | +// | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | +void test_cusparseSpGEMM() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.default_queue(); + std::vector a_val_vec = {1, 2, 3, 4}; + Data a_s_val(a_val_vec.data(), 4); + Data a_d_val(a_val_vec.data(), 4); + Data a_c_val(a_val_vec.data(), 4); + Data a_z_val(a_val_vec.data(), 4); + std::vector a_row_ptr_vec = {0, 2, 3, 4}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {1, 2, 2, 0}; + Data a_col_ind(a_col_ind_vec.data(), 4); + + std::vector b_val_vec = {1, 2, 3, 5, 6}; + Data b_s_val(b_val_vec.data(), 5); + Data b_d_val(b_val_vec.data(), 5); + Data b_c_val(b_val_vec.data(), 5); + Data b_z_val(b_val_vec.data(), 5); + std::vector b_row_ptr_vec = {0, 1, 3, 5}; + Data b_row_ptr(b_row_ptr_vec.data(), 4); + std::vector b_col_ind_vec = {0, 0, 1, 2, 3}; + Data b_col_ind(b_col_ind_vec.data(), 5); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + float beta = 0; + Data beta_s(&beta, 1); + Data beta_d(&beta, 1); + Data beta_c(&beta, 1); + Data beta_z(&beta, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:0: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s_val.H2D(); + b_d_val.H2D(); + b_c_val.H2D(); + b_z_val.H2D(); + b_row_ptr.H2D(); + b_col_ind.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + beta_s.H2D(); + beta_d.H2D(); + beta_c.H2D(); + beta_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + dpct::sparse::sparse_matrix_desc_t b_descr_s; + dpct::sparse::sparse_matrix_desc_t b_descr_d; + dpct::sparse::sparse_matrix_desc_t b_descr_c; + dpct::sparse::sparse_matrix_desc_t b_descr_z; + b_descr_s = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + b_descr_d = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + b_descr_c = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + b_descr_z = std::make_shared( + 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + dpct::sparse::sparse_matrix_desc_t c_descr_s; + dpct::sparse::sparse_matrix_desc_t c_descr_d; + dpct::sparse::sparse_matrix_desc_t c_descr_c; + dpct::sparse::sparse_matrix_desc_t c_descr_z; + c_descr_s = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::real_float, dpct::sparse::matrix_format::csr); + c_descr_d = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::real_double, dpct::sparse::matrix_format::csr); + c_descr_c = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::complex_float, dpct::sparse::matrix_format::csr); + c_descr_z = std::make_shared( + 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, + dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, + dpct::library_data_t::complex_double, dpct::sparse::matrix_format::csr); + + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_c; + oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_z; + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::init_matmat_descr(&SpGEMMDescr_z); + + size_t ws_1_size_s = 0; + size_t ws_1_size_d = 0; + size_t ws_1_size_c = 0; + size_t ws_1_size_z = 0; + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, NULL); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, NULL); + } + + void *ws_1_s; + void *ws_1_d; + void *ws_1_c; + void *ws_1_z; + ws_1_s = (void *)sycl::malloc_device(ws_1_size_s, q_ct1); + ws_1_d = (void *)sycl::malloc_device(ws_1_size_d, q_ct1); + ws_1_c = (void *)sycl::malloc_device(ws_1_size_c, q_ct1); + ws_1_z = (void *)sycl::malloc_device(ws_1_size_z, q_ct1); + + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s, &ws_1_size_s, ws_1_s); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d, &ws_1_size_d, ws_1_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_1_size_c, ws_1_c); + dpct::sparse::spgemm_work_estimation( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_1_size_z, ws_1_z); + } + + size_t ws_2_size_s = 0; + size_t ws_2_size_d = 0; + size_t ws_2_size_c = 0; + size_t ws_2_size_z = 0; + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, NULL); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, NULL); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, NULL); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, NULL); + } + + void *ws_2_s; + void *ws_2_d; + void *ws_2_c; + void *ws_2_z; + ws_2_s = (void *)sycl::malloc_device(ws_2_size_s, q_ct1); + ws_2_d = (void *)sycl::malloc_device(ws_2_size_d, q_ct1); + ws_2_c = (void *)sycl::malloc_device(ws_2_size_c, q_ct1); + ws_2_z = (void *)sycl::malloc_device(ws_2_size_z, q_ct1); + + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, beta_s.d_data, c_descr_s, + SpGEMMDescr_s, &ws_2_size_s, ws_2_s); + dpct::sparse::spgemm_compute(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, beta_d.d_data, c_descr_d, + SpGEMMDescr_d, &ws_2_size_d, ws_2_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c, &ws_2_size_c, ws_2_c); + dpct::sparse::spgemm_compute( + *handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z, &ws_2_size_z, ws_2_z); + } + + int64_t c_row_s; + int64_t c_row_d; + int64_t c_row_c; + int64_t c_row_z; + int64_t c_col_s; + int64_t c_col_d; + int64_t c_col_c; + int64_t c_col_z; + int64_t c_nnz_s; + int64_t c_nnz_d; + int64_t c_nnz_c; + int64_t c_nnz_z; + c_descr_s->get_size(&c_row_s, &c_col_s, &c_nnz_s); + c_descr_d->get_size(&c_row_d, &c_col_d, &c_nnz_d); + c_descr_c->get_size(&c_row_c, &c_col_c, &c_nnz_c); + c_descr_z->get_size(&c_row_z, &c_col_z, &c_nnz_z); + + Data c_s_val(c_nnz_s); + Data c_d_val(c_nnz_d); + Data c_c_val(c_nnz_c); + Data c_z_val(c_nnz_z); + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + Data c_s_col_ind(c_nnz_s); + Data c_d_col_ind(c_nnz_d); + Data c_c_col_ind(c_nnz_c); + Data c_z_col_ind(c_nnz_z); + + c_descr_s->set_pointers(c_s_row_ptr.d_data, c_s_col_ind.d_data, + c_s_val.d_data); + c_descr_d->set_pointers(c_d_row_ptr.d_data, c_d_col_ind.d_data, + c_d_val.d_data); + c_descr_c->set_pointers(c_c_row_ptr.d_data, c_c_col_ind.d_data, + c_c_val.d_data); + c_descr_z->set_pointers(c_z_row_ptr.d_data, c_z_col_ind.d_data, + c_z_val.d_data); + + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_s.d_data, a_descr_s, b_descr_s, + beta_s.d_data, c_descr_s, SpGEMMDescr_s); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_d.d_data, a_descr_d, b_descr_d, + beta_d.d_data, c_descr_d, SpGEMMDescr_d); + if (run_complex_datatype) { + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, + beta_c.d_data, c_descr_c, SpGEMMDescr_c); + dpct::sparse::spgemm_finalize(*handle, oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, + beta_z.d_data, c_descr_z, SpGEMMDescr_z); + } + + q_ct1.wait(); + + sycl::free(ws_1_s, q_ct1); + sycl::free(ws_1_d, q_ct1); + sycl::free(ws_1_c, q_ct1); + sycl::free(ws_1_z, q_ct1); + sycl::free(ws_2_s, q_ct1); + sycl::free(ws_2_d, q_ct1); + sycl::free(ws_2_c, q_ct1); + sycl::free(ws_2_z, q_ct1); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_s); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_d); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_c); + oneapi::mkl::sparse::release_matmat_descr(&SpGEMMDescr_z); + handle = nullptr; + + c_s_val.D2H(); + c_d_val.D2H(); + c_c_val.D2H(); + c_z_val.D2H(); + c_s_row_ptr.D2H(); + c_d_row_ptr.D2H(); + c_c_row_ptr.D2H(); + c_z_row_ptr.D2H(); + c_s_col_ind.D2H(); + c_d_col_ind.D2H(); + c_c_col_ind.D2H(); + c_z_col_ind.D2H(); + + float expect_c_val[7] = {2.000000, 3.000000, 10.000000, 12.000000, 15.000000, 18.000000, 4.000000}; + float expect_c_row_ptr[4] = {0.000000, 4.000000, 6.000000, 7.000000}; + float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; + if (compare_result(expect_c_val, c_s_val.h_data, 7) && + compare_result(expect_c_val, c_d_val.h_data, 7) && + /*compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) &&*/ + compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && + /*compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&*/ + compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) /*&& + compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)*/ + ) + printf("SpGEMM pass\n"); + else { + printf("SpGEMM fail\n"); + test_passed = false; + } +} + +// A * C = B +// +// | 1 1 2 | | 1 | | 9 | +// | 0 1 3 | * | 2 | = | 11 | +// | 0 0 1 | | 3 | | 3 | +void test_cusparseSpSV() { + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.default_queue(); + std::vector a_val_vec = {1, 1, 2, 1, 3, 1}; + Data a_s_val(a_val_vec.data(), 6); + Data a_d_val(a_val_vec.data(), 6); + Data a_c_val(a_val_vec.data(), 6); + Data a_z_val(a_val_vec.data(), 6); + std::vector a_row_ptr_vec = {0, 3, 5, 6}; + Data a_row_ptr(a_row_ptr_vec.data(), 4); + std::vector a_col_ind_vec = {0, 1, 2, 1, 2, 3}; + Data a_col_ind(a_col_ind_vec.data(), 6); + + std::vector b_vec = {9, 11, 3}; + Data b_s(b_vec.data(), 3); + Data b_d(b_vec.data(), 3); + Data b_c(b_vec.data(), 3); + Data b_z(b_vec.data(), 3); + + Data c_s(3); + Data c_d(3); + Data c_c(3); + Data c_z(3); + + float alpha = 1; + Data alpha_s(&alpha, 1); + Data alpha_d(&alpha, 1); + Data alpha_c(&alpha, 1); + Data alpha_z(&alpha, 1); + + sycl::queue *handle; + handle = &q_ct1; + + /* + DPCT1026:1: The call to cusparseSetPointerMode was removed because this call + is redundant in SYCL. + */ + + a_s_val.H2D(); + a_d_val.H2D(); + a_c_val.H2D(); + a_z_val.H2D(); + a_row_ptr.H2D(); + a_col_ind.H2D(); + b_s.H2D(); + b_d.H2D(); + b_c.H2D(); + b_z.H2D(); + alpha_s.H2D(); + alpha_d.H2D(); + alpha_c.H2D(); + alpha_z.H2D(); + + dpct::sparse::sparse_matrix_desc_t a_descr_s; + dpct::sparse::sparse_matrix_desc_t a_descr_d; + dpct::sparse::sparse_matrix_desc_t a_descr_c; + dpct::sparse::sparse_matrix_desc_t a_descr_z; + a_descr_s = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); + a_descr_d = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); + a_descr_c = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); + a_descr_z = std::make_shared( + 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); + + std::shared_ptr b_descr_s; + std::shared_ptr b_descr_d; + std::shared_ptr b_descr_c; + std::shared_ptr b_descr_z; + b_descr_s = std::make_shared( + 3, b_s.d_data, dpct::library_data_t::real_float); + b_descr_d = std::make_shared( + 3, b_d.d_data, dpct::library_data_t::real_double); + b_descr_c = std::make_shared( + 3, b_c.d_data, dpct::library_data_t::complex_float); + b_descr_z = std::make_shared( + 3, b_z.d_data, dpct::library_data_t::complex_double); + + std::shared_ptr c_descr_s; + std::shared_ptr c_descr_d; + std::shared_ptr c_descr_c; + std::shared_ptr c_descr_z; + c_descr_s = std::make_shared( + 3, c_s.d_data, dpct::library_data_t::real_float); + c_descr_d = std::make_shared( + 3, c_d.d_data, dpct::library_data_t::real_double); + c_descr_c = std::make_shared( + 3, c_c.d_data, dpct::library_data_t::complex_float); + c_descr_z = std::make_shared( + 3, c_z.d_data, dpct::library_data_t::complex_double); + + oneapi::mkl::uplo uplo = oneapi::mkl::uplo::upper; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::uplo, &uplo, + sizeof(uplo)); + oneapi::mkl::diag diag = oneapi::mkl::diag::unit; + a_descr_s->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_d->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_c->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + a_descr_z->set_attribute(dpct::sparse::matrix_attribute::diag, &diag, + sizeof(diag)); + + int SpSVDescr_s; + int SpSVDescr_d; + int SpSVDescr_c; + int SpSVDescr_z; + /* + DPCT1026:2: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:3: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:4: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:5: The call to cusparseSpSV_createDescr was removed because this call + is redundant in SYCL. + */ + + size_t ws_size_s = 0; + size_t ws_size_d = 0; + size_t ws_size_c = 0; + size_t ws_size_z = 0; + /* + DPCT1026:6: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + /* + DPCT1026:7: The call to cusparseSpSV_bufferSize was removed because this call + is redundant in SYCL. + */ + if (run_complex_datatype) { + /* + DPCT1026:12: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:13: The call to cusparseSpSV_bufferSize was removed because this + call is redundant in SYCL. + */ + } + + void *ws_s; + void *ws_d; + void *ws_c; + void *ws_z; + ws_s = (void *)sycl::malloc_device(ws_size_s, q_ct1); + ws_d = (void *)sycl::malloc_device(ws_size_d, q_ct1); + ws_c = (void *)sycl::malloc_device(ws_size_c, q_ct1); + ws_z = (void *)sycl::malloc_device(ws_size_z, q_ct1); + + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_s); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_d); + if (run_complex_datatype) { + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_c); + dpct::sparse::spsv_optimize(*handle, oneapi::mkl::transpose::nontrans, + a_descr_z); + } + + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_s.d_data, + a_descr_s, b_descr_s, c_descr_s, + dpct::library_data_t::real_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, alpha_d.d_data, + a_descr_d, b_descr_d, c_descr_d, + dpct::library_data_t::real_double); + if (run_complex_datatype) { + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, + dpct::library_data_t::complex_float); + dpct::sparse::spsv(*handle, oneapi::mkl::transpose::nontrans, + alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, + dpct::library_data_t::complex_double); + } + + c_s.D2H(); + c_d.D2H(); + c_c.D2H(); + c_z.D2H(); + + q_ct1.wait(); + + sycl::free(ws_s, q_ct1); + sycl::free(ws_d, q_ct1); + sycl::free(ws_c, q_ct1); + sycl::free(ws_z, q_ct1); + a_descr_s.reset(); + a_descr_d.reset(); + a_descr_c.reset(); + a_descr_z.reset(); + b_descr_s.reset(); + b_descr_d.reset(); + b_descr_c.reset(); + b_descr_z.reset(); + c_descr_s.reset(); + c_descr_d.reset(); + c_descr_c.reset(); + c_descr_z.reset(); + /* + DPCT1026:8: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:9: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:10: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + /* + DPCT1026:11: The call to cusparseSpSV_destroyDescr was removed because this + call is redundant in SYCL. + */ + handle = nullptr; + + float expect_c[4] = {1, 2, 3}; + if (compare_result(expect_c, c_s.h_data, 3) && + compare_result(expect_c, c_d.h_data, 3)/*&& + compare_result(expect_c, c_c.h_data, 3) && + compare_result(expect_c, c_z.h_data, 3)*/) + printf("SpSV pass\n"); + else { + printf("SpSV fail\n"); + test_passed = false; + } +} + int main() { test_cusparseSetGetStream(); test_cusparseTcsrmv_ge(); @@ -1154,6 +1787,8 @@ int main() { test_cusparseTcsrsv(); test_cusparseSpMV(); test_cusparseSpMM(); + test_cusparseSpGEMM(); + test_cusparseSpSV(); if (test_passed) return 0; From b09cec12b0d59227053512f14365e53e61497368 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 8 Sep 2023 09:30:33 +0800 Subject: [PATCH 4/7] Update test Signed-off-by: Jiang, Zhiwei --- features/feature_case/cusparse/cusparse_4.cu | 17 +++++---- help_function/src/sparse_utils_2_buffer.cpp | 39 +++++++++++--------- help_function/src/sparse_utils_2_usm.cpp | 39 +++++++++++--------- 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu index 9ccc90564..4dbdd14e4 100644 --- a/features/feature_case/cusparse/cusparse_4.cu +++ b/features/feature_case/cusparse/cusparse_4.cu @@ -187,14 +187,19 @@ void test_cusparseSpGEMM() { cusparseCreateCsr(&b_descr_c, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); cusparseCreateCsr(&b_descr_z, 3, 4, 5, b_row_ptr.d_data, b_col_ind.d_data, b_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + cusparseSpMatDescr_t c_descr_s; cusparseSpMatDescr_t c_descr_d; cusparseSpMatDescr_t c_descr_c; cusparseSpMatDescr_t c_descr_z; - cusparseCreateCsr(&c_descr_s, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); - cusparseCreateCsr(&c_descr_d, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); - cusparseCreateCsr(&c_descr_c, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); - cusparseCreateCsr(&c_descr_z, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + cusparseCreateCsr(&c_descr_s, 3, 4, 0, c_s_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&c_descr_d, 3, 4, 0, c_d_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&c_descr_c, 3, 4, 0, c_c_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&c_descr_z, 3, 4, 0, c_z_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); cusparseSpGEMMDescr_t SpGEMMDescr_s; cusparseSpGEMMDescr_t SpGEMMDescr_d; @@ -280,10 +285,6 @@ void test_cusparseSpGEMM() { Data c_d_val(c_nnz_d); Data c_c_val(c_nnz_c); Data c_z_val(c_nnz_z); - Data c_s_row_ptr(4); - Data c_d_row_ptr(4); - Data c_c_row_ptr(4); - Data c_z_row_ptr(4); Data c_s_col_ind(c_nnz_s); Data c_d_col_ind(c_nnz_d); Data c_c_col_ind(c_nnz_c); diff --git a/help_function/src/sparse_utils_2_buffer.cpp b/help_function/src/sparse_utils_2_buffer.cpp index c5c8c4a60..d10070bf0 100644 --- a/help_function/src/sparse_utils_2_buffer.cpp +++ b/help_function/src/sparse_utils_2_buffer.cpp @@ -1145,7 +1145,7 @@ void test_cusparseSpMM() { // | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | void test_cusparseSpGEMM() { dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.default_queue(); + sycl::queue &q_ct1 = dev_ct1.out_of_order_queue(); std::vector a_val_vec = {1, 2, 3, 4}; Data a_s_val(a_val_vec.data(), 4); Data a_d_val(a_val_vec.data(), 4); @@ -1257,26 +1257,35 @@ void test_cusparseSpGEMM() { oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, dpct::sparse::matrix_format::csr); + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + dpct::sparse::sparse_matrix_desc_t c_descr_s; dpct::sparse::sparse_matrix_desc_t c_descr_d; dpct::sparse::sparse_matrix_desc_t c_descr_c; dpct::sparse::sparse_matrix_desc_t c_descr_z; c_descr_s = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::real_float, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_s_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); c_descr_d = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::real_double, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_d_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); c_descr_c = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::complex_float, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_c_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); c_descr_z = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::complex_double, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_z_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s; oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d; @@ -1410,10 +1419,6 @@ void test_cusparseSpGEMM() { Data c_d_val(c_nnz_d); Data c_c_val(c_nnz_c); Data c_z_val(c_nnz_z); - Data c_s_row_ptr(4); - Data c_d_row_ptr(4); - Data c_c_row_ptr(4); - Data c_z_row_ptr(4); Data c_s_col_ind(c_nnz_s); Data c_d_col_ind(c_nnz_d); Data c_c_col_ind(c_nnz_c); diff --git a/help_function/src/sparse_utils_2_usm.cpp b/help_function/src/sparse_utils_2_usm.cpp index 079f29f59..f516b18ff 100644 --- a/help_function/src/sparse_utils_2_usm.cpp +++ b/help_function/src/sparse_utils_2_usm.cpp @@ -1152,7 +1152,7 @@ void test_cusparseSpMM() { // | 4 0 0 | | 0 0 5 6 | | 4 0 0 0 | void test_cusparseSpGEMM() { dpct::device_ext &dev_ct1 = dpct::get_current_device(); - sycl::queue &q_ct1 = dev_ct1.default_queue(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); std::vector a_val_vec = {1, 2, 3, 4}; Data a_s_val(a_val_vec.data(), 4); Data a_d_val(a_val_vec.data(), 4); @@ -1264,26 +1264,35 @@ void test_cusparseSpGEMM() { oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, dpct::sparse::matrix_format::csr); + Data c_s_row_ptr(4); + Data c_d_row_ptr(4); + Data c_c_row_ptr(4); + Data c_z_row_ptr(4); + dpct::sparse::sparse_matrix_desc_t c_descr_s; dpct::sparse::sparse_matrix_desc_t c_descr_d; dpct::sparse::sparse_matrix_desc_t c_descr_c; dpct::sparse::sparse_matrix_desc_t c_descr_z; c_descr_s = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::real_float, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_s_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_float, + dpct::sparse::matrix_format::csr); c_descr_d = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::real_double, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_d_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::real_double, + dpct::sparse::matrix_format::csr); c_descr_c = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::complex_float, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_c_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_float, + dpct::sparse::matrix_format::csr); c_descr_z = std::make_shared( - 3, 4, 0, nullptr, nullptr, nullptr, dpct::library_data_t::real_int32, - dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, - dpct::library_data_t::complex_double, dpct::sparse::matrix_format::csr); + 3, 4, 0, c_z_row_ptr.d_data, nullptr, nullptr, + dpct::library_data_t::real_int32, dpct::library_data_t::real_int32, + oneapi::mkl::index_base::zero, dpct::library_data_t::complex_double, + dpct::sparse::matrix_format::csr); oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_s; oneapi::mkl::sparse::matmat_descr_t SpGEMMDescr_d; @@ -1417,10 +1426,6 @@ void test_cusparseSpGEMM() { Data c_d_val(c_nnz_d); Data c_c_val(c_nnz_c); Data c_z_val(c_nnz_z); - Data c_s_row_ptr(4); - Data c_d_row_ptr(4); - Data c_c_row_ptr(4); - Data c_z_row_ptr(4); Data c_s_col_ind(c_nnz_s); Data c_d_col_ind(c_nnz_d); Data c_c_col_ind(c_nnz_c); From 316acc5557577b31ce0f9f0b9e0ac8ea89e1291b Mon Sep 17 00:00:00 2001 From: Zhiwei Jiang Date: Mon, 25 Sep 2023 09:08:16 +0800 Subject: [PATCH 5/7] Add buffer and complex tests Signed-off-by: Zhiwei Jiang --- features/feature_case/cusparse/cusparse_4.cu | 17 ++++++------- features/feature_case/cusparse/cusparse_5.cu | 9 +++---- help_function/src/sparse_utils_2_buffer.cpp | 26 ++++++++++---------- help_function/src/sparse_utils_2_usm.cpp | 18 +++++++------- 4 files changed, 32 insertions(+), 38 deletions(-) diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu index 4dbdd14e4..dfb28ec95 100644 --- a/features/feature_case/cusparse/cusparse_4.cu +++ b/features/feature_case/cusparse/cusparse_4.cu @@ -103,7 +103,7 @@ bool compare_result(float *expect, float *result, std::vector indices) { bool test_passed = true; -const bool run_complex_datatype = false; +const bool run_complex_datatype = true; // A * B = C // @@ -348,16 +348,16 @@ void test_cusparseSpGEMM() { float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; if (compare_result(expect_c_val, c_s_val.h_data, 7) && compare_result(expect_c_val, c_d_val.h_data, 7) && - /*compare_result(expect_c_val, c_c_val.h_data, 7) && - compare_result(expect_c_val, c_z_val.h_data, 7) &&*/ + compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) && compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && - /*compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && - compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&*/ + compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) && compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && - compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) /*&& + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) && compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && - compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)*/ + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7) ) printf("SpGEMM pass\n"); else { @@ -367,10 +367,7 @@ void test_cusparseSpGEMM() { } int main() { - // Re-enable below test until MKL issue fixed -#ifndef DPCT_USM_LEVEL_NONE test_cusparseSpGEMM(); -#endif if (test_passed) return 0; diff --git a/features/feature_case/cusparse/cusparse_5.cu b/features/feature_case/cusparse/cusparse_5.cu index c6fac7be1..bb47948ec 100644 --- a/features/feature_case/cusparse/cusparse_5.cu +++ b/features/feature_case/cusparse/cusparse_5.cu @@ -103,7 +103,7 @@ bool compare_result(float *expect, float *result, std::vector indices) { bool test_passed = true; -const bool run_complex_datatype = false; +const bool run_complex_datatype = true; // A * C = B // @@ -270,9 +270,9 @@ void test_cusparseSpSV() { float expect_c[4] = {1, 2, 3}; if (compare_result(expect_c, c_s.h_data, 3) && - compare_result(expect_c, c_d.h_data, 3)/*&& + compare_result(expect_c, c_d.h_data, 3) && compare_result(expect_c, c_c.h_data, 3) && - compare_result(expect_c, c_z.h_data, 3)*/) + compare_result(expect_c, c_z.h_data, 3)) printf("SpSV pass\n"); else { printf("SpSV fail\n"); @@ -281,10 +281,7 @@ void test_cusparseSpSV() { } int main() { - // Re-enable below test until MKL issue fixed -#ifndef DPCT_USM_LEVEL_NONE test_cusparseSpSV(); -#endif if (test_passed) return 0; diff --git a/help_function/src/sparse_utils_2_buffer.cpp b/help_function/src/sparse_utils_2_buffer.cpp index d10070bf0..a30f88d70 100644 --- a/help_function/src/sparse_utils_2_buffer.cpp +++ b/help_function/src/sparse_utils_2_buffer.cpp @@ -110,7 +110,7 @@ bool compare_result(float *expect, float *result, std::vector indices) { bool test_passed = true; -const bool run_complex_datatype = false; +const bool run_complex_datatype = true; void test_cusparseSetGetStream() { sycl::queue *handle; @@ -1498,16 +1498,16 @@ void test_cusparseSpGEMM() { float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; if (compare_result(expect_c_val, c_s_val.h_data, 7) && compare_result(expect_c_val, c_d_val.h_data, 7) && - /*compare_result(expect_c_val, c_c_val.h_data, 7) && - compare_result(expect_c_val, c_z_val.h_data, 7) &&*/ + compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) && compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && - /*compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && - compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&*/ + compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) && compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && - compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) /*&& + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) && compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && - compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)*/ + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7) ) printf("SpGEMM pass\n"); else { @@ -1766,9 +1766,9 @@ void test_cusparseSpSV() { float expect_c[4] = {1, 2, 3}; if (compare_result(expect_c, c_s.h_data, 3) && - compare_result(expect_c, c_d.h_data, 3)/*&& + compare_result(expect_c, c_d.h_data, 3) && compare_result(expect_c, c_c.h_data, 3) && - compare_result(expect_c, c_z.h_data, 3)*/) + compare_result(expect_c, c_z.h_data, 3)) printf("SpSV pass\n"); else { printf("SpSV fail\n"); @@ -1783,10 +1783,10 @@ int main() { test_cusparseTcsrmv_tr(); // test_cusparseTcsrmm(); // Re-enable this test until MKL issue fixed test_cusparseTcsrsv(); - // test_cusparseSpMV(); // Re-enable this test until MKL issue fixed - // test_cusparseSpMM(); // Re-enable this test until MKL issue fixed - // test_cusparseSpGEMM(); // Re-enable this test until MKL issue fixed - // test_cusparseSpSV() // Re-enable this test until MKL issue fixed + test_cusparseSpMV(); + test_cusparseSpMM(); + test_cusparseSpGEMM(); + test_cusparseSpSV(); if (test_passed) return 0; diff --git a/help_function/src/sparse_utils_2_usm.cpp b/help_function/src/sparse_utils_2_usm.cpp index f516b18ff..75eb0018c 100644 --- a/help_function/src/sparse_utils_2_usm.cpp +++ b/help_function/src/sparse_utils_2_usm.cpp @@ -117,7 +117,7 @@ bool compare_result(float *expect, float *result, std::vector indices) { bool test_passed = true; -const bool run_complex_datatype = false; +const bool run_complex_datatype = true; void test_cusparseSetGetStream() { sycl::queue *handle; @@ -1505,16 +1505,16 @@ void test_cusparseSpGEMM() { float expect_c_col_ind[7] = {0.000000, 1.000000, 2.000000, 3.000000, 2.000000, 3.000000, 0.000000}; if (compare_result(expect_c_val, c_s_val.h_data, 7) && compare_result(expect_c_val, c_d_val.h_data, 7) && - /*compare_result(expect_c_val, c_c_val.h_data, 7) && - compare_result(expect_c_val, c_z_val.h_data, 7) &&*/ + compare_result(expect_c_val, c_c_val.h_data, 7) && + compare_result(expect_c_val, c_z_val.h_data, 7) && compare_result(expect_c_row_ptr, c_s_row_ptr.h_data, 4) && compare_result(expect_c_row_ptr, c_d_row_ptr.h_data, 4) && - /*compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && - compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) &&*/ + compare_result(expect_c_row_ptr, c_c_row_ptr.h_data, 4) && + compare_result(expect_c_row_ptr, c_z_row_ptr.h_data, 4) && compare_result(expect_c_col_ind, c_s_col_ind.h_data, 7) && - compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) /*&& + compare_result(expect_c_col_ind, c_d_col_ind.h_data, 7) && compare_result(expect_c_col_ind, c_c_col_ind.h_data, 7) && - compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7)*/ + compare_result(expect_c_col_ind, c_z_col_ind.h_data, 7) ) printf("SpGEMM pass\n"); else { @@ -1773,9 +1773,9 @@ void test_cusparseSpSV() { float expect_c[4] = {1, 2, 3}; if (compare_result(expect_c, c_s.h_data, 3) && - compare_result(expect_c, c_d.h_data, 3)/*&& + compare_result(expect_c, c_d.h_data, 3) && compare_result(expect_c, c_c.h_data, 3) && - compare_result(expect_c, c_z.h_data, 3)*/) + compare_result(expect_c, c_z.h_data, 3)) printf("SpSV pass\n"); else { printf("SpSV fail\n"); From 9f0d3983b1fd2edca1156de87aa8d3e37114c63e Mon Sep 17 00:00:00 2001 From: Zhiwei Jiang Date: Wed, 27 Sep 2023 11:02:50 +0800 Subject: [PATCH 6/7] Use NULL in cusparseCreateCsr Signed-off-by: Zhiwei Jiang --- features/feature_case/cusparse/cusparse_4.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/features/feature_case/cusparse/cusparse_4.cu b/features/feature_case/cusparse/cusparse_4.cu index dfb28ec95..bc239b9bd 100644 --- a/features/feature_case/cusparse/cusparse_4.cu +++ b/features/feature_case/cusparse/cusparse_4.cu @@ -196,10 +196,10 @@ void test_cusparseSpGEMM() { cusparseSpMatDescr_t c_descr_d; cusparseSpMatDescr_t c_descr_c; cusparseSpMatDescr_t c_descr_z; - cusparseCreateCsr(&c_descr_s, 3, 4, 0, c_s_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); - cusparseCreateCsr(&c_descr_d, 3, 4, 0, c_d_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); - cusparseCreateCsr(&c_descr_c, 3, 4, 0, c_c_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); - cusparseCreateCsr(&c_descr_z, 3, 4, 0, c_z_row_ptr.d_data, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); + cusparseCreateCsr(&c_descr_s, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + cusparseCreateCsr(&c_descr_d, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + cusparseCreateCsr(&c_descr_c, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F); + cusparseCreateCsr(&c_descr_z, 3, 4, 0, NULL, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F); cusparseSpGEMMDescr_t SpGEMMDescr_s; cusparseSpGEMMDescr_t SpGEMMDescr_d; From bf9f7870785b62aedc771b05078f143a7e83406c Mon Sep 17 00:00:00 2001 From: Zhiwei Jiang Date: Wed, 27 Sep 2023 14:27:35 +0800 Subject: [PATCH 7/7] Fix config Signed-off-by: Zhiwei Jiang --- ...PLATE_help_function_before_11_skip_double.xml | 16 ---------------- .../TEMPLATE_help_function_sparse_buffer.xml | 14 ++++++++++++++ .../config/TEMPLATE_help_function_sparse_usm.xml | 14 ++++++++++++++ help_function/help_function.xml | 4 ++-- 4 files changed, 30 insertions(+), 18 deletions(-) delete mode 100644 help_function/config/TEMPLATE_help_function_before_11_skip_double.xml create mode 100644 help_function/config/TEMPLATE_help_function_sparse_buffer.xml create mode 100644 help_function/config/TEMPLATE_help_function_sparse_usm.xml diff --git a/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml b/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml deleted file mode 100644 index 6da1f7005..000000000 --- a/help_function/config/TEMPLATE_help_function_before_11_skip_double.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - WARNING: DON'T UPDATE THIS FILE MANUALLY!!! - This is auto-generated accessors configuration file which affects all tests in ported gcc suites - If you have any issue with this file please contact Compiler QA team - - - - - - - - - - diff --git a/help_function/config/TEMPLATE_help_function_sparse_buffer.xml b/help_function/config/TEMPLATE_help_function_sparse_buffer.xml new file mode 100644 index 000000000..9e20a921a --- /dev/null +++ b/help_function/config/TEMPLATE_help_function_sparse_buffer.xml @@ -0,0 +1,14 @@ + + + + test help function + + + + + + + + + + diff --git a/help_function/config/TEMPLATE_help_function_sparse_usm.xml b/help_function/config/TEMPLATE_help_function_sparse_usm.xml new file mode 100644 index 000000000..7dbc611de --- /dev/null +++ b/help_function/config/TEMPLATE_help_function_sparse_usm.xml @@ -0,0 +1,14 @@ + + + + test help function + + + + + + + + + + diff --git a/help_function/help_function.xml b/help_function/help_function.xml index 38cd52ecc..5eca8e704 100644 --- a/help_function/help_function.xml +++ b/help_function/help_function.xml @@ -185,8 +185,8 @@ - - + +