Skip to content
Merged
14 changes: 14 additions & 0 deletions features/config/TEMPLATE_cusparse_after_112_skip_double_usm.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>

<test driverID="test_feature" name="TEMPLATE">
<description>test</description>
<files>
<file path="feature_case/cusparse/${testName}.cu" />
</files>
<rules>
<platformRule OSFamily="Linux" kit="CUDA11.2" kitRange="OLDER" runOnThisPlatform="false"/>
<platformRule OSFamily="Windows" kit="CUDA11.2" kitRange="OLDER" runOnThisPlatform="false"/>
<optlevelRule GPUFeature="NOT double" excludeOptlevelNameString="gpu" />
<optlevelRule excludeOptlevelNameString="usmnone" />
</rules>
</test>
375 changes: 375 additions & 0 deletions features/feature_case/cusparse/cusparse_4.cu

Large diffs are not rendered by default.

289 changes: 289 additions & 0 deletions features/feature_case/cusparse/cusparse_5.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
// ===------- cusparse_3.cu -------------------------------- *- CUDA -* ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// ===----------------------------------------------------------------------===//

#include "cusparse.h"

#include <cmath>
#include <complex>
#include <cstdio>
#include <vector>

template <class d_data_t>
struct Data {
float *h_data;
d_data_t *d_data;
int element_num;
Data(int element_num) : element_num(element_num) {
h_data = (float *)malloc(sizeof(float) * element_num);
memset(h_data, 0, sizeof(float) * element_num);
cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
}
Data(float *input_data, int element_num) : element_num(element_num) {
h_data = (float *)malloc(sizeof(float) * element_num);
cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
memcpy(h_data, input_data, sizeof(float) * element_num);
}
~Data() {
free(h_data);
cudaFree(d_data);
}
void H2D() {
d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
memset(h_temp, 0, sizeof(d_data_t) * element_num);
from_float_convert(h_data, h_temp);
cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num,
cudaMemcpyHostToDevice);
free(h_temp);
}
void D2H() {
d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
memset(h_temp, 0, sizeof(d_data_t) * element_num);
cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num,
cudaMemcpyDeviceToHost);
to_float_convert(h_temp, h_data);
free(h_temp);
}

private:
inline void from_float_convert(float *in, d_data_t *out) {
for (int i = 0; i < element_num; i++)
out[i] = in[i];
}
inline void to_float_convert(d_data_t *in, float *out) {
for (int i = 0; i < element_num; i++)
out[i] = in[i];
}
};
template <>
inline void Data<float2>::from_float_convert(float *in, float2 *out) {
for (int i = 0; i < element_num; i++)
out[i].x = in[i];
}
template <>
inline void Data<double2>::from_float_convert(float *in, double2 *out) {
for (int i = 0; i < element_num; i++)
out[i].x = in[i];
}

template <>
inline void Data<float2>::to_float_convert(float2 *in, float *out) {
for (int i = 0; i < element_num; i++)
out[i] = in[i].x;
}
template <>
inline void Data<double2>::to_float_convert(double2 *in, float *out) {
for (int i = 0; i < element_num; i++)
out[i] = in[i].x;
}

bool compare_result(float *expect, float *result, int element_num) {
for (int i = 0; i < element_num; i++) {
if (std::abs(result[i] - expect[i]) >= 0.05) {
return false;
}
}
return true;
}

bool compare_result(float *expect, float *result, std::vector<int> indices) {
for (int i = 0; i < indices.size(); i++) {
if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) {
return false;
}
}
return true;
}

bool test_passed = true;

const bool run_complex_datatype = true;

// A * C = B
//
// | 1 1 2 | | 1 | | 9 |
// | 0 1 3 | * | 2 | = | 11 |
// | 0 0 1 | | 3 | | 3 |
void test_cusparseSpSV() {
std::vector<float> a_val_vec = {1, 1, 2, 1, 3, 1};
Data<float> a_s_val(a_val_vec.data(), 6);
Data<double> a_d_val(a_val_vec.data(), 6);
Data<float2> a_c_val(a_val_vec.data(), 6);
Data<double2> a_z_val(a_val_vec.data(), 6);
std::vector<float> a_row_ptr_vec = {0, 3, 5, 6};
Data<int> a_row_ptr(a_row_ptr_vec.data(), 4);
std::vector<float> a_col_ind_vec = {0, 1, 2, 1, 2, 3};
Data<int> a_col_ind(a_col_ind_vec.data(), 6);

std::vector<float> b_vec = {9, 11, 3};
Data<float> b_s(b_vec.data(), 3);
Data<double> b_d(b_vec.data(), 3);
Data<float2> b_c(b_vec.data(), 3);
Data<double2> b_z(b_vec.data(), 3);

Data<float> c_s(3);
Data<double> c_d(3);
Data<float2> c_c(3);
Data<double2> c_z(3);

float alpha = 1;
Data<float> alpha_s(&alpha, 1);
Data<double> alpha_d(&alpha, 1);
Data<float2> alpha_c(&alpha, 1);
Data<double2> alpha_z(&alpha, 1);

cusparseHandle_t handle;
cusparseCreate(&handle);

cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE);

a_s_val.H2D();
a_d_val.H2D();
a_c_val.H2D();
a_z_val.H2D();
a_row_ptr.H2D();
a_col_ind.H2D();
b_s.H2D();
b_d.H2D();
b_c.H2D();
b_z.H2D();
alpha_s.H2D();
alpha_d.H2D();
alpha_c.H2D();
alpha_z.H2D();

cusparseSpMatDescr_t a_descr_s;
cusparseSpMatDescr_t a_descr_d;
cusparseSpMatDescr_t a_descr_c;
cusparseSpMatDescr_t a_descr_z;
cusparseCreateCsr(&a_descr_s, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
cusparseCreateCsr(&a_descr_d, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
cusparseCreateCsr(&a_descr_c, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
cusparseCreateCsr(&a_descr_z, 3, 3, 4, a_row_ptr.d_data, a_col_ind.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);

cusparseDnVecDescr_t b_descr_s;
cusparseDnVecDescr_t b_descr_d;
cusparseDnVecDescr_t b_descr_c;
cusparseDnVecDescr_t b_descr_z;
cusparseCreateDnVec(&b_descr_s, 3, b_s.d_data, CUDA_R_32F);
cusparseCreateDnVec(&b_descr_d, 3, b_d.d_data, CUDA_R_64F);
cusparseCreateDnVec(&b_descr_c, 3, b_c.d_data, CUDA_C_32F);
cusparseCreateDnVec(&b_descr_z, 3, b_z.d_data, CUDA_C_64F);

cusparseDnVecDescr_t c_descr_s;
cusparseDnVecDescr_t c_descr_d;
cusparseDnVecDescr_t c_descr_c;
cusparseDnVecDescr_t c_descr_z;
cusparseCreateDnVec(&c_descr_s, 3, c_s.d_data, CUDA_R_32F);
cusparseCreateDnVec(&c_descr_d, 3, c_d.d_data, CUDA_R_64F);
cusparseCreateDnVec(&c_descr_c, 3, c_c.d_data, CUDA_C_32F);
cusparseCreateDnVec(&c_descr_z, 3, c_z.d_data, CUDA_C_64F);

cusparseFillMode_t uplo = CUSPARSE_FILL_MODE_UPPER;
cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_FILL_MODE, &uplo, sizeof(uplo));
cusparseDiagType_t diag = CUSPARSE_DIAG_TYPE_UNIT;
cusparseSpMatSetAttribute(a_descr_s, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
cusparseSpMatSetAttribute(a_descr_d, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
cusparseSpMatSetAttribute(a_descr_c, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));
cusparseSpMatSetAttribute(a_descr_z, CUSPARSE_SPMAT_DIAG_TYPE, &diag, sizeof(diag));

cusparseSpSVDescr_t SpSVDescr_s;
cusparseSpSVDescr_t SpSVDescr_d;
cusparseSpSVDescr_t SpSVDescr_c;
cusparseSpSVDescr_t SpSVDescr_z;
cusparseSpSV_createDescr(&SpSVDescr_s);
cusparseSpSV_createDescr(&SpSVDescr_d);
cusparseSpSV_createDescr(&SpSVDescr_c);
cusparseSpSV_createDescr(&SpSVDescr_z);

size_t ws_size_s = 0;
size_t ws_size_d = 0;
size_t ws_size_c = 0;
size_t ws_size_z = 0;
cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s);
cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d);
if (run_complex_datatype) {
cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c);
cusparseSpSV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z);
}

void *ws_s;
void *ws_d;
void *ws_c;
void *ws_z;
cudaMalloc(&ws_s, ws_size_s);
cudaMalloc(&ws_d, ws_size_d);
cudaMalloc(&ws_c, ws_size_c);
cudaMalloc(&ws_z, ws_size_z);

cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s, &ws_size_s);
cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d, &ws_size_d);
if (run_complex_datatype) {
cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c, &ws_size_c);
cusparseSpSV_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z, &ws_size_z);
}

cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_s.d_data, a_descr_s, b_descr_s, c_descr_s, CUDA_R_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_s);
cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.d_data, a_descr_d, b_descr_d, c_descr_d, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_d);
if (run_complex_datatype) {
cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_c.d_data, a_descr_c, b_descr_c, c_descr_c, CUDA_C_32F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_c);
cusparseSpSV_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_z.d_data, a_descr_z, b_descr_z, c_descr_z, CUDA_C_64F, CUSPARSE_SPSV_ALG_DEFAULT, SpSVDescr_z);
}

c_s.D2H();
c_d.D2H();
c_c.D2H();
c_z.D2H();

cudaStreamSynchronize(0);

cudaFree(ws_s);
cudaFree(ws_d);
cudaFree(ws_c);
cudaFree(ws_z);
cusparseDestroySpMat(a_descr_s);
cusparseDestroySpMat(a_descr_d);
cusparseDestroySpMat(a_descr_c);
cusparseDestroySpMat(a_descr_z);
cusparseDestroyDnVec(b_descr_s);
cusparseDestroyDnVec(b_descr_d);
cusparseDestroyDnVec(b_descr_c);
cusparseDestroyDnVec(b_descr_z);
cusparseDestroyDnVec(c_descr_s);
cusparseDestroyDnVec(c_descr_d);
cusparseDestroyDnVec(c_descr_c);
cusparseDestroyDnVec(c_descr_z);
cusparseSpSV_destroyDescr(SpSVDescr_s);
cusparseSpSV_destroyDescr(SpSVDescr_d);
cusparseSpSV_destroyDescr(SpSVDescr_c);
cusparseSpSV_destroyDescr(SpSVDescr_z);
cusparseDestroy(handle);

float expect_c[4] = {1, 2, 3};
if (compare_result(expect_c, c_s.h_data, 3) &&
compare_result(expect_c, c_d.h_data, 3) &&
compare_result(expect_c, c_c.h_data, 3) &&
compare_result(expect_c, c_z.h_data, 3))
printf("SpSV pass\n");
else {
printf("SpSV fail\n");
test_passed = false;
}
}

int main() {
test_cusparseSpSV();

if (test_passed)
return 0;
return -1;
}
2 changes: 2 additions & 0 deletions features/features.xml
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@
<test testName="cusparse_1" configFile="config/TEMPLATE_cusparse.xml" />
<test testName="cusparse_2" configFile="config/TEMPLATE_cusparse_before_11_skip_double.xml" splitGroup="double"/>
<test testName="cusparse_3" configFile="config/TEMPLATE_cusparse_after_112_skip_double.xml" splitGroup="double"/>
<test testName="cusparse_4" configFile="config/TEMPLATE_cusparse_after_112_skip_double_usm.xml" splitGroup="double"/>
<test testName="cusparse_5" configFile="config/TEMPLATE_cusparse_after_112_skip_double_usm.xml" splitGroup="double"/>
<test testName="cusolver_test1" configFile="config/TEMPLATE_solver.xml" splitGroup="double"/>
<test testName="cusolver_test2" configFile="config/TEMPLATE_solver_skip80.xml" splitGroup="double"/>
<test testName="cusolver_test3" configFile="config/TEMPLATE_solver_11.xml" splitGroup="double"/>
Expand Down
2 changes: 1 addition & 1 deletion features/test_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
'math-bf16-conv', 'math-half-conv',
'math-bfloat16', 'libcu_atomic', 'test_shared_memory', 'cudnn-reduction', 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3',
'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6',
'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3",
'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5",
'cudnn-GetErrorString', 'cub_device_histgram',
'cudnn-types', 'cudnn-version', 'cudnn-dropout',
'constant_attr', 'sync_warp_p2', 'occupancy_calculation',
Expand Down

This file was deleted.

14 changes: 14 additions & 0 deletions help_function/config/TEMPLATE_help_function_sparse_buffer.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>

<test driverID="test_help" name="TEMPLATE">
<description>test help function</description>
<files>
<file path="src/${testName}.cpp" />
</files>
<rules>
<optlevelRule excludeOptlevelNameString="acc" />
<optlevelRule excludeOptlevelNameString="cuda_backend" />
<optlevelRule GPUFeature="NOT double" excludeOptlevelNameString="gpu" />
<optlevelRule onlyOptlevelNameString="usmnone" />
</rules>
</test>
14 changes: 14 additions & 0 deletions help_function/config/TEMPLATE_help_function_sparse_usm.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>

<test driverID="test_help" name="TEMPLATE">
<description>test help function</description>
<files>
<file path="src/${testName}.cpp" />
</files>
<rules>
<optlevelRule excludeOptlevelNameString="acc" />
<optlevelRule excludeOptlevelNameString="cuda_backend" />
<optlevelRule GPUFeature="NOT double" excludeOptlevelNameString="gpu" />
<optlevelRule excludeOptlevelNameString="usmnone" />
</rules>
</test>
4 changes: 2 additions & 2 deletions help_function/help_function.xml
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@
<test testName="lapack_utils_usm" configFile="config/TEMPLATE_help_function_skip_double.xml" splitGroup="double" />
<test testName="util_cast_double_from_to_int_test" configFile="config/TEMPLATE_help_function_skip_double.xml" splitGroup="double" />
<test testName="sparse_utils_1" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
<test testName="sparse_utils_2_buffer" configFile="config/TEMPLATE_help_function_before_11_skip_double.xml" splitGroup="double" />
<test testName="sparse_utils_2_usm" configFile="config/TEMPLATE_help_function_before_11_skip_double.xml" splitGroup="double" />
<test testName="sparse_utils_2_buffer" configFile="config/TEMPLATE_help_function_sparse_buffer.xml" splitGroup="double" />
<test testName="sparse_utils_2_usm" configFile="config/TEMPLATE_help_function_sparse_usm.xml" splitGroup="double" />
<test testName="atomic_compare_exchange_strong" configFile="config/TEMPLATE_help_function.xml" />
</tests>
</suite>
Loading