Skip to content

Commit

Permalink
[build,src] Enhancements to the cudamatrix/cudavector classes. (#3373)
Browse files Browse the repository at this point in the history
* Added CuSolver to the matrix class.  This is only supported with
Cuda 9.1 or newer.  Calling CuSolver code without Cuda 9.1 or newer
will result in a runtime error.

This change required some changes to the build system which requires
versioning the configure script. This forces everyone to reconfigure.
Failure to reconfigure would result in linking and build errors on
some systems.
  • Loading branch information
luitjens authored and danpovey committed Jun 11, 2019
1 parent 04cf43b commit c10e02f
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 5 deletions.
21 changes: 19 additions & 2 deletions src/configure
Expand Up @@ -39,7 +39,7 @@

# This should be incremented after any significant change to the configure
# script, i.e. any change affecting kaldi.mk or the build system as a whole.
CONFIGURE_VERSION=10
CONFIGURE_VERSION=11

# We support bash version 3.2 (Macs still ship with this version as of 2019)
# and above.
Expand Down Expand Up @@ -433,22 +433,32 @@ function configure_cuda {
7_*)
MIN_UNSUPPORTED_GCC_VER="5.0"
MIN_UNSUPPORTED_GCC_VER_NUM=50000;
CUSOLVER=false
;;
8_*)
MIN_UNSUPPORTED_GCC_VER="6.0"
MIN_UNSUPPORTED_GCC_VER_NUM=60000;
CUSOLVER=false
;;
9_0 | 9_1)
9_0)
MIN_UNSUPPORTED_GCC_VER="7.0"
MIN_UNSUPPORTED_GCC_VER_NUM=70000;
CUSOLVER=false
;;
9_1)
MIN_UNSUPPORTED_GCC_VER="7.0"
MIN_UNSUPPORTED_GCC_VER_NUM=70000;
CUSOLVER=true
;;
9_2 | 9_* | 10_0)
MIN_UNSUPPORTED_GCC_VER="8.0"
MIN_UNSUPPORTED_GCC_VER_NUM=80000;
CUSOLVER=true
;;
10_1 | 10_*)
MIN_UNSUPPORTED_GCC_VER="9.0"
MIN_UNSUPPORTED_GCC_VER_NUM=90000;
CUSOLVER=true
;;
*)
echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1;
Expand Down Expand Up @@ -492,6 +502,8 @@ function configure_cuda {
echo CUDA = true >> kaldi.mk
echo CUDATKDIR = $CUDATKDIR >> kaldi.mk
echo "CUDA_ARCH = $CUDA_ARCH" >> kaldi.mk


echo >> kaldi.mk

# 64bit/32bit? We do not support cross compilation with CUDA so, use direct
Expand All @@ -512,6 +524,11 @@ WARNING: CUDA will not be used!
CUDA is not supported with 32-bit builds."
exit 1;
fi

#add cusolver flags for newer toolkits
if [[ $CUSOLVER -eq true ]]; then
echo "CUDA_LDLIBS += -lcusolver" >> kaldi.mk
fi

else
echo "\
Expand Down
9 changes: 9 additions & 0 deletions src/cudamatrix/cu-common.h
Expand Up @@ -59,6 +59,15 @@
} \
}

#define CUSOLVER_SAFE_CALL(fun) \
{ \
int32 ret; \
if ((ret = (fun)) != 0) { \
KALDI_ERR << "cusolverStatus_t " << ret << " : \"" << ret << "\" returned from '" << #fun << "'"; \
} \
}


#define CUSPARSE_SAFE_CALL(fun) \
{ \
int32 ret; \
Expand Down
36 changes: 33 additions & 3 deletions src/cudamatrix/cu-device.cc
Expand Up @@ -110,15 +110,21 @@ void CuDevice::Initialize() {
// Initialize CUBLAS.
CUBLAS_SAFE_CALL(cublasCreate(&cublas_handle_));
CUBLAS_SAFE_CALL(cublasSetStream(cublas_handle_, cudaStreamPerThread));

#if CUDA_VERSION >= 9100
CUSOLVER_SAFE_CALL(cusolverDnCreate(&cusolverdn_handle_));
CUSOLVER_SAFE_CALL(cusolverDnSetStream(cusolverdn_handle_,
cudaStreamPerThread));
#endif

#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000
if (device_options_.use_tensor_cores) {
// Enable tensor cores in CUBLAS
// Note if the device does not support tensor cores this will fall back to normal math mode
CUBLAS_SAFE_CALL(cublasSetMathMode(cublas_handle_,
CUBLAS_TENSOR_OP_MATH));
}
#endif
#endif

// Initialize the cuSPARSE library
CUSPARSE_SAFE_CALL(cusparseCreate(&cusparse_handle_));
Expand All @@ -130,6 +136,7 @@ void CuDevice::Initialize() {
// To get same random sequence, call srand() before the constructor is invoked,
CURAND_SAFE_CALL(curandSetGeneratorOrdering(
curand_handle_, CURAND_ORDERING_PSEUDO_DEFAULT));
CURAND_SAFE_CALL(curandSetStream(curand_handle_, cudaStreamPerThread));
SeedGpu();
}
}
Expand Down Expand Up @@ -263,6 +270,23 @@ void CuDevice::FinalizeActiveGpu() {
// Initialize CUBLAS.
CUBLAS_SAFE_CALL(cublasCreate(&cublas_handle_));
CUBLAS_SAFE_CALL(cublasSetStream(cublas_handle_, cudaStreamPerThread));

#if CUDA_VERSION >= 9100
CUSOLVER_SAFE_CALL(cusolverDnCreate(&cusolverdn_handle_));
CUSOLVER_SAFE_CALL(cusolverDnSetStream(cusolverdn_handle_,
cudaStreamPerThread));
#endif

#if CUDA_VERSION >= 9000
if (device_options_.use_tensor_cores) {
// Enable tensor cores in CUBLAS
// Note if the device does not support tensor cores this will fall back to normal math mode
CUBLAS_SAFE_CALL(cublasSetMathMode(cublas_handle_,
CUBLAS_TENSOR_OP_MATH));
}
#endif


// Initialize the cuSPARSE library
CUSPARSE_SAFE_CALL(cusparseCreate(&cusparse_handle_));
CUSPARSE_SAFE_CALL(cusparseSetStream(cusparse_handle_, cudaStreamPerThread));
Expand Down Expand Up @@ -537,7 +561,8 @@ CuDevice::CuDevice():
initialized_(false),
device_id_copy_(-1),
cublas_handle_(NULL),
cusparse_handle_(NULL) {
cusparse_handle_(NULL),
cusolverdn_handle_(NULL) {
}

CuDevice::~CuDevice() {
Expand All @@ -548,6 +573,11 @@ CuDevice::~CuDevice() {
if (curand_handle_) {
CURAND_SAFE_CALL(curandDestroyGenerator(curand_handle_));
}
#if CUDA_VERSION >= 9100
if (cusolverdn_handle_) {
CUSOLVER_SAFE_CALL(cusolverDnDestroy(cusolverdn_handle_));
}
#endif
}


Expand Down
22 changes: 22 additions & 0 deletions src/cudamatrix/cu-device.h
Expand Up @@ -37,6 +37,16 @@
#include "cudamatrix/cu-allocator.h"
#include "cudamatrix/cu-common.h"

#if CUDA_VERSION >= 9100
#include <cusolverDn.h>
#else
// cusolver not supported.
// Setting a few types to minimize compiler guards.
// If a user tries to use cusovler it will throw an error.
typedef void* cusolverDnHandle_t;
typedef int cusolverStatus_t;
#endif

namespace kaldi {

class CuTimer;
Expand Down Expand Up @@ -83,6 +93,13 @@ class CuDevice {
inline cublasHandle_t GetCublasHandle() { return cublas_handle_; }
inline cusparseHandle_t GetCusparseHandle() { return cusparse_handle_; }
inline curandGenerator_t GetCurandHandle() { return curand_handle_; }
inline cusolverDnHandle_t GetCusolverDnHandle() {
#if CUDA_VERSION < 9100
KALDI_ERR << "CUDA VERSION '" << CUDA_VERSION << "' not new enough to support "
<< "cusolver. Upgrade to at least 9.1";
#endif
return cusolverdn_handle_;
}

inline void SeedGpu() {
if (CuDevice::Instantiate().Enabled()) {
Expand Down Expand Up @@ -304,6 +321,7 @@ class CuDevice {
cublasHandle_t cublas_handle_;
cusparseHandle_t cusparse_handle_;
curandGenerator_t curand_handle_;
cusolverDnHandle_t cusolverdn_handle_;
}; // class CuDevice


Expand All @@ -322,6 +340,10 @@ inline cublasHandle_t GetCublasHandle() {
return CuDevice::Instantiate().GetCublasHandle();
}

inline cusolverDnHandle_t GetCusolverDnHandle() {
return CuDevice::Instantiate().GetCusolverDnHandle();
}

// A more convenient way to get the handle to use cuSPARSE APIs.
inline cusparseHandle_t GetCusparseHandle() {
return CuDevice::Instantiate().GetCusparseHandle();
Expand Down

0 comments on commit c10e02f

Please sign in to comment.