Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Finally Cuda 9.0 and Cuda 9.1
  • Loading branch information
kunzmi committed Feb 5, 2018
1 parent 7031120 commit a92cb8a
Show file tree
Hide file tree
Showing 37 changed files with 17,677 additions and 6,667 deletions.
378 changes: 278 additions & 100 deletions CudaBlas/CudaBlasHandler.cs

Large diffs are not rendered by default.

221 changes: 183 additions & 38 deletions CudaBlas/CudaBlasNativeMethods.cs
Expand Up @@ -35,9 +35,9 @@ public static class CudaBlasNativeMethods
{
//unfortunately Nvidia provides different dll-names for x86 and x64. Use preprocessor macro to switch names:
#if _x64
internal const string CUBLAS_API_DLL_NAME = "cublas64_80";
internal const string CUBLAS_API_DLL_NAME = "cublas64_91";
#else
internal const string CUBLAS_API_DLL_NAME = "cublas32_80";
internal const string CUBLAS_API_DLL_NAME = "cublas32_91";
#endif

#region Basics
Expand Down Expand Up @@ -78,29 +78,38 @@ public static class CudaBlasNativeMethods
/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSetAtomicsMode(CudaBlasHandle handle, AtomicsMode mode);

#endregion

#region Set and Get

/// <summary>
/// copies n elements from a vector x in CPU memory space to a vector y
/// in GPU memory space. Elements in both vectors are assumed to have a
/// size of elemSize bytes. Storage spacing between consecutive elements
/// is incx for the source vector x and incy for the destination vector
/// y. In general, y points to an object, or part of an object, allocated
/// via cublasAlloc(). Column major format for two-dimensional matrices
/// is assumed throughout CUBLAS. Therefore, if the increment for a vector
/// is equal to 1, this accesses a column vector while using an increment
/// equal to the leading dimension of the respective matrix accesses a
/// row vector.
/// </summary>
/// <returns>
/// CudaBlas Error Codes: <see cref="CublasStatus.Success"/>, <see cref="CublasStatus.InvalidValue"/>,
/// <see cref="CublasStatus.MappingError"/>, <see cref="CublasStatus.NotInitialized"/>.
/// </returns>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSetAtomicsMode(CudaBlasHandle handle, AtomicsMode mode);

/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasGetMathMode(CudaBlasHandle handle, ref Math mode);
/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSetMathMode(CudaBlasHandle handle, Math mode);

#endregion

#region Set and Get

/// <summary>
/// copies n elements from a vector x in CPU memory space to a vector y
/// in GPU memory space. Elements in both vectors are assumed to have a
/// size of elemSize bytes. Storage spacing between consecutive elements
/// is incx for the source vector x and incy for the destination vector
/// y. In general, y points to an object, or part of an object, allocated
/// via cublasAlloc(). Column major format for two-dimensional matrices
/// is assumed throughout CUBLAS. Therefore, if the increment for a vector
/// is equal to 1, this accesses a column vector while using an increment
/// equal to the leading dimension of the respective matrix accesses a
/// row vector.
/// </summary>
/// <returns>
/// CudaBlas Error Codes: <see cref="CublasStatus.Success"/>, <see cref="CublasStatus.InvalidValue"/>,
/// <see cref="CublasStatus.MappingError"/>, <see cref="CublasStatus.NotInitialized"/>.
/// </returns>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSetVector(int n, int elemSize, [In] IntPtr x, int incx, CUdeviceptr devicePtr, int incy);

/// <summary>
Expand Down Expand Up @@ -3103,12 +3112,48 @@ public static class CudaBlasNativeMethods
int ldb,
CUdeviceptr beta, /* host or device pointer */
CUdeviceptr C,
int ldc);

/* IO in FP16/FP32, computation in float */
/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
int ldc);
/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasHgemmBatched(CudaBlasHandle handle,
Operation transa,
Operation transb,
int m,
int n,
int k,
ref half alpha, /* host or device pointer */
CUdeviceptr A,
int lda,
CUdeviceptr B,
int ldb,
ref half beta, /* host or device pointer */
CUdeviceptr C,
int ldc,
int batchCount);
/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasHgemmBatched(CudaBlasHandle handle,
Operation transa,
Operation transb,
int m,
int n,
int k,
CUdeviceptr alpha, /* host or device pointer */
CUdeviceptr A,
int lda,
CUdeviceptr B,
int ldb,
CUdeviceptr beta, /* host or device pointer */
CUdeviceptr C,
int ldc,
int batchCount);

/* IO in FP16/FP32, computation in float */
/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSgemmEx (CudaBlasHandle handle,
Operation transa,
Operation transb,
Expand Down Expand Up @@ -4918,14 +4963,64 @@ public static class CudaBlasNativeMethods
CUdeviceptr beta, /* host or device pointer */
CUdeviceptr Carray,
int ldc,
int batchCount);




/// <summary>
int batchCount);


/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasGemmBatchedEx(CudaBlasHandle handle,
Operation transa,
Operation transb,
int m,
int n,
int k,
CUdeviceptr alpha, /* host or device pointer */
CUdeviceptr Aarray,
cudaDataType Atype,
int lda,
CUdeviceptr Barray,
cudaDataType Btype,
int ldb,
CUdeviceptr beta, /* host or device pointer */
CUdeviceptr Carray,
cudaDataType Ctype,
int ldc,
int batchCount,
cudaDataType computeType,
GemmAlgo algo);

/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasGemmStridedBatchedEx(CudaBlasHandle handle,
Operation transa,
Operation transb,
int m,
int n,
int k,
CUdeviceptr alpha, /* host or device pointer */
CUdeviceptr A,
cudaDataType Atype,
int lda,
long strideA, /* purposely signed */
CUdeviceptr B,
cudaDataType Btype,
int ldb,
long strideB,
CUdeviceptr beta, /* host or device pointer */
CUdeviceptr C,
cudaDataType Ctype,
int ldc,
long strideC,
int batchCount,
cudaDataType computeType,
GemmAlgo algo);


/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSgemmStridedBatched (CudaBlasHandle handle,
Operation transa,
Operation transb,
Expand Down Expand Up @@ -5115,10 +5210,60 @@ public static class CudaBlasNativeMethods
int ldc,
int batchCount);

/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasGemmBatchedEx(CudaBlasHandle handle,
Operation transa,
Operation transb,
int m,
int n,
int k,
IntPtr alpha, /* host or device pointer */
CUdeviceptr Aarray,
cudaDataType Atype,
int lda,
CUdeviceptr Barray,
cudaDataType Btype,
int ldb,
IntPtr beta, /* host or device pointer */
CUdeviceptr Carray,
cudaDataType Ctype,
int ldc,
int batchCount,
cudaDataType computeType,
GemmAlgo algo);

/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasGemmStridedBatchedEx(CudaBlasHandle handle,
Operation transa,
Operation transb,
int m,
int n,
int k,
IntPtr alpha, /* host or device pointer */
CUdeviceptr A,
cudaDataType Atype,
int lda,
long strideA, /* purposely signed */
CUdeviceptr B,
cudaDataType Btype,
int ldb,
long strideB,
IntPtr beta, /* host or device pointer */
CUdeviceptr C,
cudaDataType Ctype,
int ldc,
long strideC,
int batchCount,
cudaDataType computeType,
GemmAlgo algo);

/// <summary>
/// </summary>
[DllImport(CUBLAS_API_DLL_NAME)]
public static extern CublasStatus cublasSgemmStridedBatched(CudaBlasHandle handle,
Operation transa,
Operation transb,
Expand Down
77 changes: 69 additions & 8 deletions CudaBlas/CudaBlasTypes.cs
Expand Up @@ -192,7 +192,7 @@ public enum GemmAlgo
{
/// <summary>
/// </summary>
Default = 1,
Default = -1,
/// <summary>
/// </summary>
Algo0 = 0,
Expand All @@ -216,15 +216,76 @@ public enum GemmAlgo
Algo6 = 6,
/// <summary>
/// </summary>
Algo7 = 7
Algo7 = 7,
/// <summary>
/// </summary>
Algo8 = 8,
/// <summary>
/// </summary>
Algo9 = 9,
/// <summary>
/// </summary>
Algo10 = 10,
/// <summary>
/// </summary>
Algo11 = 11,
/// <summary>
/// </summary>
Algo12 = 12,
/// <summary>
/// </summary>
Algo13 = 13,
/// <summary>
/// </summary>
Algo14 = 14,
/// <summary>
/// </summary>
Algo15 = 15,
/// <summary>
/// </summary>
Algo16 = 16,
/// <summary>
/// </summary>
Algo17 = 17,
/// <summary>
/// </summary>
DefaultTensorOp = 99,
/// <summary>
/// </summary>
Algo0TensorOp = 100,
/// <summary>
/// </summary>
Algo1TensorOp = 101,
/// <summary>
/// </summary>
Algo2TensorOp = 102,
/// <summary>
/// </summary>
Algo3TensorOp = 103,
/// <summary>
/// </summary>
Algo4TensorOp = 104
}

/// <summary>
/// The cublasDataType_t type is an enumerant to specify the data precision. It is used
/// when the data reference does not carry the type itself (e.g. void *).
/// To mimic the typedef in cublas_api.h, we redefine the enum identically to cudaDataType
/// </summary>
public enum DataType
/// <summary>
/// Enum for default math mode/tensor operation
/// </summary>
public enum Math
{
/// <summary>
/// </summary>
DefaultMath = 0,
/// <summary>
/// </summary>
TensorOpMath = 1
}

/// <summary>
/// The cublasDataType_t type is an enumerant to specify the data precision. It is used
/// when the data reference does not carry the type itself (e.g. void *).
/// To mimic the typedef in cublas_api.h, we redefine the enum identically to cudaDataType
/// </summary>
public enum DataType
{
///// <summary>
///// the data type is 32-bit floating-point
Expand Down
4 changes: 2 additions & 2 deletions CudaFFT/CudaFFTNativeMethods.cs
Expand Up @@ -35,9 +35,9 @@ public static class CudaFFTNativeMethods
{
//unfortunately Nvidia provides different dll-names for x86 and x64. Use preprocessor macro to switch names:
#if _x64
internal const string CUFFT_API_DLL_NAME = "cufft64_80";
internal const string CUFFT_API_DLL_NAME = "cufft64_91";
#else
internal const string CUFFT_API_DLL_NAME = "cufft32_80";
internal const string CUFFT_API_DLL_NAME = "cufft32_91";
#endif


Expand Down
4 changes: 2 additions & 2 deletions CudaRand/CudaRandNativeMethods.cs
Expand Up @@ -36,9 +36,9 @@ public static class CudaRandNativeMethods
{
//unfortunately Nvidia provides different dll-names for x86 and x64. Use preprocessor macro to switch names:
#if _x64
internal const string CURAND_API_DLL_NAME = "curand64_80";
internal const string CURAND_API_DLL_NAME = "curand64_91";
#else
internal const string CURAND_API_DLL_NAME = "curand32_80";
internal const string CURAND_API_DLL_NAME = "curand32_91";
#endif


Expand Down
1 change: 1 addition & 0 deletions CudaSparse/CudaSparse.csproj
Expand Up @@ -127,6 +127,7 @@
<Compile Include="CudaSparseHybMat.cs" />
<Compile Include="CudaSparseMatrixDescriptor.cs" />
<Compile Include="CudaSparseNativeMethods.cs" />
<Compile Include="CudaSparsePruneInfo.cs" />
<Compile Include="CudaSparseSolveAnalysisInfo.cs" />
<Compile Include="CudaSparseTypes.cs" />
<Compile Include="CudaSparseCsrsv2Info.cs" />
Expand Down

0 comments on commit a92cb8a

Please sign in to comment.