Skip to content

Commit

Permalink
[CUDA] Add conversion operators for threadIdx, blockIdx, gridDim, and…
Browse files Browse the repository at this point in the history
… blockDim to uint3 and dim3.

Summary:
This lets you write, e.g.

  uint3 a = threadIdx;
  uint3 b = blockIdx;
  dim3 c = gridDim;
  dim3 d = blockDim;

which is legal in nvcc, but was not legal in clang.

The fact that e.g. the type of threadIdx is not actually uint3 is still
observable, but now you have to try to observe it.

Reviewers: tra

Subscribers: echristo, cfe-commits

Differential Revision: http://reviews.llvm.org/D17561

llvm-svn: 261777
  • Loading branch information
Justin Lebar committed Feb 24, 2016
1 parent c8dae53 commit d7a3549
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
27 changes: 27 additions & 0 deletions clang/lib/Headers/__clang_cuda_runtime_wrapper.h
Expand Up @@ -245,6 +245,33 @@ __device__ static inline void *malloc(size_t __size) {
}
} // namespace std

// Out-of-line implementations from cuda_builtin_vars.h. These need to come
// after we've pulled in the definition of uint3 and dim3.

// Conversion of the built-in threadIdx object to a plain uint3: each of the
// x, y and z properties is read once and copied into the matching field of
// the result.
__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
  uint3 __tid = {x, y, z};
  return __tid;
}

// Conversion of the built-in blockIdx object to a plain uint3: each of the
// x, y and z properties is read once and copied into the matching field of
// the result.
__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
  uint3 __bid = {x, y, z};
  return __bid;
}

// Conversion of the built-in blockDim object to a dim3, constructed from the
// object's x, y and z properties.
__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
  dim3 __block_dims(x, y, z);
  return __block_dims;
}

// Conversion of the built-in gridDim object to a dim3, constructed from the
// object's x, y and z properties.
__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
  dim3 __grid_dims(x, y, z);
  return __grid_dims;
}

#include <__clang_cuda_cmath.h>

// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
Expand Down
18 changes: 17 additions & 1 deletion clang/lib/Headers/cuda_builtin_vars.h
Expand Up @@ -24,10 +24,14 @@
#ifndef __CUDA_BUILTIN_VARS_H
#define __CUDA_BUILTIN_VARS_H

// Forward declares from vector_types.h.
struct uint3;
struct dim3;

// The file implements built-in CUDA variables using __declspec(property).
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
// All read accesses of built-in variable fields get converted into calls to a
// getter function which in turn calls the appropriate builtin to fetch the
// value.
//
// Example:
Expand Down Expand Up @@ -63,6 +67,9 @@ struct __cuda_builtin_threadIdx_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
Expand All @@ -71,6 +78,9 @@ struct __cuda_builtin_blockIdx_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
Expand All @@ -79,6 +89,9 @@ struct __cuda_builtin_blockDim_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
Expand All @@ -87,6 +100,9 @@ struct __cuda_builtin_gridDim_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};
Expand Down

0 comments on commit d7a3549

Please sign in to comment.