This repository has been archived by the owner on Apr 23, 2020. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[cuda] Added support for CUDA built-in variables.
Added cuda_builtin_vars.h which implements built-in CUDA variables using __declattr(property). Fields of built-in variables (except for warpSize) are implemented using __declattr(property) which replaces read/write of a member field with a call to a getter/setter member function, in this case with appropriate NVPTX builtin. Added a test case to check diagnostics on attempt to construct or improperly access a built-in variable. Differential Revision: http://reviews.llvm.org/D9064 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@235448 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
4 changed files
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ set(files | |
bmi2intrin.h | ||
bmiintrin.h | ||
cpuid.h | ||
cuda_builtin_vars.h | ||
emmintrin.h | ||
f16cintrin.h | ||
float.h | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
* THE SOFTWARE. | ||
* | ||
*===-----------------------------------------------------------------------=== | ||
*/ | ||
|
||
#ifndef __CUDA_BUILTIN_VARS_H | ||
#define __CUDA_BUILTIN_VARS_H | ||
|
||
// The file implements built-in CUDA variables using __declspec(property). | ||
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx | ||
// All read accesses of built-in variable fields get converted into calls to a | ||
// getter function which in turn would call appropriate builtin to fetch the | ||
// value. | ||
// | ||
// Example: | ||
// int x = threadIdx.x; | ||
// IR output: | ||
// %0 = call i32 @llvm.ptx.read.tid.x() #3 | ||
// PTX output: | ||
// mov.u32 %r2, %tid.x; | ||
|
||
#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ | ||
__declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ | ||
static inline __attribute__((always_inline)) \ | ||
__attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ | ||
return INTRINSIC; \ | ||
} | ||
|
||
#if __cplusplus >= 201103L | ||
#define __DELETE =delete | ||
#else | ||
#define __DELETE | ||
#endif | ||
|
||
// Make sure nobody can create instances of the special varible types. nvcc | ||
// also disallows taking address of special variables, so we disable address-of | ||
// operator as well. | ||
#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ | ||
__attribute__((device)) TypeName() __DELETE; \ | ||
__attribute__((device)) TypeName(const TypeName &) __DELETE; \ | ||
__attribute__((device)) void operator=(const TypeName &) const __DELETE; \ | ||
__attribute__((device)) TypeName *operator&() const __DELETE | ||
|
||
struct __cuda_builtin_threadIdx_t { | ||
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x()); | ||
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y()); | ||
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z()); | ||
private: | ||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); | ||
}; | ||
|
||
struct __cuda_builtin_blockIdx_t { | ||
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x()); | ||
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y()); | ||
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z()); | ||
private: | ||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); | ||
}; | ||
|
||
struct __cuda_builtin_blockDim_t { | ||
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x()); | ||
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y()); | ||
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z()); | ||
private: | ||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); | ||
}; | ||
|
||
struct __cuda_builtin_gridDim_t { | ||
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x()); | ||
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y()); | ||
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z()); | ||
private: | ||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); | ||
}; | ||
|
||
#define __CUDA_BUILTIN_VAR \ | ||
extern const __attribute__((device)) __attribute__((weak)) | ||
__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; | ||
__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; | ||
__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; | ||
__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; | ||
|
||
// warpSize should translate to read of %WARP_SZ but there's currently no | ||
// builtin to do so. According to PTX v4.2 docs 'to date, all target | ||
// architectures have a WARP_SZ value of 32'. | ||
__attribute__((device)) const int warpSize = 32; | ||
|
||
#undef __CUDA_DEVICE_BUILTIN | ||
#undef __CUDA_BUILTIN_VAR | ||
#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS | ||
|
||
#endif /* __CUDA_BUILTIN_VARS_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s | ||
|
||
#include "cuda_builtin_vars.h" | ||
|
||
// CHECK: define void @_Z6kernelPi(i32* %out) | ||
__attribute__((global)) | ||
void kernel(int *out) { | ||
int i = 0; | ||
out[i++] = threadIdx.x; // CHECK: call i32 @llvm.ptx.read.tid.x() | ||
out[i++] = threadIdx.y; // CHECK: call i32 @llvm.ptx.read.tid.y() | ||
out[i++] = threadIdx.z; // CHECK: call i32 @llvm.ptx.read.tid.z() | ||
|
||
out[i++] = blockIdx.x; // CHECK: call i32 @llvm.ptx.read.ctaid.x() | ||
out[i++] = blockIdx.y; // CHECK: call i32 @llvm.ptx.read.ctaid.y() | ||
out[i++] = blockIdx.z; // CHECK: call i32 @llvm.ptx.read.ctaid.z() | ||
|
||
out[i++] = blockDim.x; // CHECK: call i32 @llvm.ptx.read.ntid.x() | ||
out[i++] = blockDim.y; // CHECK: call i32 @llvm.ptx.read.ntid.y() | ||
out[i++] = blockDim.z; // CHECK: call i32 @llvm.ptx.read.ntid.z() | ||
|
||
out[i++] = gridDim.x; // CHECK: call i32 @llvm.ptx.read.nctaid.x() | ||
out[i++] = gridDim.y; // CHECK: call i32 @llvm.ptx.read.nctaid.y() | ||
out[i++] = gridDim.z; // CHECK: call i32 @llvm.ptx.read.nctaid.z() | ||
|
||
out[i++] = warpSize; // CHECK: store i32 32, | ||
|
||
// CHECK: ret void | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" -fcuda-is-device -fsyntax-only -verify %s | ||
|
||
#include "cuda_builtin_vars.h" | ||
__attribute__((global)) | ||
void kernel(int *out) { | ||
int i = 0; | ||
out[i++] = threadIdx.x; | ||
threadIdx.x = 0; // expected-error {{no setter defined for property 'x'}} | ||
out[i++] = threadIdx.y; | ||
threadIdx.y = 0; // expected-error {{no setter defined for property 'y'}} | ||
out[i++] = threadIdx.z; | ||
threadIdx.z = 0; // expected-error {{no setter defined for property 'z'}} | ||
|
||
out[i++] = blockIdx.x; | ||
blockIdx.x = 0; // expected-error {{no setter defined for property 'x'}} | ||
out[i++] = blockIdx.y; | ||
blockIdx.y = 0; // expected-error {{no setter defined for property 'y'}} | ||
out[i++] = blockIdx.z; | ||
blockIdx.z = 0; // expected-error {{no setter defined for property 'z'}} | ||
|
||
out[i++] = blockDim.x; | ||
blockDim.x = 0; // expected-error {{no setter defined for property 'x'}} | ||
out[i++] = blockDim.y; | ||
blockDim.y = 0; // expected-error {{no setter defined for property 'y'}} | ||
out[i++] = blockDim.z; | ||
blockDim.z = 0; // expected-error {{no setter defined for property 'z'}} | ||
|
||
out[i++] = gridDim.x; | ||
gridDim.x = 0; // expected-error {{no setter defined for property 'x'}} | ||
out[i++] = gridDim.y; | ||
gridDim.y = 0; // expected-error {{no setter defined for property 'y'}} | ||
out[i++] = gridDim.z; | ||
gridDim.z = 0; // expected-error {{no setter defined for property 'z'}} | ||
|
||
out[i++] = warpSize; | ||
warpSize = 0; // expected-error {{cannot assign to variable 'warpSize' with const-qualified type 'const int'}} | ||
// expected-note@cuda_builtin_vars.h:104 {{variable 'warpSize' declared const here}} | ||
|
||
// Make sure we can't construct or assign to the special variables. | ||
__cuda_builtin_threadIdx_t x; // expected-error {{calling a private constructor of class '__cuda_builtin_threadIdx_t'}} | ||
// expected-note@cuda_builtin_vars.h:67 {{declared private here}} | ||
|
||
__cuda_builtin_threadIdx_t y = threadIdx; // expected-error {{calling a private constructor of class '__cuda_builtin_threadIdx_t'}} | ||
// expected-note@cuda_builtin_vars.h:67 {{declared private here}} | ||
|
||
threadIdx = threadIdx; // expected-error {{'operator=' is a private member of '__cuda_builtin_threadIdx_t'}} | ||
// expected-note@cuda_builtin_vars.h:67 {{declared private here}} | ||
|
||
void *ptr = &threadIdx; // expected-error {{'operator&' is a private member of '__cuda_builtin_threadIdx_t'}} | ||
// expected-note@cuda_builtin_vars.h:67 {{declared private here}} | ||
|
||
// Following line should've caused an error as one is not allowed to | ||
// take address of a built-in variable in CUDA. Alas there's no way | ||
// to prevent getting address of a 'const int', so the line | ||
// currently compiles without errors or warnings. | ||
const void *wsptr = &warpSize; | ||
} |