|
| 1 | +/*===---- cuda_runtime.h - CUDA runtime support ----------------------------=== |
| 2 | + * |
| 3 | + * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 4 | + * of this software and associated documentation files (the "Software"), to deal |
| 5 | + * in the Software without restriction, including without limitation the rights |
| 6 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 7 | + * copies of the Software, and to permit persons to whom the Software is |
| 8 | + * furnished to do so, subject to the following conditions: |
| 9 | + * |
| 10 | + * The above copyright notice and this permission notice shall be included in |
| 11 | + * all copies or substantial portions of the Software. |
| 12 | + * |
| 13 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 14 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 15 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 16 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 17 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 18 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 19 | + * THE SOFTWARE. |
| 20 | + * |
| 21 | + *===-----------------------------------------------------------------------=== |
| 22 | + */ |
| 23 | + |
| 24 | +#ifndef __CLANG_CUDA_RUNTIME_H__ |
| 25 | +#define __CLANG_CUDA_RUNTIME_H__ |
| 26 | + |
| 27 | +#if defined(__CUDA__) && defined(__clang__) |
| 28 | + |
| 29 | +// Include some standard headers to avoid CUDA headers including them |
| 30 | +// while some required macros (like __THROW) are in a weird state. |
| 31 | +#include <stdlib.h> |
| 32 | + |
| 33 | +// Preserve common macros that will be changed below by us or by CUDA |
| 34 | +// headers. |
| 35 | +#pragma push_macro("__THROW") |
| 36 | +#pragma push_macro("__CUDA_ARCH__") |
| 37 | + |
| 38 | +// WARNING: Preprocessor hacks below are based on specific of |
| 39 | +// implementation of CUDA-7.x headers and are expected to break with |
| 40 | +// any other version of CUDA headers. |
| 41 | +#include "cuda.h" |
| 42 | +#if !defined(CUDA_VERSION) |
| 43 | +#error "cuda.h did not define CUDA_VERSION" |
| 44 | +#elif CUDA_VERSION < 7000 || CUDA_VERSION > 7050 |
| 45 | +#error "Unsupported CUDA version!" |
| 46 | +#endif |
| 47 | + |
| 48 | +// Make largest subset of device functions available during host |
| 49 | +// compilation -- SM_35 for the time being. |
| 50 | +#ifndef __CUDA_ARCH__ |
| 51 | +#define __CUDA_ARCH__ 350 |
| 52 | +#endif |
| 53 | + |
| 54 | +#include "cuda_builtin_vars.h" |
| 55 | + |
| 56 | +// No need for device_launch_parameters.h as cuda_builtin_vars.h above |
| 57 | +// has taken care of builtin variables declared in the file. |
| 58 | +#define __DEVICE_LAUNCH_PARAMETERS_H__ |
| 59 | + |
| 60 | +// {math,device}_functions.h only have declarations of the |
| 61 | +// functions. We don't need them as we're going to pull in their |
| 62 | +// definitions from .hpp files. |
| 63 | +#define __DEVICE_FUNCTIONS_H__ |
| 64 | +#define __MATH_FUNCTIONS_H__ |
| 65 | + |
| 66 | +#undef __CUDACC__ |
| 67 | +#define __CUDABE__ |
| 68 | +// Disables definitions of device-side runtime support stubs in |
| 69 | +// cuda_device_runtime_api.h |
| 70 | +#define __CUDADEVRT_INTERNAL__ |
| 71 | +#include "host_config.h" |
| 72 | +#include "host_defines.h" |
| 73 | +#include "driver_types.h" |
| 74 | +#include "common_functions.h" |
| 75 | +#undef __CUDADEVRT_INTERNAL__ |
| 76 | + |
| 77 | +#undef __CUDABE__ |
| 78 | +#define __CUDACC__ |
| 79 | +#include_next "cuda_runtime.h" |
| 80 | + |
| 81 | +#undef __CUDACC__ |
| 82 | +#define __CUDABE__ |
| 83 | + |
| 84 | +// CUDA headers use __nvvm_memcpy and __nvvm_memset which clang does |
| 85 | +// not have at the moment. Emulate them with a builtin memcpy/memset. |
| 86 | +#define __nvvm_memcpy(s,d,n,a) __builtin_memcpy(s,d,n) |
| 87 | +#define __nvvm_memset(d,c,n,a) __builtin_memset(d,c,n) |
| 88 | + |
| 89 | +#include "crt/host_runtime.h" |
| 90 | +#include "crt/device_runtime.h" |
| 91 | +// device_runtime.h defines __cxa_* macros that will conflict with |
| 92 | +// cxxabi.h. |
| 93 | +// FIXME: redefine these as __device__ functions. |
| 94 | +#undef __cxa_vec_ctor |
| 95 | +#undef __cxa_vec_cctor |
| 96 | +#undef __cxa_vec_dtor |
| 97 | +#undef __cxa_vec_new2 |
| 98 | +#undef __cxa_vec_new3 |
| 99 | +#undef __cxa_vec_delete2 |
| 100 | +#undef __cxa_vec_delete |
| 101 | +#undef __cxa_vec_delete3 |
| 102 | +#undef __cxa_pure_virtual |
| 103 | + |
| 104 | +// We need decls for functions in CUDA's libdevice woth __device__ |
| 105 | +// attribute only. Alas they come either as __host__ __device__ or |
| 106 | +// with no attributes at all. To work around that, define __CUDA_RTC__ |
| 107 | +// which produces HD variant and undef __host__ which gives us desided |
| 108 | +// decls with __device__ attribute. |
| 109 | +#pragma push_macro("__host__") |
| 110 | +#define __host__ |
| 111 | +#define __CUDACC_RTC__ |
| 112 | +#include "device_functions_decls.h" |
| 113 | +#undef __CUDACC_RTC__ |
| 114 | + |
| 115 | +// Temporarily poison __host__ macro to ensure it's not used by any of |
| 116 | +// the headers we're about to include. |
| 117 | +#define __host__ UNEXPECTED_HOST_ATTRIBUTE |
| 118 | + |
| 119 | +// device_functions.hpp and math_functions*.hpp use 'static |
| 120 | +// __forceinline__' (with no __device__) for definitions of device |
| 121 | +// functions. Temporarily redefine __forceinline__ to include |
| 122 | +// __device__. |
| 123 | +#pragma push_macro("__forceinline__") |
| 124 | +#define __forceinline__ __device__ __inline__ __attribute__((always_inline)) |
| 125 | +#include "device_functions.hpp" |
| 126 | +#include "math_functions.hpp" |
| 127 | +#include "math_functions_dbl_ptx3.hpp" |
| 128 | +#pragma pop_macro("__forceinline__") |
| 129 | + |
| 130 | +// For some reason single-argument variant is not always declared by |
| 131 | +// CUDA headers. Alas, device_functions.hpp included below needs it. |
| 132 | +static inline __device__ void __brkpt(int c) { __brkpt(); } |
| 133 | + |
| 134 | +// Now include *.hpp with definitions of various GPU functions. Alas, |
| 135 | +// a lot of thins get declared/defined with __host__ attribute which |
| 136 | +// we don't want and we have to define it out. We also have to include |
| 137 | +// {device,math}_functions.hpp again in order to extract the other |
| 138 | +// branch of #if/else inside. |
| 139 | + |
| 140 | +#define __host__ |
| 141 | +#undef __CUDABE__ |
| 142 | +#define __CUDACC__ |
| 143 | +#undef __DEVICE_FUNCTIONS_HPP__ |
| 144 | +#include "device_functions.hpp" |
| 145 | +#include "device_atomic_functions.hpp" |
| 146 | +#include "sm_20_atomic_functions.hpp" |
| 147 | +#include "sm_32_atomic_functions.hpp" |
| 148 | +#include "sm_20_intrinsics.hpp" |
| 149 | +// sm_30_intrinsics.h has declarations that use default argument, so |
| 150 | +// we have to include it and it will in turn include .hpp |
| 151 | +#include "sm_30_intrinsics.h" |
| 152 | +#include "sm_32_intrinsics.hpp" |
| 153 | +#undef __MATH_FUNCTIONS_HPP__ |
| 154 | +#include "math_functions.hpp" |
| 155 | +#pragma pop_macro("__host__") |
| 156 | + |
| 157 | +#include "texture_indirect_functions.h" |
| 158 | + |
| 159 | +// Restore state of __CUDA_ARCH__ and __THROW we had on entry. |
| 160 | +#pragma pop_macro("__CUDA_ARCH__") |
| 161 | +#pragma pop_macro("__THROW") |
| 162 | + |
| 163 | +// Set up compiler macros expected to be seen during compilation. |
| 164 | +#undef __CUDABE__ |
| 165 | +#define __CUDACC__ |
| 166 | +#define __NVCC__ |
| 167 | + |
| 168 | +#if defined(__CUDA_ARCH__) |
| 169 | +// We need to emit IR declaration for non-existing __nvvm_reflect to |
| 170 | +// let backend know that it should be treated as const nothrow |
| 171 | +// function which is implicitly assumed by NVVMReflect pass. |
| 172 | +extern "C" __device__ __attribute__((const)) int __nvvm_reflect(const void *); |
| 173 | +static __device__ __attribute__((used)) int __nvvm_reflect_anchor() { |
| 174 | + return __nvvm_reflect("NONE"); |
| 175 | +} |
| 176 | +#endif |
| 177 | + |
| 178 | +#endif // __CUDA__ |
| 179 | +#endif // __CLANG_CUDA_RUNTIME_H__ |
0 commit comments