diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 7819e71d7fe2a..8d3c5e69d55cf 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -159,6 +159,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") BUILTIN(__nvvm_prmt, "UiUiUiUi", "") BUILTIN(__nvvm_exit, "v", "r") +BUILTIN(__nvvm_reflect, "UicC*", "r") TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63)) // Min Max diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index ad7c27f2d60d2..4dba7670b5c43 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -44,6 +44,14 @@ __device__ int read_tid() { } +__device__ bool reflect() { + +// CHECK: call i32 @llvm.nvvm.reflect(ptr {{.*}}) + + unsigned x = __nvvm_reflect("__CUDA_ARCH"); + return x >= 700; +} + __device__ int read_ntid() { // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl new file mode 100644 index 0000000000000..9ae4a5f027d35 --- /dev/null +++ b/clang/test/CodeGenOpenCL/reflect.cl @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s + +// CHECK-LABEL: define dso_local zeroext i1 @device_function( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr)) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700 +// CHECK-NEXT: ret i1 [[CMP]] +// +bool device_function() { + return __nvvm_reflect("__CUDA_ARCH") >= 700; +} + +// CHECK-LABEL: define dso_local spir_kernel void @kernel_function( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4 +// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4 +// CHECK-NEXT: ret void +// +__kernel void kernel_function(__global int *i) { + *i = device_function(); +} diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index d825dc8215643..726cea004606e 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1625,7 +1625,8 @@ def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">; def int_nvvm_reflect : - Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">; + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">, + ClangBuiltin<"__nvvm_reflect">; // isspacep.{const, global, local, shared} def int_nvvm_isspacep_const diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll index 1cb5c87fae826..46ab79d9858ca 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll @@ -41,7 +41,7 @@ exit: ret float %ret } -declare i32 @llvm.nvvm.reflect.p0(ptr) +declare i32 @llvm.nvvm.reflect(ptr) ; CHECK-LABEL: define noundef i32 @intrinsic define i32 @intrinsic() { @@ -49,7 +49,7 @@ define i32 @intrinsic() { ; USE_FTZ_0: ret i32 0 ; USE_FTZ_1: ret i32 1 %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str) - %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr) + %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr) ret i32 %reflect } diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll index 9b1939f372082..2ed9f7c11bcf9 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -41,7 +41,7 @@ exit: ret float %ret } -declare i32 @llvm.nvvm.reflect.p0(ptr) +declare i32 @llvm.nvvm.reflect(ptr) ; CHECK-LABEL: define noundef i32 @intrinsic define i32 @intrinsic() { @@ -49,7 +49,7 @@ define i32 @intrinsic() { ; USE_FTZ_0: ret i32 0 ; USE_FTZ_1: ret i32 1 %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str) - %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr) + %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr) ret i32 %reflect }