diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 7ffb38d50a6cf..3275d50a85a4b 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -817,6 +817,7 @@ BUILTIN(__nvvm_compiler_error, "vcC*4", "n") BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") BUILTIN(__nvvm_ldu_c, "ccC*", "") +BUILTIN(__nvvm_ldu_sc, "ScScC*", "") BUILTIN(__nvvm_ldu_s, "ssC*", "") BUILTIN(__nvvm_ldu_i, "iiC*", "") BUILTIN(__nvvm_ldu_l, "LiLiC*", "") @@ -833,11 +834,14 @@ BUILTIN(__nvvm_ldu_f, "ffC*", "") BUILTIN(__nvvm_ldu_d, "ddC*", "") BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "") +BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "") BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "") +BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "") BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "") BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "") BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "") BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "") +BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "") BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "") BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "") @@ -846,6 +850,7 @@ BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "") BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "") BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "") BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "") +BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "") BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "") BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "") @@ -854,6 +859,7 @@ BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "") BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "") BUILTIN(__nvvm_ldg_c, "ccC*", "") +BUILTIN(__nvvm_ldg_sc, "ScScC*", "") BUILTIN(__nvvm_ldg_s, "ssC*", "") BUILTIN(__nvvm_ldg_i, "iiC*", "") BUILTIN(__nvvm_ldg_l, "LiLiC*", "") @@ -870,11 +876,14 @@ BUILTIN(__nvvm_ldg_f, "ffC*", "") BUILTIN(__nvvm_ldg_d, "ddC*", "") BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "") +BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "") BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "") +BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "") BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "") BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "") BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "") BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "") +BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "") BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "") BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "") @@ -883,6 +892,7 @@ BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "") BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "") BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "") BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "") +BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "") BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "") BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3a61fdd65592a..bfa6fd716c5ec 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18422,8 +18422,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, } case NVPTX::BI__nvvm_ldg_c: + case NVPTX::BI__nvvm_ldg_sc: case NVPTX::BI__nvvm_ldg_c2: + case NVPTX::BI__nvvm_ldg_sc2: case NVPTX::BI__nvvm_ldg_c4: + case NVPTX::BI__nvvm_ldg_sc4: case NVPTX::BI__nvvm_ldg_s: case NVPTX::BI__nvvm_ldg_s2: case NVPTX::BI__nvvm_ldg_s4: @@ -18431,6 +18434,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldg_i2: case NVPTX::BI__nvvm_ldg_i4: case NVPTX::BI__nvvm_ldg_l: + case NVPTX::BI__nvvm_ldg_l2: case NVPTX::BI__nvvm_ldg_ll: case NVPTX::BI__nvvm_ldg_ll2: case NVPTX::BI__nvvm_ldg_uc: @@ -18443,6 +18447,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldg_ui2: case NVPTX::BI__nvvm_ldg_ui4: case NVPTX::BI__nvvm_ldg_ul: + case NVPTX::BI__nvvm_ldg_ul2: case NVPTX::BI__nvvm_ldg_ull: case NVPTX::BI__nvvm_ldg_ull2: // PTX Interoperability section 2.2: "For a vector with an even number of @@ -18457,8 +18462,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E); case NVPTX::BI__nvvm_ldu_c: + case NVPTX::BI__nvvm_ldu_sc: case NVPTX::BI__nvvm_ldu_c2: + case NVPTX::BI__nvvm_ldu_sc2: case NVPTX::BI__nvvm_ldu_c4: + case NVPTX::BI__nvvm_ldu_sc4: case NVPTX::BI__nvvm_ldu_s: case NVPTX::BI__nvvm_ldu_s2: case NVPTX::BI__nvvm_ldu_s4: @@ -18466,6 +18474,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldu_i2: case NVPTX::BI__nvvm_ldu_i4: case NVPTX::BI__nvvm_ldu_l: + case NVPTX::BI__nvvm_ldu_l2: case NVPTX::BI__nvvm_ldu_ll: case NVPTX::BI__nvvm_ldu_ll2: case NVPTX::BI__nvvm_ldu_uc: @@ -18478,6 +18487,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldu_ui2: case NVPTX::BI__nvvm_ldu_ui4: case NVPTX::BI__nvvm_ldu_ul: + case NVPTX::BI__nvvm_ldu_ul2: case NVPTX::BI__nvvm_ldu_ull: case NVPTX::BI__nvvm_ldu_ull2: return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E); diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index df8085bd2559b..75cb6835049c6 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -554,10 +554,12 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip, // CHECK-LABEL: nvvm_ldg __device__ void nvvm_ldg(const void *p) { + // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) __nvvm_ldg_c((const char *)p); __nvvm_ldg_uc((const unsigned char *)p); + __nvvm_ldg_sc((const signed char *)p); // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) @@ -590,19 +592,25 @@ __device__ void nvvm_ldg(const void *p) { // elements, its alignment is set to number of elements times the alignment of // its member: n*alignof(t)." + // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) typedef char char2 __attribute__((ext_vector_type(2))); typedef unsigned char uchar2 __attribute__((ext_vector_type(2))); + typedef signed char schar2 __attribute__((ext_vector_type(2))); __nvvm_ldg_c2((const char2 *)p); __nvvm_ldg_uc2((const uchar2 *)p); + __nvvm_ldg_sc2((const schar2 *)p); + // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) typedef char char4 __attribute__((ext_vector_type(4))); typedef unsigned char uchar4 __attribute__((ext_vector_type(4))); + typedef signed char schar4 __attribute__((ext_vector_type(4))); __nvvm_ldg_c4((const char4 *)p); __nvvm_ldg_uc4((const uchar4 *)p); + __nvvm_ldg_sc4((const schar4 *)p); // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4) // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4) @@ -632,6 +640,15 @@ __device__ void nvvm_ldg(const void *p) { __nvvm_ldg_i4((const int4 *)p); __nvvm_ldg_ui4((const uint4 *)p); + // LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) + // LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) + // LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) + // LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) + typedef long long2 __attribute__((ext_vector_type(2))); + typedef unsigned long ulong2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_l2((const long2 *)p); + __nvvm_ldg_ul2((const ulong2 *)p); + // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) typedef long long longlong2 __attribute__((ext_vector_type(2))); @@ -654,10 +671,12 @@ __device__ void nvvm_ldg(const void *p) { // CHECK-LABEL: nvvm_ldu __device__ void nvvm_ldu(const void *p) { + // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) __nvvm_ldu_c((const char *)p); __nvvm_ldu_uc((const unsigned char *)p); + __nvvm_ldu_sc((const signed char *)p); // CHECK: call i16 @llvm.nvvm.ldu.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) // CHECK: call i16 @llvm.nvvm.ldu.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) @@ -681,19 +700,25 @@ __device__ void nvvm_ldu(const void *p) { // CHECK: call double @llvm.nvvm.ldu.global.f.f64.p0(ptr {{%[0-9]+}}, i32 8) __nvvm_ldu_d((const double *)p); + // CHECK: call <2 x i8> @llvm.nvvm.ldu.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) // CHECK: call <2 x i8> @llvm.nvvm.ldu.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) // CHECK: call <2 x i8> @llvm.nvvm.ldu.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) typedef char char2 __attribute__((ext_vector_type(2))); typedef unsigned char uchar2 __attribute__((ext_vector_type(2))); + typedef signed char schar2 __attribute__((ext_vector_type(2))); __nvvm_ldu_c2((const char2 *)p); __nvvm_ldu_uc2((const uchar2 *)p); + __nvvm_ldu_sc2((const schar2 *)p); + // CHECK: call <4 x i8> @llvm.nvvm.ldu.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) // CHECK: call <4 x i8> @llvm.nvvm.ldu.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) // CHECK: call <4 x i8> @llvm.nvvm.ldu.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) typedef char char4 __attribute__((ext_vector_type(4))); typedef unsigned char uchar4 __attribute__((ext_vector_type(4))); + typedef signed char schar4 __attribute__((ext_vector_type(4))); __nvvm_ldu_c4((const char4 *)p); __nvvm_ldu_uc4((const uchar4 *)p); + __nvvm_ldu_sc4((const schar4 *)p); // CHECK: call <2 x i16> @llvm.nvvm.ldu.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4) // CHECK: call <2 x i16> @llvm.nvvm.ldu.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4) @@ -723,6 +748,15 @@ __device__ void nvvm_ldu(const void *p) { __nvvm_ldu_i4((const int4 *)p); __nvvm_ldu_ui4((const uint4 *)p); + // LP32: call <2 x i32> @llvm.nvvm.ldu.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) + // LP32: call <2 x i32> @llvm.nvvm.ldu.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) + // LP64: call <2 x i64> @llvm.nvvm.ldu.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) + // LP64: call <2 x i64> @llvm.nvvm.ldu.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) + typedef long long2 __attribute__((ext_vector_type(2))); + typedef unsigned long ulong2 __attribute__((ext_vector_type(2))); + __nvvm_ldu_l2((const long2 *)p); + __nvvm_ldu_ul2((const ulong2 *)p); + // CHECK: call <2 x i64> @llvm.nvvm.ldu.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) // CHECK: call <2 x i64> @llvm.nvvm.ldu.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) typedef long long longlong2 __attribute__((ext_vector_type(2)));