diff --git a/clang/test/CodeGenCUDA/address-spaces.cu b/clang/test/CodeGenCUDA/address-spaces.cu index dde36a618b3492..0608c9cabd0489 100644 --- a/clang/test/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CodeGenCUDA/address-spaces.cu @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s -// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s // Verifies Clang emits correct address spaces and addrspacecast instructions // for CUDA code. @@ -28,17 +28,17 @@ struct MyStruct { // CHECK: @b ={{.*}} addrspace(3) global float undef __device__ void foo() { - // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) + // CHECK: load i32, ptr addrspacecast (ptr addrspace(1) @i to ptr) i++; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*) + // CHECK: load i32, ptr addrspacecast (ptr addrspace(4) @j to ptr) j++; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*) + // CHECK: load i32, ptr addrspacecast (ptr addrspace(3) @k to ptr) k++; __shared__ int lk; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) + // CHECK: load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ3foovE2lk to ptr) lk++; } @@ -49,7 +49,7 @@ __device__ void func0() { ap->data2 = 2; } // CHECK: define{{.*}} void @_Z5func0v() -// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %{{.*}} +// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func0vE1a to ptr), ptr %{{.*}} __device__ void callee(float *ap) { *ap = 1.0f; @@ -60,7 +60,7 @@ __device__ void func1() { callee(&a); // implicit cast from parameters } // CHECK: define{{.*}} void @_Z5func1v() -// CHECK: call void @_Z6calleePf(float* noundef addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*)) +// CHECK: call void @_Z6calleePf(ptr noundef addrspacecast (ptr addrspace(3) @_ZZ5func1vE1a to ptr)) __device__ void func2() { __shared__ float a[256]; @@ -68,7 +68,7 @@ __device__ void func2() { *ap = 1.0f; } // CHECK: define{{.*}} void @_Z5func2v() -// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i{{32|64}} 0, i{{32|64}} 128), float** %{{.*}} +// CHECK: store ptr getelementptr inbounds ([256 x float], ptr addrspacecast (ptr addrspace(3) @_ZZ5func2vE1a to ptr), i{{32|64}} 0, i{{32|64}} 128), ptr %{{.*}} __device__ void func3() { __shared__ float a; @@ -76,7 +76,7 @@ __device__ void func3() { *ap = 1.0f; } // CHECK: define{{.*}} void @_Z5func3v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %{{.*}} +// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func3vE1a to ptr), ptr %{{.*}} __device__ void func4() { __shared__ float a; @@ -84,12 +84,12 @@ __device__ void func4() { *ap = 1.0f; } // CHECK: define{{.*}} void @_Z5func4v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %{{.*}} +// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func4vE1a to ptr), ptr %{{.*}} __shared__ float b; __device__ float *func5() { return &b; // implicit cast from a return value } -// CHECK: define{{.*}} float* @_Z5func5v() -// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*) +// CHECK: define{{.*}} ptr @_Z5func5v() +// CHECK: ret ptr addrspacecast (ptr addrspace(3) @b to ptr) diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu index 5d64be1f52619b..2e303dcebacc6f 100644 --- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu +++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ // RUN: -fnative-half-arguments-and-returns | FileCheck %s @@ -9,33 +9,33 @@ __device__ float ffp1(float *p) { // CHECK-LABEL: @_Z4ffp1Pf - // CHECK: atomicrmw fadd float* {{.*}} monotonic + // CHECK: atomicrmw fadd ptr {{.*}} monotonic return __atomic_fetch_add(p, 1.0f, memory_order_relaxed); } __device__ double ffp2(double *p) { // CHECK-LABEL: @_Z4ffp2Pd - // CHECK: atomicrmw fsub double* {{.*}} monotonic + // CHECK: atomicrmw fsub ptr {{.*}} monotonic return __atomic_fetch_sub(p, 1.0, memory_order_relaxed); } // long double is the same as double for amdgcn. __device__ long double ffp3(long double *p) { // CHECK-LABEL: @_Z4ffp3Pe - // CHECK: atomicrmw fsub double* {{.*}} monotonic + // CHECK: atomicrmw fsub ptr {{.*}} monotonic return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed); } __device__ double ffp4(double *p, float f) { // CHECK-LABEL: @_Z4ffp4Pdf // CHECK: fpext float {{.*}} to double - // CHECK: atomicrmw fsub double* {{.*}} monotonic + // CHECK: atomicrmw fsub ptr {{.*}} monotonic return __atomic_fetch_sub(p, f, memory_order_relaxed); } __device__ double ffp5(double *p, int i) { // CHECK-LABEL: @_Z4ffp5Pdi // CHECK: sitofp i32 {{.*}} to double - // CHECK: atomicrmw fsub double* {{.*}} monotonic + // CHECK: atomicrmw fsub ptr {{.*}} monotonic return __atomic_fetch_sub(p, i, memory_order_relaxed); } diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index 4d788e6807ab29..e506b875b67481 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,31 +1,31 @@ -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s #include "Inputs/cuda.h" // PRECOV5-LABEL: test_get_workgroup_size -// PRECOV5: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 4 -// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load -// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 6 -// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load -// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 8 -// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// PRECOV5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 +// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 +// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 +// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load // COV5-LABEL: test_get_workgroup_size -// COV5: call align 8 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() -// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 12 -// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load -// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 14 -// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load -// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 16 -// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// COV5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 +// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 +// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 +// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load __device__ void test_get_workgroup_size(int d, int *out) { switch (d) { diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu index ef0128f4609565..13f4a015386cbd 100644 --- a/clang/test/CodeGenCUDA/atomic-ops.cu +++ b/clang/test/CodeGenCUDA/atomic-ops.cu @@ -1,18 +1,18 @@ -// RUN: %clang_cc1 -no-opaque-pointers -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s #include "Inputs/cuda.h" // CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii -// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4 -// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("singlethread-one-as") monotonic, align 4 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4 +// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 4 __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); @@ -29,8 +29,8 @@ __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) { } // CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj -// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); @@ -38,17 +38,17 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in } // CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii -// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4 -// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4 +// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4 __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); @@ -65,8 +65,8 @@ __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) { } // CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj -// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); @@ -74,16 +74,16 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v } // CHECK-LABEL: @_Z21atomic32_op_workgroupPiii -// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4 -// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4 __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); @@ -99,8 +99,8 @@ __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) { } // CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj -// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); @@ -108,16 +108,16 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v } // CHECK-LABEL: @_Z17atomic32_op_agentPiii -// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4 -// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4 __device__ int atomic32_op_agent(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); @@ -133,8 +133,8 @@ __device__ int atomic32_op_agent(int *ptr, int val, int desired) { } // CHECK-LABEL: @_Z18atomicu32_op_agentPjjj -// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); @@ -142,17 +142,17 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, } // CHECK-LABEL: @_Z18atomic32_op_systemPiii -// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4 -// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: load i32, i32* %{{.*}}, align 4 -// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("one-as") monotonic, align 4 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: load i32, ptr %{{.*}}, align 4 +// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 4 __device__ int atomic32_op_system(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); @@ -169,8 +169,8 @@ __device__ int atomic32_op_system(int *ptr, int val, int desired) { } // CHECK-LABEL: @_Z19atomicu32_op_systemPjjj -// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); @@ -178,16 +178,16 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, } // CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx -// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8 -// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); @@ -203,10 +203,10 @@ __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, l } // CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy -// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -// CHECK: load atomic i64, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: load atomic i64, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); @@ -216,17 +216,17 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, } // CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx -// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8 -// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8 -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8 +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8 __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); @@ -243,10 +243,10 @@ __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long } // CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy -// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8 -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8 +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8 +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8 __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); @@ -256,16 +256,16 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un } // CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx -// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8 -// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8 __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); @@ -281,9 +281,9 @@ __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long } // CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy -// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8 +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8 __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); @@ -292,16 +292,16 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un } // CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx -// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8 -// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8 __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); @@ -317,9 +317,9 @@ __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long lon } // CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy -// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8 +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8 __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); @@ -328,17 +328,17 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign } // CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx -// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8 -// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: load i64, i64* %{{.*}}, align 8 -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8 +// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8 +// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: load i64, ptr %{{.*}}, align 8 +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8 __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); @@ -355,10 +355,10 @@ __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long lo } // CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy -// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -// CHECK: load i64, i64* %{{.*}}, align 8 -// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8 +// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: load i64, ptr %{{.*}}, align 8 +// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8 __device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); diff --git a/clang/test/CodeGenCUDA/builtin-mangled-name.cu b/clang/test/CodeGenCUDA/builtin-mangled-name.cu index 4cd176baf73995..6a6a2e7f5d65a3 100644 --- a/clang/test/CodeGenCUDA/builtin-mangled-name.cu +++ b/clang/test/CodeGenCUDA/builtin-mangled-name.cu @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux -aux-triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -aux-triple amdgcn-amd-amdhsa \ // RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=CHECK,LNX %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-windows-msvc -aux-triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -aux-triple amdgcn-amd-amdhsa \ // RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=CHECK,MSVC %s #include "Inputs/cuda.h" @@ -15,14 +15,14 @@ namespace X { // LNX-LABEL: define {{.*}}@_Z4fun1v() // MSVC-LABEL: define {{.*}} @"?fun1@@YAPEBDXZ"() -// CHECK: ret i8* getelementptr inbounds ({{.*}} @[[STR1]], i64 0, i64 0) +// CHECK: ret ptr @[[STR1]] const char *fun1() { return __builtin_get_device_side_mangled_name(X::kern1); } // LNX-LABEL: define {{.*}}@_Z4fun2v() // MSVC-LABEL: define {{.*}}@"?fun2@@YAPEBDXZ"() -// CHECK: ret i8* getelementptr inbounds ({{.*}} @[[STR2]], i64 0, i64 0) +// CHECK: ret ptr @[[STR2]] __host__ __device__ const char *fun2() { return __builtin_get_device_side_mangled_name(X::var1); } diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu index 2278c26f0bcfd1..c44d198882196c 100644 --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \ // RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \ // RUN: -o - | FileCheck %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \ // RUN: -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \ // RUN: -o - | FileCheck %s @@ -11,23 +11,23 @@ // CHECK-LABEL: @_Z16use_dispatch_ptrPi( // CHECK-NEXT: entry: -// CHECK-NEXT: [[OUT:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca i32*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32** -// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32** -// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[DISPATCH_PTR]] to i32** -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32 addrspace(1)* [[OUT_COERCE:%.*]] to i32* -// CHECK-NEXT: store i32* [[TMP0]], i32** [[OUT_ASCAST]], align 8 -// CHECK-NEXT: [[OUT1:%.*]] = load i32*, i32** [[OUT_ASCAST]], align 8 -// CHECK-NEXT: store i32* [[OUT1]], i32** [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast i8 addrspace(4)* [[TMP1]] to i32* -// CHECK-NEXT: store i32* [[TMP2]], i32** [[DISPATCH_PTR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DISPATCH_PTR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP5]], align 4 +// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DISPATCH_PTR]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr +// CHECK-NEXT: store ptr [[TMP2]], ptr [[DISPATCH_PTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 // CHECK-NEXT: ret void // __global__ void use_dispatch_ptr(int* out) { @@ -40,12 +40,12 @@ __global__ // CHECK-NEXT: entry: // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) - // CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* - // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* - // CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 - // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 - // CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) - // CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 + // CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr + // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr + // CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 + // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4 + // CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) + // CHECK-NEXT: store volatile float [[TMP1]], ptr [[X_ASCAST]], align 4 // CHECK-NEXT: ret void // void @@ -58,12 +58,12 @@ __global__ // CHECK-NEXT: entry: // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* -// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* @_ZZ12test_ds_faddfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) -// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) @_ZZ12test_ds_faddfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP1]], ptr [[X_ASCAST]], align 4 // CHECK-NEXT: ret void // __global__ void test_ds_fadd(float src) { @@ -73,24 +73,24 @@ __global__ void test_ds_fadd(float src) { // CHECK-LABEL: @_Z12test_ds_fminfPf( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* -// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* -// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 -// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8 -// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)* -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) -// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED]] to ptr +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: [[SHARED1:%.*]] = load ptr, ptr [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[SHARED1]], ptr [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3) +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4 // CHECK-NEXT: ret void // __global__ void test_ds_fmin(float src, float *shared) { @@ -99,11 +99,11 @@ __global__ void test_ds_fmin(float src, float *shared) { // CHECK-LABEL: @_Z33test_ret_builtin_nondef_addrspacev( // CHECK-NEXT: entry: -// CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8, addrspace(5) -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i8* addrspace(5)* [[X]] to i8** -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(4)* [[TMP0]] to i8* -// CHECK-NEXT: store i8* [[TMP1]], i8** [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr +// CHECK-NEXT: store ptr [[TMP1]], ptr [[X_ASCAST]], align 8 // CHECK-NEXT: ret void // __device__ void test_ret_builtin_nondef_addrspace() { @@ -123,25 +123,25 @@ __global__ void endpgm() { // CHECK-LABEL: @_Z14test_uicmp_i64Pyyy( // CHECK-NEXT: entry: -// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** -// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64* -// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[B_ADDR]] to i64* -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* -// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 -// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8 -// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 35) -// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP4]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8 // CHECK-NEXT: ret void // __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b) @@ -153,17 +153,17 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un // CHECK-LABEL: @_Z14test_s_memtimePy( // CHECK-NEXT: entry: -// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) -// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** -// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* -// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 -// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8 -// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 // CHECK-NEXT: ret void // __global__ void test_s_memtime(unsigned long long* out) @@ -176,26 +176,26 @@ __device__ void func(float *x); // CHECK-LABEL: @_Z17test_ds_fmin_funcfPf( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** -// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* -// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* -// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 -// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8 -// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)* -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) -// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_Z4funcPf(float* noundef [[TMP5]]) #[[ATTR8:[0-9]+]] +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED]] to ptr +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: [[SHARED1:%.*]] = load ptr, ptr [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[SHARED1]], ptr [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3) +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP5]]) #[[ATTR8:[0-9]+]] // CHECK-NEXT: ret void // __global__ void test_ds_fmin_func(float src, float *__restrict shared) { @@ -205,21 +205,20 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) { // CHECK-LABEL: @_Z14test_is_sharedPf( // CHECK-NEXT: entry: -// CHECK-NEXT: [[X:%.*]] = alloca float*, align 8, addrspace(5) -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[RET:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float** -// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float** -// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast i8 addrspace(5)* [[RET]] to i8* -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[X_COERCE:%.*]] to float* -// CHECK-NEXT: store float* [[TMP0]], float** [[X_ASCAST]], align 8 -// CHECK-NEXT: [[X1:%.*]] = load float*, float** [[X_ASCAST]], align 8 -// CHECK-NEXT: store float* [[X1]], float** [[X_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[X_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to i8* -// CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP2]]) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X_COERCE:%.*]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[TMP1]]) // CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP3]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], i8* [[RET_ASCAST]], align 1 +// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[RET_ASCAST]], align 1 // CHECK-NEXT: ret void // __global__ void test_is_shared(float *x){ diff --git a/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu b/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu index 23be5e5d0f76bc..66ec200a8e6d4f 100644 --- a/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu +++ b/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \ // RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \ // RUN: -o - | FileCheck %s @@ -6,14 +6,14 @@ typedef __attribute__((address_space(3))) float *LP; // CHECK-LABEL: test_ds_atomic_add_f32 -// CHECK: %[[ADDR_ADDR:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast float* addrspace(5)* %[[ADDR_ADDR]] to float** -// CHECK: store float* %addr, float** %[[ADDR_ADDR_ASCAST_PTR]], align 8 -// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load float*, float** %[[ADDR_ADDR_ASCAST_PTR]], align 8 -// CHECK: %[[AS_CAST:.*]] = addrspacecast float* %[[ADDR_ADDR_ASCAST]] to float addrspace(3)* -// CHECK: %3 = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* %[[AS_CAST]] -// CHECK: %4 = load float*, float** %rtn.ascast, align 8 -// CHECK: store float %3, float* %4, align 4 +// CHECK: %[[ADDR_ADDR:.*]] = alloca ptr, align 8, addrspace(5) +// CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ADDR]] to ptr +// CHECK: store ptr %addr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8 +// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load ptr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8 +// CHECK: %[[AS_CAST:.*]] = addrspacecast ptr %[[ADDR_ADDR_ASCAST]] to ptr addrspace(3) +// CHECK: %3 = call contract float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %[[AS_CAST]] +// CHECK: %4 = load ptr, ptr %rtn.ascast, align 8 +// CHECK: store float %3, ptr %4, align 4 __device__ void test_ds_atomic_add_f32(float *addr, float val) { float *rtn; *rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0); diff --git a/clang/test/CodeGenCUDA/const-var.cu b/clang/test/CodeGenCUDA/const-var.cu index f667bcd3d6dfc4..70d4df18dfeef1 100644 --- a/clang/test/CodeGenCUDA/const-var.cu +++ b/clang/test/CodeGenCUDA/const-var.cu @@ -1,12 +1,12 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ // RUN: -emit-llvm -o - | FileCheck -check-prefix=DEV %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -x hip %s \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \ // RUN: -emit-llvm -o - | FileCheck -check-prefix=HOST %s // Negative tests. -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ // RUN: -emit-llvm -o - | FileCheck -check-prefix=DEV-NEG %s #include "Inputs/cuda.h" @@ -15,12 +15,12 @@ // Both are promoted to device side. // DEV-DAG: @_ZN5Test1L1aE = internal addrspace(4) constant i32 1 -// DEV-DAG: @_ZN5Test11B2p1E = addrspace(4) externally_initialized constant i32* addrspacecast (i32 addrspace(4)* @_ZN5Test1L1aE to i32*) -// DEV-DAG: @_ZN5Test11B2p2E = addrspace(4) externally_initialized constant i32* addrspacecast (i32 addrspace(4)* @_ZN5Test1L1aE to i32*) +// DEV-DAG: @_ZN5Test11B2p1E = addrspace(4) externally_initialized constant ptr addrspacecast (ptr addrspace(4) @_ZN5Test1L1aE to ptr) +// DEV-DAG: @_ZN5Test11B2p2E = addrspace(4) externally_initialized constant ptr addrspacecast (ptr addrspace(4) @_ZN5Test1L1aE to ptr) // DEV-DAG: @_ZN5Test12b2E = addrspace(1) externally_initialized global i32 1 // HOST-DAG: @_ZN5Test1L1aE = internal constant i32 1 -// HOST-DAG: @_ZN5Test11B2p1E = constant i32* @_ZN5Test1L1aE -// HOST-DAG: @_ZN5Test11B2p2E = internal constant i32* undef +// HOST-DAG: @_ZN5Test11B2p1E = constant ptr @_ZN5Test1L1aE +// HOST-DAG: @_ZN5Test11B2p2E = internal constant ptr undef // HOST-DAG: @_ZN5Test12b1E = global i32 1 // HOST-DAG: @_ZN5Test12b2E = internal global i32 undef namespace Test1 { @@ -42,7 +42,7 @@ __device__ int b2 = B::p1 == B::p2; // DEV-NEG-NOT: @_ZN5Test2L1aE // DEV-NEG-NOT: @_ZN5Test21B1pE // HOST-DAG: @_ZN5Test21aE = global i32 1 -// HOST-DAG: @_ZN5Test21B1pE = constant i32* @_ZN5Test21aE +// HOST-DAG: @_ZN5Test21B1pE = constant ptr @_ZN5Test21aE namespace Test2 { int a = 1; diff --git a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu index aae456614ef4e5..e76e7a2f825290 100644 --- a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu +++ b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -no-opaque-pointers "-triple" "nvptx-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s #include "__clang_cuda_builtin_vars.h" -// CHECK: define{{.*}} void @_Z6kernelPi(i32* noundef %out) +// CHECK: define{{.*}} void @_Z6kernelPi(ptr noundef %out) __attribute__((global)) void kernel(int *out) { int i = 0; diff --git a/clang/test/CodeGenCUDA/debug-info-address-class.cu b/clang/test/CodeGenCUDA/debug-info-address-class.cu index d9499dc91af270..5e0f775cbe9e37 100644 --- a/clang/test/CodeGenCUDA/debug-info-address-class.cu +++ b/clang/test/CodeGenCUDA/debug-info-address-class.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown -debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown -debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s #include "Inputs/cuda.h" @@ -14,12 +14,12 @@ __device__ __constant__ int FileVar2; __device__ void kernel1( // CHECK-DAG: ![[ARG:[0-9]+]] = !DILocalVariable(name: "Arg", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* {{.*}}, metadata ![[ARG]], metadata !DIExpression()), !dbg !{{[0-9]+}} + // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[ARG]], metadata !DIExpression()), !dbg !{{[0-9]+}} int Arg) { // CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true) // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR0]], expr: !DIExpression(DW_OP_constu, 8, DW_OP_swap, DW_OP_xderef)) __shared__ int FuncVar0; // CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression()), !dbg !{{[0-9]+}} + // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression()), !dbg !{{[0-9]+}} int FuncVar1; } diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu index 17dbb00b2ce0d7..226b7e295f4b45 100644 --- a/clang/test/CodeGenCUDA/device-var-init.cu +++ b/clang/test/CodeGenCUDA/device-var-init.cu @@ -4,12 +4,12 @@ // Make sure we don't allow dynamic initialization for device // variables, but accept empty constructors allowed by CUDA. -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ // RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,NVPTX %s -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx64-nvidia-cuda -std=c++11 \ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 \ // RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=HOST %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device -std=c++11 \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ // RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,AMDGCN %s #ifdef __clang__ @@ -192,69 +192,69 @@ __device__ void df() { // NVPTX: %[[t_b_ned:.*]] = alloca %struct.T_B_NED // NVPTX: %[[t_f_ned:.*]] = alloca %struct.T_F_NED // NVPTX: %[[t_fa_ned:.*]] = alloca %struct.T_FA_NED - // AMDGCN: %[[ec:.*]] ={{.*}} addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC* - // AMDGCN: %[[ed:.*]] ={{.*}} addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED* - // AMDGCN: %[[ecd:.*]] ={{.*}} addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD* - // AMDGCN: %[[etc:.*]] ={{.*}} addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC* - // AMDGCN: %[[uc:.*]] ={{.*}} addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC* - // AMDGCN: %[[ud:.*]] ={{.*}} addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD* - // AMDGCN: %[[eci:.*]] ={{.*}} addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI* - // AMDGCN: %[[nec:.*]] ={{.*}} addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC* - // AMDGCN: %[[ned:.*]] ={{.*}} addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED* - // AMDGCN: %[[ncv:.*]] ={{.*}} addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV* - // AMDGCN: %[[vd:.*]] ={{.*}} addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD* - // AMDGCN: %[[ncf:.*]] ={{.*}} addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF* - // AMDGCN: %[[ncfs:.*]] ={{.*}} addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS* - // AMDGCN: %[[utc:.*]] ={{.*}} addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC* - // AMDGCN: %[[netc:.*]] ={{.*}} addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC* - // AMDGCN: %[[ec_i_ec:.*]] ={{.*}} addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC* - // AMDGCN: %[[ec_i_ec1:.*]] ={{.*}} addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1* - // AMDGCN: %[[t_v_t:.*]] ={{.*}} addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T* - // AMDGCN: %[[t_b_nec:.*]] ={{.*}} addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC* - // AMDGCN: %[[t_f_nec:.*]] ={{.*}} addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC* - // AMDGCN: %[[t_fa_nec:.*]] ={{.*}} addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC* - // AMDGCN: %[[t_b_ned:.*]] ={{.*}} addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED* - // AMDGCN: %[[t_f_ned:.*]] ={{.*}} addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED* - // AMDGCN: %[[t_fa_ned:.*]] ={{.*}} addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED* + // AMDGCN: %[[ec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ec to ptr + // AMDGCN: %[[ed:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ed to ptr + // AMDGCN: %[[ecd:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ecd to ptr + // AMDGCN: %[[etc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %etc to ptr + // AMDGCN: %[[uc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %uc to ptr + // AMDGCN: %[[ud:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ud to ptr + // AMDGCN: %[[eci:.*]] ={{.*}} addrspacecast ptr addrspace(5) %eci to ptr + // AMDGCN: %[[nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %nec to ptr + // AMDGCN: %[[ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ned to ptr + // AMDGCN: %[[ncv:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ncv to ptr + // AMDGCN: %[[vd:.*]] ={{.*}} addrspacecast ptr addrspace(5) %vd to ptr + // AMDGCN: %[[ncf:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ncf to ptr + // AMDGCN: %[[ncfs:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ncfs to ptr + // AMDGCN: %[[utc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %utc to ptr + // AMDGCN: %[[netc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %netc to ptr + // AMDGCN: %[[ec_i_ec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ec_i_ec to ptr + // AMDGCN: %[[ec_i_ec1:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ec_i_ec1 to ptr + // AMDGCN: %[[t_v_t:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_v_t to ptr + // AMDGCN: %[[t_b_nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_b_nec to ptr + // AMDGCN: %[[t_f_nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_f_nec to ptr + // AMDGCN: %[[t_fa_nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_fa_nec to ptr + // AMDGCN: %[[t_b_ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_b_ned to ptr + // AMDGCN: %[[t_f_ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_f_ned to ptr + // AMDGCN: %[[t_fa_ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_fa_ned to ptr T t; // DEVICE-NOT: call EC ec; - // DEVICE: call void @_ZN2ECC1Ev(%struct.EC* {{[^,]*}} %[[ec]]) + // DEVICE: call void @_ZN2ECC1Ev(ptr {{[^,]*}} %[[ec]]) ED ed; // DEVICE-NOT: call ECD ecd; - // DEVICE: call void @_ZN3ECDC1Ev(%struct.ECD* {{[^,]*}} %[[ecd]]) + // DEVICE: call void @_ZN3ECDC1Ev(ptr {{[^,]*}} %[[ecd]]) ETC etc; - // DEVICE: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* {{[^,]*}} %[[etc]]) + // DEVICE: call void @_ZN3ETCC1IJEEEDpT_(ptr {{[^,]*}} %[[etc]]) UC uc; // undefined constructor -- not allowed - // DEVICE: call void @_ZN2UCC1Ev(%struct.UC* {{[^,]*}} %[[uc]]) + // DEVICE: call void @_ZN2UCC1Ev(ptr {{[^,]*}} %[[uc]]) UD ud; // undefined destructor -- not allowed // DEVICE-NOT: call ECI eci; // empty constructor w/ initializer list -- not allowed - // DEVICE: call void @_ZN3ECIC1Ev(%struct.ECI* {{[^,]*}} %[[eci]]) + // DEVICE: call void @_ZN3ECIC1Ev(ptr {{[^,]*}} %[[eci]]) NEC nec; // non-empty constructor -- not allowed - // DEVICE: call void @_ZN3NECC1Ev(%struct.NEC* {{[^,]*}} %[[nec]]) + // DEVICE: call void @_ZN3NECC1Ev(ptr {{[^,]*}} %[[nec]]) // non-empty destructor -- not allowed NED ned; // no-constructor, virtual method -- not allowed - // DEVICE: call void @_ZN3NCVC1Ev(%struct.NCV* {{[^,]*}} %[[ncv]]) + // DEVICE: call void @_ZN3NCVC1Ev(ptr {{[^,]*}} %[[ncv]]) NCV ncv; // DEVICE-NOT: call VD vd; - // DEVICE: call void @_ZN2VDC1Ev(%struct.VD* {{[^,]*}} %[[vd]]) + // DEVICE: call void @_ZN2VDC1Ev(ptr {{[^,]*}} %[[vd]]) NCF ncf; - // DEVICE: call void @_ZN3NCFC1Ev(%struct.NCF* {{[^,]*}} %[[ncf]]) + // DEVICE: call void @_ZN3NCFC1Ev(ptr {{[^,]*}} %[[ncf]]) NCFS ncfs; - // DEVICE: call void @_ZN4NCFSC1Ev(%struct.NCFS* {{[^,]*}} %[[ncfs]]) + // DEVICE: call void @_ZN4NCFSC1Ev(ptr {{[^,]*}} %[[ncfs]]) UTC utc; - // DEVICE: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* {{[^,]*}} %[[utc]]) + // DEVICE: call void @_ZN3UTCC1IJEEEDpT_(ptr {{[^,]*}} %[[utc]]) NETC netc; - // DEVICE: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* {{[^,]*}} %[[netc]]) + // DEVICE: call void @_ZN4NETCC1IJEEEDpT_(ptr {{[^,]*}} %[[netc]]) T_B_T t_b_t; // DEVICE-NOT: call T_F_T t_f_t; @@ -262,17 +262,17 @@ __device__ void df() { T_FA_T t_fa_t; // DEVICE-NOT: call EC_I_EC ec_i_ec; - // DEVICE: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* {{[^,]*}} %[[ec_i_ec]]) + // DEVICE: call void @_ZN7EC_I_ECC1Ev(ptr {{[^,]*}} %[[ec_i_ec]]) EC_I_EC1 ec_i_ec1; - // DEVICE: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* {{[^,]*}} %[[ec_i_ec1]]) + // DEVICE: call void @_ZN8EC_I_EC1C1Ev(ptr {{[^,]*}} %[[ec_i_ec1]]) T_V_T t_v_t; - // DEVICE: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* {{[^,]*}} %[[t_v_t]]) + // DEVICE: call void @_ZN5T_V_TC1Ev(ptr {{[^,]*}} %[[t_v_t]]) T_B_NEC t_b_nec; - // DEVICE: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* {{[^,]*}} %[[t_b_nec]]) + // DEVICE: call void @_ZN7T_B_NECC1Ev(ptr {{[^,]*}} %[[t_b_nec]]) T_F_NEC t_f_nec; - // DEVICE: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* {{[^,]*}} %[[t_f_nec]]) + // DEVICE: call void @_ZN7T_F_NECC1Ev(ptr {{[^,]*}} %[[t_f_nec]]) T_FA_NEC t_fa_nec; - // DEVICE: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* {{[^,]*}} %[[t_fa_nec]]) + // DEVICE: call void @_ZN8T_FA_NECC1Ev(ptr {{[^,]*}} %[[t_fa_nec]]) T_B_NED t_b_ned; // DEVICE-NOT: call T_F_NED t_f_ned; @@ -280,9 +280,9 @@ __device__ void df() { T_FA_NED t_fa_ned; // DEVICE-NOT: call static __shared__ EC s_ec; - // DEVICE-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) + // DEVICE-NOT: call void @_ZN2ECC1Ev(ptr addrspacecast (ptr addrspace(3) @_ZZ2dfvE4s_ec to ptr)) static __shared__ ETC s_etc; - // DEVICE-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) + // DEVICE-NOT: call void @_ZN3ETCC1IJEEEDpT_(ptr addrspacecast (ptr addrspace(3) @_ZZ2dfvE5s_etc to ptr)) static const int const_array[] = {1, 2, 3, 4, 5}; static const int const_int = 123; @@ -291,14 +291,14 @@ __device__ void df() { df(); // DEVICE: call void @_Z2dfv() // Verify that we only call non-empty destructors - // DEVICE-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* {{[^,]*}} %[[t_fa_ned]]) - // DEVICE-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* {{[^,]*}} %[[t_f_ned]]) - // DEVICE-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* {{[^,]*}} %[[t_b_ned]]) - // DEVICE-NEXT: call void @_ZN2VDD1Ev(%struct.VD* {{[^,]*}} %[[vd]]) - // DEVICE-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* {{[^,]*}} %[[ned]]) - // DEVICE-NEXT: call void @_ZN2UDD1Ev(%struct.UD* {{[^,]*}} %[[ud]]) - // DEVICE-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* {{[^,]*}} %[[ecd]]) - // DEVICE-NEXT: call void @_ZN2EDD1Ev(%struct.ED* {{[^,]*}} %[[ed]]) + // DEVICE-NEXT: call void @_ZN8T_FA_NEDD1Ev(ptr {{[^,]*}} %[[t_fa_ned]]) + // DEVICE-NEXT: call void @_ZN7T_F_NEDD1Ev(ptr {{[^,]*}} %[[t_f_ned]]) + // DEVICE-NEXT: call void @_ZN7T_B_NEDD1Ev(ptr {{[^,]*}} %[[t_b_ned]]) + // DEVICE-NEXT: call void @_ZN2VDD1Ev(ptr {{[^,]*}} %[[vd]]) + // DEVICE-NEXT: call void @_ZN3NEDD1Ev(ptr {{[^,]*}} %[[ned]]) + // DEVICE-NEXT: call void @_ZN2UDD1Ev(ptr {{[^,]*}} %[[ud]]) + // DEVICE-NEXT: call void @_ZN3ECDD1Ev(ptr {{[^,]*}} %[[ecd]]) + // DEVICE-NEXT: call void @_ZN2EDD1Ev(ptr {{[^,]*}} %[[ed]]) // DEVICE-NEXT: ret void } diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu index a214feb0541f9c..3c2efb57525c9c 100644 --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,NORDC %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,RDC %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,RDC-H %s -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CUDA %s @@ -24,9 +24,9 @@ __device__ int v1; // NORDC-H-DAG: @v2 = internal global i32 undef // RDC-H-DAG: @v2 = global i32 undef __constant__ int v2; -// DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null -// NORDC-H-DAG: @v3 = internal externally_initialized global i32* null -// RDC-H-DAG: @v3 = externally_initialized global i32* null +// DEV-DAG: @v3 = addrspace(1) externally_initialized global ptr addrspace(1) null +// NORDC-H-DAG: @v3 = internal externally_initialized global ptr null +// RDC-H-DAG: @v3 = externally_initialized global ptr null #if __HIP__ __managed__ int v3; #endif @@ -37,8 +37,8 @@ extern __device__ int ev1; // DEV-DAG: @ev2 = external addrspace(4) global i32 // HOST-DAG: @ev2 = external global i32 extern __constant__ int ev2; -// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)* -// HOST-DAG: @ev3 = external externally_initialized global i32* +// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global ptr addrspace(1) +// HOST-DAG: @ev3 = external externally_initialized global ptr #if __HIP__ extern __managed__ int ev3; #endif @@ -53,9 +53,9 @@ static __device__ int sv1; // HOST-DAG: @_ZL3sv2 = internal global i32 undef // CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 static __constant__ int sv2; -// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null -// HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null +// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null +// HOST-DAG: @_ZL3sv3 = internal externally_initialized global ptr null #if __HIP__ static __managed__ int sv3; #endif diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu index 2ffaa54db9b337..2c0d06d07c6f28 100644 --- a/clang/test/CodeGenCUDA/host-used-device-var.cu +++ b/clang/test/CodeGenCUDA/host-used-device-var.cu @@ -1,16 +1,16 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ // RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ // RUN: | FileCheck -check-prefix=DEV %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -x hip %s \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \ // RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST %s // Negative tests. -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ // RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ // RUN: | FileCheck -check-prefix=DEV-NEG %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -x hip %s \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \ // RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST-NEG %s #include "Inputs/cuda.h" @@ -62,7 +62,7 @@ __device__ T add_func (T x, T y) return x + y; } -// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global i32 (i32, i32)* @_Z8add_funcIiET_S0_S0_ +// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global ptr @_Z8add_funcIiET_S0_S0_ template __device__ func_t p_add_func = add_func; diff --git a/clang/test/CodeGenCUDA/kernel-args-alignment.cu b/clang/test/CodeGenCUDA/kernel-args-alignment.cu index 1c008dcd8a5db6..27b1315681521d 100644 --- a/clang/test/CodeGenCUDA/kernel-args-alignment.cu +++ b/clang/test/CodeGenCUDA/kernel-args-alignment.cu @@ -1,11 +1,11 @@ // New CUDA kernel launch sequence does not require explicit specification of // size/offset for each argument, so only the old way is tested. // -// RUN: %clang_cc1 -no-opaque-pointers --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm \ // RUN: -target-sdk-version=8.0 -o - %s \ // RUN: | FileCheck -check-prefixes=HOST-OLD,CHECK %s -// RUN: %clang_cc1 -no-opaque-pointers --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \ +// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \ // RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK %s #include "Inputs/cuda.h" @@ -22,7 +22,7 @@ struct S { // Clang should generate a packed LLVM struct for S (denoted by the <>s), // otherwise this test isn't interesting. -// CHECK: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> +// CHECK: %struct.S = type <{ ptr, i8, %struct.U, [5 x i8] }> static_assert(alignof(S) == 8, "Unexpected alignment."); @@ -36,5 +36,5 @@ static_assert(alignof(S) == 8, "Unexpected alignment."); // HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24) // DEVICE-LABEL: @_Z6kernelc1SPi -// DEVICE-SAME: i8{{[^,]*}}, %struct.S* noundef byval(%struct.S) align 8{{[^,]*}}, i32* +// DEVICE-SAME: i8{{[^,]*}}, ptr noundef byval(%struct.S) align 8{{[^,]*}}, ptr __global__ void kernel(char a, S s, int *b) {} diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu index 46a2b69a4c3d68..5f064694223b55 100644 --- a/clang/test/CodeGenCUDA/kernel-args.cu +++ b/clang/test/CodeGenCUDA/kernel-args.cu @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -no-opaque-pointers -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s -// RUN: %clang_cc1 -no-opaque-pointers -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ +// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s #include "Inputs/cuda.h" @@ -9,15 +9,15 @@ struct A { float *p; }; -// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) -// NVPTX: define{{.*}} void @_Z6kernel1A(%struct.A* noundef byval(%struct.A) align 8 %x) +// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}}) +// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x) __global__ void kernel(A x) { } class Kernel { public: - // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) - // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(%struct.A* noundef byval(%struct.A) align 8 %x) + // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}}) + // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x) static __global__ void memberKernel(A x){} template static __global__ void templateMemberKernel(T x) {} }; @@ -30,11 +30,11 @@ void launch(void*); void test() { Kernel K; - // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} - // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(%struct.A* noundef byval(%struct.A) align 8 %x) + // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}} + // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x) launch((void*)templateKernel); - // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} - // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* noundef byval(%struct.A) align 8 %x) + // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}} + // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x) launch((void*)Kernel::templateMemberKernel); } diff --git a/clang/test/CodeGenCUDA/kernel-dbg-info.cu b/clang/test/CodeGenCUDA/kernel-dbg-info.cu index 316e5ed15d8c14..524694299d5de4 100644 --- a/clang/test/CodeGenCUDA/kernel-dbg-info.cu +++ b/clang/test/CodeGenCUDA/kernel-dbg-info.cu @@ -1,25 +1,25 @@ // RUN: echo "GPU binary would be here" > %t -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -O0 \ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip | FileCheck -check-prefixes=CHECK,O0 %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -fcuda-is-device | FileCheck -check-prefix=DEV %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -O0 \ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ // RUN: | FileCheck -check-prefixes=CHECK,O0 %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ // RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -O3 \ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O3 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 | FileCheck %s -// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ // RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s @@ -31,7 +31,7 @@ extern "C" __global__ void ckernel(int *a) { } // Kernel symbol for launching kernel. -// CHECK: @[[SYM:ckernel]] = constant void (i32*)* @__device_stub__ckernel, align 8 +// CHECK: @[[SYM:ckernel]] = constant ptr @__device_stub__ckernel, align 8 // Device side kernel names // CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00" diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CodeGenCUDA/kernel-stub-name.cu index aef99d3c2d736b..9884046fcd0fd0 100644 --- a/clang/test/CodeGenCUDA/kernel-stub-name.cu +++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -1,19 +1,19 @@ // RUN: echo "GPU binary would be here" > %t -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ // RUN: | FileCheck -check-prefixes=CHECK,GNU %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ // RUN: | FileCheck -check-prefix=NEG %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-pc-windows-msvc -emit-llvm %s \ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \ // RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \ // RUN: %t -o - -x hip\ // RUN: | FileCheck -check-prefixes=CHECK,MSVC %s -// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-pc-windows-msvc -emit-llvm %s \ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \ // RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \ // RUN: %t -o - -x hip\ // RUN: | FileCheck -check-prefix=NEG %s @@ -22,15 +22,15 @@ // Check kernel handles are emitted for non-MSVC target but not for MSVC target. -// GNU: @[[HCKERN:ckernel]] = constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8 -// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8 -// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8 -// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8 +// GNU: @[[HCKERN:ckernel]] = constant ptr @[[CSTUB:__device_stub__ckernel]], align 8 +// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8 +// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8 +// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8 -// MSVC: @[[HCKERN:ckernel]] = dso_local constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8 -// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant void ()* @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8 -// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant void ()* @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8 -// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant void ()*, align 8 +// MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8 +// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8 +// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8 +// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8 extern "C" __global__ void ckernel() {} @@ -104,10 +104,10 @@ extern "C" void fun2() { // Check kernel handle is used for assigning a kernel to a function pointer. // CHECK-LABEL: define{{.*}}@fun3() -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 -// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 -// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 +// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8 +// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8 +// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8 +// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8 extern "C" void fun3() { kernel_ptr = ckernel; kernel_ptr = &ckernel; @@ -119,11 +119,10 @@ extern "C" void fun3() { // used with triple chevron. // CHECK-LABEL: define{{.*}}@fun4() -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr +// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr // CHECK: call noundef i32 @{{.*hipConfigureCall}} -// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 -// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()** -// CHECK: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8 +// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8 +// CHECK: %[[STUB:.*]] = load ptr, ptr %[[HANDLE]], align 8 // CHECK: call void %[[STUB]]() extern "C" void fun4() { kernel_ptr = ckernel; @@ -133,10 +132,9 @@ extern "C" void fun4() { // Check kernel handle is passed to a function. // CHECK-LABEL: define{{.*}}@fun5() -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr -// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 -// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8* -// CHECK: call void @launch(i8* noundef %[[CAST]]) +// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr +// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8 +// CHECK: call void @launch(ptr noundef %[[HANDLE]]) extern "C" void fun5() { kernel_ptr = ckernel; launch((void *)kernel_ptr); diff --git a/clang/test/CodeGenCUDA/lambda-noinline.cu b/clang/test/CodeGenCUDA/lambda-noinline.cu index de2196e63f0748..be4c5fe7357d22 100644 --- a/clang/test/CodeGenCUDA/lambda-noinline.cu +++ b/clang/test/CodeGenCUDA/lambda-noinline.cu @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple x86_64-linux-gnu \ // RUN: | FileCheck -check-prefix=HOST %s -// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: | FileCheck -check-prefix=DEV %s diff --git a/clang/test/CodeGenCUDA/lambda-reference-var.cu b/clang/test/CodeGenCUDA/lambda-reference-var.cu index 34023bb7603679..ab47a2453bc0b9 100644 --- a/clang/test/CodeGenCUDA/lambda-reference-var.cu +++ b/clang/test/CodeGenCUDA/lambda-reference-var.cu @@ -1,18 +1,18 @@ -// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple x86_64-linux-gnu \ // RUN: | FileCheck -check-prefix=HOST %s -// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" -// HOST: %[[T1:.*]] = type <{ i32*, i32, [4 x i8] }> -// HOST: %[[T2:.*]] = type { i32*, i32** } -// HOST: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }> -// DEV: %[[T1:.*]] = type { i32* } -// DEV: %[[T2:.*]] = type { i32** } -// DEV: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }> +// HOST: %[[T1:.*]] = type <{ ptr, i32, [4 x i8] }> +// HOST: %[[T2:.*]] = type { ptr, ptr } +// HOST: %[[T3:.*]] = type <{ ptr, i32, [4 x i8] }> +// DEV: %[[T1:.*]] = type { ptr } +// DEV: %[[T2:.*]] = type { ptr } +// DEV: %[[T3:.*]] = type <{ ptr, i32, [4 x i8] }> int global_host_var; __device__ int global_device_var; @@ -20,7 +20,7 @@ template __global__ void kern(F f) { f(); } // DEV-LABEL: @_ZZ27dev_capture_dev_ref_by_copyPiENKUlvE_clEv( -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: store i32 %[[VAL]] __device__ void dev_capture_dev_ref_by_copy(int *out) { int &ref = global_device_var; @@ -37,10 +37,10 @@ __device__ void dev_capture_dev_rval_by_copy(int *out) { } // DEV-LABEL: @_ZZ26dev_capture_dev_ref_by_refPiENKUlvE_clEv( -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 -// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL2]], ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: store i32 %[[VAL]] __device__ void dev_capture_dev_ref_by_ref(int *out) { int &ref = global_device_var; @@ -48,10 +48,10 @@ __device__ void dev_capture_dev_ref_by_ref(int *out) { } // DEV-LABEL: define{{.*}} void @_Z7dev_refPi( -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 -// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL2]], ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: store i32 %[[VAL]] __device__ void dev_ref(int *out) { int &ref = global_device_var; @@ -60,10 +60,10 @@ __device__ void dev_ref(int *out) { } // DEV-LABEL: @_ZZ14dev_lambda_refPiENKUlvE_clEv( -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 -// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) -// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL2]], ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) +// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr) // DEV: store i32 %[[VAL]] __device__ void dev_lambda_ref(int *out) { [=](){ @@ -74,7 +74,7 @@ __device__ void dev_lambda_ref(int *out) { } // HOST-LABEL: @_ZZ29host_capture_host_ref_by_copyPiENKUlvE_clEv( -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var // HOST: store i32 %[[VAL]] void host_capture_host_ref_by_copy(int *out) { int &ref = global_host_var; @@ -82,12 +82,12 @@ void host_capture_host_ref_by_copy(int *out) { } // HOST-LABEL: @_ZZ28host_capture_host_ref_by_refPiENKUlvE_clEv( -// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], %[[T2]]* %this1, i32 0, i32 0 -// HOST: %[[REF:.*]] = load i32*, i32** %[[CAP]] -// HOST: %[[VAL:.*]] = load i32, i32* %[[REF]] +// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], ptr %this1, i32 0, i32 0 +// HOST: %[[REF:.*]] = load ptr, ptr %[[CAP]] +// HOST: %[[VAL:.*]] = load i32, ptr %[[REF]] // HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 -// HOST: store i32 %[[VAL2]], i32* %[[REF]] -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL2]], ptr %[[REF]] +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var // HOST: store i32 %[[VAL]] void host_capture_host_ref_by_ref(int *out) { int &ref = global_host_var; @@ -95,10 +95,10 @@ void host_capture_host_ref_by_ref(int *out) { } // HOST-LABEL: define{{.*}} void @_Z8host_refPi( -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var // HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 -// HOST: store i32 %[[VAL2]], i32* @global_host_var -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL2]], ptr @global_host_var +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var // HOST: store i32 %[[VAL]] void host_ref(int *out) { int &ref = global_host_var; @@ -107,10 +107,10 @@ void host_ref(int *out) { } // HOST-LABEL: @_ZZ15host_lambda_refPiENKUlvE_clEv( -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var // HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 -// HOST: store i32 %[[VAL2]], i32* @global_host_var -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL2]], ptr @global_host_var +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var // HOST: store i32 %[[VAL]] void host_lambda_ref(int *out) { [=](){ @@ -121,12 +121,12 @@ void host_lambda_ref(int *out) { } // HOST-LABEL: define{{.*}} void @_Z28dev_capture_host_ref_by_copyPi( -// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %{{.*}}, i32 0, i32 1 -// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var -// HOST: store i32 %[[VAL]], i32* %[[CAP]] +// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], ptr %{{.*}}, i32 0, i32 1 +// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var +// HOST: store i32 %[[VAL]], ptr %[[CAP]] // DEV-LABEL: define internal void @_ZZ28dev_capture_host_ref_by_copyPiENKUlvE_clEv( -// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %this1, i32 0, i32 1 -// DEV: %[[VAL:.*]] = load i32, i32* %[[CAP]] +// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], ptr %this1, i32 0, i32 1 +// DEV: %[[VAL:.*]] = load i32, ptr %[[CAP]] // DEV: store i32 %[[VAL]] void dev_capture_host_ref_by_copy(int *out) { int &ref = global_host_var; diff --git a/clang/test/CodeGenCUDA/lambda.cu b/clang/test/CodeGenCUDA/lambda.cu index c2012dc9635589..b620e7d44041c8 100644 --- a/clang/test/CodeGenCUDA/lambda.cu +++ b/clang/test/CodeGenCUDA/lambda.cu @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple x86_64-linux-gnu \ // RUN: | FileCheck -check-prefix=HOST %s -// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: | FileCheck -check-prefix=DEV %s @@ -43,7 +43,7 @@ // DEV-LABEL: define{{.*}} amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_ // DEV: call void @_ZZ12test_capturevENKUlvE_clEv // DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv -// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*) +// DEV: store i32 1, ptr addrspacecast (ptr addrspace(1) @a to ptr) // Check functions emitted for test_resolve in device compilation. // Check device version of template function 'overloaded' is emitted and called diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu index b722119a21a0f4..58bcc410201f35 100644 --- a/clang/test/CodeGenCUDA/launch-bounds.cu +++ b/clang/test/CodeGenCUDA/launch-bounds.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -no-opaque-pointers %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s #include "Inputs/cuda.h" @@ -14,8 +14,8 @@ Kernel1() } } -// CHECK: !{{[0-9]+}} = !{void ()* @Kernel1, !"maxntidx", i32 256} -// CHECK: !{{[0-9]+}} = !{void ()* @Kernel1, !"minctasm", i32 2} +// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2} // Test only max threads per block. Min cta per sm defaults to 0, and // CodeGen doesn't output a zero value for minctasm. @@ -27,7 +27,7 @@ Kernel2() } } -// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{ptr @Kernel2, !"maxntidx", i32 256} template __global__ void @@ -37,7 +37,7 @@ Kernel3() } template __global__ void Kernel3(); -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256} template __global__ void @@ -47,8 +47,8 @@ Kernel4() } template __global__ void Kernel4(); -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} const int constint = 100; template @@ -60,8 +60,8 @@ Kernel5() } template __global__ void Kernel5(); -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} // Make sure we don't emit negative launch bounds values. __global__ void @@ -69,18 +69,18 @@ __launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP ) Kernel6() { } -// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx", -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm", +// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx", +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm", __global__ void __launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP ) Kernel7() { } -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx", -// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm", +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx", +// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm", const char constchar = 12; __global__ void __launch_bounds__(constint, constchar) Kernel8() {} -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100 -// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12 +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100 +// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12 diff --git a/clang/test/CodeGenCUDA/llvm-used.cu b/clang/test/CodeGenCUDA/llvm-used.cu index 521a39f17ee09e..c39111dd480366 100644 --- a/clang/test/CodeGenCUDA/llvm-used.cu +++ b/clang/test/CodeGenCUDA/llvm-used.cu @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s // Make sure we emit the proper addrspacecast for llvm.used. PR22383 exposed an // issue where we were generating a bitcast instead of an addrspacecast. -// CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8*)], section "llvm.metadata" +// CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @a to ptr)], section "llvm.metadata" __attribute__((device)) __attribute__((__used__)) int a[] = {}; diff --git a/clang/test/CodeGenCUDA/printf.cu b/clang/test/CodeGenCUDA/printf.cu index 56233e46dcf17d..396181820dde8a 100644 --- a/clang/test/CodeGenCUDA/printf.cu +++ b/clang/test/CodeGenCUDA/printf.cu @@ -1,7 +1,7 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \ // RUN: -o - %s | FileCheck %s #include "Inputs/cuda.h" @@ -14,20 +14,19 @@ __device__ int CheckSimple() { // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]] // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt const char* fmt = "%d %lld %f"; - // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 0 - // CHECK: store i32 1, i32* [[PTR0]], align 4 - // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 1 - // CHECK: store i64 2, i64* [[PTR1]], align 8 - // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 2 - // CHECK: store double 3.0{{[^,]*}}, double* [[PTR2]], align 8 - // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]* [[BUF]] to i8* - // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF_CAST]]) + // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], ptr [[BUF]], i32 0, i32 0 + // CHECK: store i32 1, ptr [[PTR0]], align 4 + // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], ptr [[BUF]], i32 0, i32 1 + // CHECK: store i64 2, ptr [[PTR1]], align 8 + // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], ptr [[BUF]], i32 0, i32 2 + // CHECK: store double 3.0{{[^,]*}}, ptr [[PTR2]], align 8 + // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(ptr [[FMT]], ptr [[BUF]]) // CHECK: ret i32 [[RET]] return printf(fmt, 1, 2ll, 3.0); } __device__ void CheckNoArgs() { - // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}} + // CHECK: call i32 @vprintf({{.*}}, ptr null){{$}} printf("hello, world!"); } diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu index 1941245c42384b..b7172b77369296 100644 --- a/clang/test/CodeGenCUDA/ptx-kernels.cu +++ b/clang/test/CodeGenCUDA/ptx-kernels.cu @@ -2,7 +2,7 @@ // annotations and are added to @llvm.used to prevent their elimination. // REQUIRES: nvptx-registered-target // -// RUN: %clang_cc1 -no-opaque-pointers %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s #include "Inputs/cuda.h" @@ -31,5 +31,5 @@ void host_function() { anonymous_ns_kernel<<<0,0>>>(); } -// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} -// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1} +// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1} +// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1} diff --git a/clang/test/CodeGenCUDA/redux-builtins.cu b/clang/test/CodeGenCUDA/redux-builtins.cu index 2b295944c6ad87..a6c83945ab156e 100644 --- a/clang/test/CodeGenCUDA/redux-builtins.cu +++ b/clang/test/CodeGenCUDA/redux-builtins.cu @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -no-opaque-pointers "-triple" "nvptx-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s -// RUN: %clang_cc1 -no-opaque-pointers "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s -// CHECK: define{{.*}} void @_Z6kernelPi(i32* noundef %out) +// CHECK: define{{.*}} void @_Z6kernelPi(ptr noundef %out) __attribute__((global)) void kernel(int *out) { int a = 1; unsigned int b = 5;