Skip to content

Commit

Permalink
[Clang] Update some CUDA tests to opaque pointers (NFC)
Browse files Browse the repository at this point in the history
  • Loading branch information
nikic committed Dec 13, 2022
1 parent 0dfe53b commit 0419465
Show file tree
Hide file tree
Showing 25 changed files with 491 additions and 495 deletions.
26 changes: 13 additions & 13 deletions clang/test/CodeGenCUDA/address-spaces.cu
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s
// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s

// Verifies Clang emits correct address spaces and addrspacecast instructions
// for CUDA code.
Expand Down Expand Up @@ -28,17 +28,17 @@ struct MyStruct {
// CHECK: @b ={{.*}} addrspace(3) global float undef

__device__ void foo() {
// CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
// CHECK: load i32, ptr addrspacecast (ptr addrspace(1) @i to ptr)
i++;

// CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*)
// CHECK: load i32, ptr addrspacecast (ptr addrspace(4) @j to ptr)
j++;

// CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*)
// CHECK: load i32, ptr addrspacecast (ptr addrspace(3) @k to ptr)
k++;

__shared__ int lk;
// CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*)
// CHECK: load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ3foovE2lk to ptr)
lk++;
}

Expand All @@ -49,7 +49,7 @@ __device__ void func0() {
ap->data2 = 2;
}
// CHECK: define{{.*}} void @_Z5func0v()
// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %{{.*}}
// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func0vE1a to ptr), ptr %{{.*}}

__device__ void callee(float *ap) {
*ap = 1.0f;
Expand All @@ -60,36 +60,36 @@ __device__ void func1() {
callee(&a); // implicit cast from parameters
}
// CHECK: define{{.*}} void @_Z5func1v()
// CHECK: call void @_Z6calleePf(float* noundef addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*))
// CHECK: call void @_Z6calleePf(ptr noundef addrspacecast (ptr addrspace(3) @_ZZ5func1vE1a to ptr))

__device__ void func2() {
__shared__ float a[256];
float *ap = &a[128]; // implicit cast from a decayed array
*ap = 1.0f;
}
// CHECK: define{{.*}} void @_Z5func2v()
// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i{{32|64}} 0, i{{32|64}} 128), float** %{{.*}}
// CHECK: store ptr getelementptr inbounds ([256 x float], ptr addrspacecast (ptr addrspace(3) @_ZZ5func2vE1a to ptr), i{{32|64}} 0, i{{32|64}} 128), ptr %{{.*}}

__device__ void func3() {
__shared__ float a;
float *ap = reinterpret_cast<float *>(&a); // explicit cast
*ap = 1.0f;
}
// CHECK: define{{.*}} void @_Z5func3v()
// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %{{.*}}
// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func3vE1a to ptr), ptr %{{.*}}

__device__ void func4() {
__shared__ float a;
float *ap = (float *)&a; // explicit c-style cast
*ap = 1.0f;
}
// CHECK: define{{.*}} void @_Z5func4v()
// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %{{.*}}
// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func4vE1a to ptr), ptr %{{.*}}

__shared__ float b;

__device__ float *func5() {
return &b; // implicit cast from a return value
}
// CHECK: define{{.*}} float* @_Z5func5v()
// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*)
// CHECK: define{{.*}} ptr @_Z5func5v()
// CHECK: ret ptr addrspacecast (ptr addrspace(3) @b to ptr)
12 changes: 6 additions & 6 deletions clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN: -fnative-half-arguments-and-returns | FileCheck %s

Expand All @@ -9,33 +9,33 @@

__device__ float ffp1(float *p) {
// CHECK-LABEL: @_Z4ffp1Pf
// CHECK: atomicrmw fadd float* {{.*}} monotonic
// CHECK: atomicrmw fadd ptr {{.*}} monotonic
return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
}

__device__ double ffp2(double *p) {
// CHECK-LABEL: @_Z4ffp2Pd
// CHECK: atomicrmw fsub double* {{.*}} monotonic
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
}

// long double is the same as double for amdgcn.
__device__ long double ffp3(long double *p) {
// CHECK-LABEL: @_Z4ffp3Pe
// CHECK: atomicrmw fsub double* {{.*}} monotonic
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
}

__device__ double ffp4(double *p, float f) {
// CHECK-LABEL: @_Z4ffp4Pdf
// CHECK: fpext float {{.*}} to double
// CHECK: atomicrmw fsub double* {{.*}} monotonic
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, f, memory_order_relaxed);
}

__device__ double ffp5(double *p, int i) {
// CHECK-LABEL: @_Z4ffp5Pdi
// CHECK: sitofp i32 {{.*}} to double
// CHECK: atomicrmw fsub double* {{.*}} monotonic
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, i, memory_order_relaxed);
}
32 changes: 16 additions & 16 deletions clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,31 +1,31 @@
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=PRECOV5 %s


// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s

#include "Inputs/cuda.h"

// PRECOV5-LABEL: test_get_workgroup_size
// PRECOV5: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 4
// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 6
// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 8
// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// PRECOV5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load

// COV5-LABEL: test_get_workgroup_size
// COV5: call align 8 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 12
// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 14
// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 16
// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// COV5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
__device__ void test_get_workgroup_size(int d, int *out)
{
switch (d) {
Expand Down

0 comments on commit 0419465

Please sign in to comment.