diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index fb0ece96e1418..19ce7a5d2c86b 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -26,6 +26,7 @@ #define __managed__ __attribute__((managed)) #define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__))) +#define __no_cluster__ __attribute__((no_cluster)) #if !defined(__cplusplus) || __cplusplus < 201103L #define nullptr NULL; diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl new file mode 100644 index 0000000000000..be822a6e55f58 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5 +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1250 -disable-llvm-passes -fno-ident -emit-llvm %s -o - | FileCheck %s + +kernel void foo(global int *p) { *p = 1; } +// CHECK: Function Attrs: convergent norecurse nounwind +// CHECK-LABEL: define dso_local amdgpu_kernel void @foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: call void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: alwaysinline convergent norecurse nounwind +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_foo( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: ret void +// +//. +// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" } +// CHECK: attributes #[[ATTR2]] = { convergent nounwind } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META2:![0-9]+]] = !{i32 2, i32 0} +// CHECK: [[META3]] = !{i32 1} +// CHECK: [[META4]] = !{!"none"} +// CHECK: [[META5]] = !{!"int*"} +// CHECK: [[META6]] = !{!""} +// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"p1 int", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any pointer", [[META10:![0-9]+]], i64 0} +// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} +// CHECK: [[META11]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0} +// CHECK: [[META13]] = !{!"int", [[META10]], i64 0} +//.