diff --git a/clang/test/CodeGenOpenCL/intel-bfloat16-conversions.cl b/clang/test/CodeGenOpenCL/intel-bfloat16-conversions.cl new file mode 100644 index 0000000000000..d0a0e97150ad6 --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-bfloat16-conversions.cl @@ -0,0 +1,162 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +// CHECK-LABEL: define dso_local spir_func zeroext i16 @test_convert_bfloat16_as_ushort( +// CHECK-SAME: float noundef [[SOURCE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: store float [[SOURCE]], ptr [[SOURCE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SOURCE_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i16 @_Z32intel_convert_bfloat16_as_ushortf(float noundef [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret i16 [[CALL]] +// Scalar case: forwards the float argument to intel_convert_bfloat16_as_ushort and returns its ushort result. +ushort test_convert_bfloat16_as_ushort(float source) { + return intel_convert_bfloat16_as_ushort(source); +} + +// CHECK-LABEL: define dso_local spir_func <2 x i16> @test_convert_bfloat162_as_ushort2( +// CHECK-SAME: <2 x float> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <2 x float>, align 8 +// CHECK-NEXT: store <2 x float> [[SOURCE]], ptr [[SOURCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[SOURCE_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <2 x i16> @_Z34intel_convert_bfloat162_as_ushort2Dv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <2 x i16> [[CALL]] +// float2 case: forwards the argument to intel_convert_bfloat162_as_ushort2 and returns its ushort2 result. +ushort2 test_convert_bfloat162_as_ushort2(float2 source) { + return intel_convert_bfloat162_as_ushort2(source); +} + +// CHECK-LABEL: define dso_local spir_func <3 x i16>
@test_convert_bfloat163_as_ushort3( +// CHECK-SAME: <3 x float> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <3 x float>, align 16 +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[SOURCE]], <3 x float> undef, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr [[SOURCE_ADDR]], align 16 +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr [[SOURCE_ADDR]], align 16 +// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <3 x i16> @_Z34intel_convert_bfloat163_as_ushort3Dv3_f(<3 x float> noundef [[EXTRACTVEC1]]) #[[ATTR2]] +// CHECK-NEXT: ret <3 x i16> [[CALL]] +// float3 case: forwards the argument to intel_convert_bfloat163_as_ushort3 and returns its ushort3 result. NOTE(review): both shufflevector expectations for this function end right after the mask type, so the mask operands are not matched, and the widening shuffle uses an undef placeholder while the narrowing one uses poison - confirm against regenerated output. +ushort3 test_convert_bfloat163_as_ushort3(float3 source) { + return intel_convert_bfloat163_as_ushort3(source); +} + +// CHECK-LABEL: define dso_local spir_func <4 x i16> @test_convert_bfloat164_as_ushort4( +// CHECK-SAME: <4 x float> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[SOURCE]], ptr [[SOURCE_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[SOURCE_ADDR]], align 16 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <4 x i16> @_Z34intel_convert_bfloat164_as_ushort4Dv4_f(<4 x float> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <4 x i16> [[CALL]] +// float4 case: forwards the argument to intel_convert_bfloat164_as_ushort4 and returns its ushort4 result. +ushort4 test_convert_bfloat164_as_ushort4(float4 source) { + return intel_convert_bfloat164_as_ushort4(source); +} + +// CHECK-LABEL: define dso_local spir_func <8 x i16> @test_convert_bfloat168_as_ushort8( +// CHECK-SAME: <8 x float> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <8 x float>, align 32 +// CHECK-NEXT: store <8 x float> [[SOURCE]], ptr [[SOURCE_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[SOURCE_ADDR]], align 32
+// CHECK-NEXT: [[CALL:%.*]] = call spir_func <8 x i16> @_Z34intel_convert_bfloat168_as_ushort8Dv8_f(<8 x float> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <8 x i16> [[CALL]] +// float8 case: forwards the argument to intel_convert_bfloat168_as_ushort8 and returns its ushort8 result. +ushort8 test_convert_bfloat168_as_ushort8(float8 source) { + return intel_convert_bfloat168_as_ushort8(source); +} + +// CHECK-LABEL: define dso_local spir_func <16 x i16> @test_convert_bfloat1616_as_ushort16( +// CHECK-SAME: <16 x float> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-NEXT: store <16 x float> [[SOURCE]], ptr [[SOURCE_ADDR]], align 64 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[SOURCE_ADDR]], align 64 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <16 x i16> @_Z36intel_convert_bfloat1616_as_ushort16Dv16_f(<16 x float> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <16 x i16> [[CALL]] +// float16 case: forwards the argument to intel_convert_bfloat1616_as_ushort16 and returns its ushort16 result. +ushort16 test_convert_bfloat1616_as_ushort16(float16 source) { + return intel_convert_bfloat1616_as_ushort16(source); +} + +// CHECK-LABEL: define dso_local spir_func float @test_convert_as_bfloat16_float( +// CHECK-SAME: i16 noundef zeroext [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: store i16 [[SOURCE]], ptr [[SOURCE_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SOURCE_ADDR]], align 2 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func float @_Z31intel_convert_as_bfloat16_floatt(i16 noundef zeroext [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret float [[CALL]] +// Scalar reverse direction: forwards the ushort argument to intel_convert_as_bfloat16_float and returns its float result. +float test_convert_as_bfloat16_float(ushort source) { + return intel_convert_as_bfloat16_float(source); +} + +// CHECK-LABEL: define dso_local spir_func <2 x float> @test_convert_as_bfloat162_float2( +// CHECK-SAME: <2 x i16> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <2 x i16>, align 4 +// CHECK-NEXT: store <2 x i16> [[SOURCE]], ptr [[SOURCE_ADDR]],
align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[SOURCE_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <2 x float> @_Z33intel_convert_as_bfloat162_float2Dv2_t(<2 x i16> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <2 x float> [[CALL]] +// ushort2 case: forwards the argument to intel_convert_as_bfloat162_float2 and returns its float2 result. +float2 test_convert_as_bfloat162_float2(ushort2 source) { + return intel_convert_as_bfloat162_float2(source); +} + +// CHECK-LABEL: define dso_local spir_func <3 x float> @test_convert_as_bfloat163_float3( +// CHECK-SAME: <3 x i16> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <3 x i16>, align 8 +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[SOURCE]], <3 x i16> undef, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr [[SOURCE_ADDR]], align 8 +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i16>, ptr [[SOURCE_ADDR]], align 8 +// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i16> [[LOADVECN]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <3 x float> @_Z33intel_convert_as_bfloat163_float3Dv3_t(<3 x i16> noundef [[EXTRACTVEC1]]) #[[ATTR2]] +// CHECK-NEXT: ret <3 x float> [[CALL]] +// ushort3 case: forwards the argument to intel_convert_as_bfloat163_float3 and returns its float3 result. NOTE(review): both shufflevector expectations for this function end right after the mask type, so the mask operands are not matched, and the widening shuffle uses an undef placeholder while the narrowing one uses poison - confirm against regenerated output. +float3 test_convert_as_bfloat163_float3(ushort3 source) { + return intel_convert_as_bfloat163_float3(source); +} + +// CHECK-LABEL: define dso_local spir_func <4 x float> @test_convert_as_bfloat164_float4( +// CHECK-SAME: <4 x i16> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <4 x i16>, align 8 +// CHECK-NEXT: store <4 x i16> [[SOURCE]], ptr [[SOURCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[SOURCE_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <4 x float> @_Z33intel_convert_as_bfloat164_float4Dv4_t(<4 x i16> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <4 x float> [[CALL]] +// ushort4 case: forwards the argument to intel_convert_as_bfloat164_float4 and returns its float4 result. +float4 test_convert_as_bfloat164_float4(ushort4 source) { + return intel_convert_as_bfloat164_float4(source);
+} + +// CHECK-LABEL: define dso_local spir_func <8 x float> @test_convert_as_bfloat168_float8( +// CHECK-SAME: <8 x i16> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[SOURCE]], ptr [[SOURCE_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[SOURCE_ADDR]], align 16 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <8 x float> @_Z33intel_convert_as_bfloat168_float8Dv8_t(<8 x i16> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <8 x float> [[CALL]] +// ushort8 case: forwards the argument to intel_convert_as_bfloat168_float8 and returns its float8 result. +float8 test_convert_as_bfloat168_float8(ushort8 source) { + return intel_convert_as_bfloat168_float8(source); +} + +// CHECK-LABEL: define dso_local spir_func <16 x float> @test_convert_as_bfloat1616_float16( +// CHECK-SAME: <16 x i16> noundef [[SOURCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SOURCE_ADDR:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: store <16 x i16> [[SOURCE]], ptr [[SOURCE_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i16>, ptr [[SOURCE_ADDR]], align 32 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <16 x float> @_Z35intel_convert_as_bfloat1616_float16Dv16_t(<16 x i16> noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: ret <16 x float> [[CALL]] +// ushort16 case: forwards the argument to intel_convert_as_bfloat1616_float16 and returns its float16 result. +float16 test_convert_as_bfloat1616_float16(ushort16 source) { + return intel_convert_as_bfloat1616_float16(source); +} diff --git a/clang/test/CodeGenOpenCL/intel-subgroup-buffer-prefetch-builtins.cl b/clang/test/CodeGenOpenCL/intel-subgroup-buffer-prefetch-builtins.cl new file mode 100644 index 0000000000000..6a543431e5751 --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-subgroup-buffer-prefetch-builtins.cl @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +//
CHECK-LABEL: define dso_local spir_func void @test_block_prefetch_ui( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z33intel_sub_group_block_prefetch_uiPU3AS1Kj(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_ui2PU3AS1Kj(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_ui4PU3AS1Kj(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_ui8PU3AS1Kj(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls the ui, ui2, ui4 and ui8 block-prefetch builtins in turn on the same const __global uint pointer. +void test_block_prefetch_ui(const __global uint *in) { + intel_sub_group_block_prefetch_ui(in); + intel_sub_group_block_prefetch_ui2(in); + intel_sub_group_block_prefetch_ui4(in); + intel_sub_group_block_prefetch_ui8(in); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_prefetch_us( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z33intel_sub_group_block_prefetch_usPU3AS1Kt(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]],
align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_us2PU3AS1Kt(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_us4PU3AS1Kt(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_us8PU3AS1Kt(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z35intel_sub_group_block_prefetch_us16PU3AS1Kt(ptr addrspace(1) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls the us, us2, us4, us8 and us16 block-prefetch builtins in turn on the same const __global ushort pointer. +void test_block_prefetch_us(const __global ushort *in) { + intel_sub_group_block_prefetch_us(in); + intel_sub_group_block_prefetch_us2(in); + intel_sub_group_block_prefetch_us4(in); + intel_sub_group_block_prefetch_us8(in); + intel_sub_group_block_prefetch_us16(in); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_prefetch_uc( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z33intel_sub_group_block_prefetch_ucPU3AS1Kh(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_uc2PU3AS1Kh(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_uc4PU3AS1Kh(ptr addrspace(1) noundef [[TMP2]])
#[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_uc8PU3AS1Kh(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z35intel_sub_group_block_prefetch_uc16PU3AS1Kh(ptr addrspace(1) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls the uc, uc2, uc4, uc8 and uc16 block-prefetch builtins in turn on the same const __global uchar pointer. +void test_block_prefetch_uc(const __global uchar *in) { + intel_sub_group_block_prefetch_uc(in); + intel_sub_group_block_prefetch_uc2(in); + intel_sub_group_block_prefetch_uc4(in); + intel_sub_group_block_prefetch_uc8(in); + intel_sub_group_block_prefetch_uc16(in); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_prefetch_ul( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z33intel_sub_group_block_prefetch_ulPU3AS1Km(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_ul2PU3AS1Km(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_ul4PU3AS1Km(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z34intel_sub_group_block_prefetch_ul8PU3AS1Km(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls the ul, ul2, ul4 and ul8 block-prefetch builtins in turn on the same const __global ulong pointer. +void test_block_prefetch_ul(const __global ulong *in) { +
intel_sub_group_block_prefetch_ul(in); + intel_sub_group_block_prefetch_ul2(in); + intel_sub_group_block_prefetch_ul4(in); + intel_sub_group_block_prefetch_ul8(in); +} diff --git a/clang/test/CodeGenOpenCL/intel-subgroup-local-block-io-builtins.cl b/clang/test/CodeGenOpenCL/intel-subgroup-local-block-io-builtins.cl new file mode 100644 index 0000000000000..63253ceb40384 --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-subgroup-local-block-io-builtins.cl @@ -0,0 +1,319 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_local( +// CHECK-SAME: ptr addrspace(3) noundef [[IN:%.*]], ptr addrspace(3) noundef [[OUT:%.*]], i32 noundef [[VALUE:%.*]], <2 x i32> noundef [[VALUE2:%.*]], <4 x i32> noundef [[VALUE4:%.*]], <8 x i32> noundef [[VALUE8:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[V2:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[V4:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[V8:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: store ptr addrspace(3) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: store ptr addrspace(3) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i32 [[VALUE]], ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[VALUE2]], ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32>
[[VALUE4]], ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: store <8 x i32> [[VALUE8]], ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i32 @_Z26intel_sub_group_block_readPU3AS3Kj(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: store i32 [[CALL]], ptr [[V]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i32> @_Z27intel_sub_group_block_read2PU3AS3Kj(ptr addrspace(3) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: store <2 x i32> [[CALL1]], ptr [[V2]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i32> @_Z27intel_sub_group_block_read4PU3AS3Kj(ptr addrspace(3) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: store <4 x i32> [[CALL2]], ptr [[V4]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i32> @_Z27intel_sub_group_block_read8PU3AS3Kj(ptr addrspace(3) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: store <8 x i32> [[CALL3]], ptr [[V8]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z27intel_sub_group_block_writePU3AS3jj(ptr addrspace(3) noundef [[TMP4]], i32 noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write2PU3AS3jDv2_j(ptr addrspace(3) noundef [[TMP6]], <2 x i32> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr 
[[VALUE4_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write4PU3AS3jDv4_j(ptr addrspace(3) noundef [[TMP8]], <4 x i32> noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i32>, ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write8PU3AS3jDv8_j(ptr addrspace(3) noundef [[TMP10]], <8 x i32> noundef [[TMP11]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Reads through the __local uint pointer in with intel_sub_group_block_read, read2, read4 and read8 into locals, then writes value, value2, value4 and value8 through the __local uint pointer out with the matching block_write builtins. +void test_block_read_local(const __local uint *in, __local uint *out, + uint value, uint2 value2, uint4 value4, + uint8 value8) { + uint v = intel_sub_group_block_read(in); + uint2 v2 = intel_sub_group_block_read2(in); + uint4 v4 = intel_sub_group_block_read4(in); + uint8 v8 = intel_sub_group_block_read8(in); + + intel_sub_group_block_write(out, value); + intel_sub_group_block_write2(out, value2); + intel_sub_group_block_write4(out, value4); + intel_sub_group_block_write8(out, value8); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_ui_local( +// CHECK-SAME: ptr addrspace(3) noundef [[IN:%.*]], ptr addrspace(3) noundef [[OUT:%.*]], i32 noundef [[VALUE:%.*]], <2 x i32> noundef [[VALUE2:%.*]], <4 x i32> noundef [[VALUE4:%.*]], <8 x i32> noundef [[VALUE8:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[V2:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[V4:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[V8:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: store ptr
addrspace(3) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: store ptr addrspace(3) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i32 [[VALUE]], ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[VALUE2]], ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[VALUE4]], ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: store <8 x i32> [[VALUE8]], ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i32 @_Z29intel_sub_group_block_read_uiPU3AS3Kj(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: store i32 [[CALL]], ptr [[V]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i32> @_Z30intel_sub_group_block_read_ui2PU3AS3Kj(ptr addrspace(3) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: store <2 x i32> [[CALL1]], ptr [[V2]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i32> @_Z30intel_sub_group_block_read_ui4PU3AS3Kj(ptr addrspace(3) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: store <4 x i32> [[CALL2]], ptr [[V4]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i32> @_Z30intel_sub_group_block_read_ui8PU3AS3Kj(ptr addrspace(3) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: store <8 x i32> [[CALL3]], ptr [[V8]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_uiPU3AS3jj(ptr addrspace(3) noundef [[TMP4]], i32 noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUE2_ADDR]], align 
8 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ui2PU3AS3jDv2_j(ptr addrspace(3) noundef [[TMP6]], <2 x i32> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ui4PU3AS3jDv4_j(ptr addrspace(3) noundef [[TMP8]], <4 x i32> noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i32>, ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ui8PU3AS3jDv8_j(ptr addrspace(3) noundef [[TMP10]], <8 x i32> noundef [[TMP11]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Same shape as the unsuffixed test but with the _ui-suffixed builtins: block_read_ui, ui2, ui4, ui8 from the __local uint pointer in, then block_write_ui, ui2, ui4, ui8 of the value arguments through out. +void test_block_read_ui_local(const __local uint *in, __local uint *out, + uint value, uint2 value2, uint4 value4, + uint8 value8) { + uint v = intel_sub_group_block_read_ui(in); + uint2 v2 = intel_sub_group_block_read_ui2(in); + uint4 v4 = intel_sub_group_block_read_ui4(in); + uint8 v8 = intel_sub_group_block_read_ui8(in); + + intel_sub_group_block_write_ui(out, value); + intel_sub_group_block_write_ui2(out, value2); + intel_sub_group_block_write_ui4(out, value4); + intel_sub_group_block_write_ui8(out, value8); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_uc_local( +// CHECK-SAME: ptr addrspace(3) noundef [[IN:%.*]], ptr addrspace(3) noundef [[OUT:%.*]], i8 noundef zeroext [[VALUE:%.*]], <2 x i8> noundef [[VALUE2:%.*]], <4 x i8> noundef [[VALUE4:%.*]], <8 x i8> noundef [[VALUE8:%.*]], <16 x i8> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i8>, align 2 +// CHECK-NEXT:
[[VALUE4_ADDR:%.*]] = alloca <4 x i8>, align 4 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i8>, align 8 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[V:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[V2:%.*]] = alloca <2 x i8>, align 2 +// CHECK-NEXT: [[V4:%.*]] = alloca <4 x i8>, align 4 +// CHECK-NEXT: [[V8:%.*]] = alloca <8 x i8>, align 8 +// CHECK-NEXT: [[V16:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store ptr addrspace(3) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: store ptr addrspace(3) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i8 [[VALUE]], ptr [[VALUE_ADDR]], align 1 +// CHECK-NEXT: store <2 x i8> [[VALUE2]], ptr [[VALUE2_ADDR]], align 2 +// CHECK-NEXT: store <4 x i8> [[VALUE4]], ptr [[VALUE4_ADDR]], align 4 +// CHECK-NEXT: store <8 x i8> [[VALUE8]], ptr [[VALUE8_ADDR]], align 8 +// CHECK-NEXT: store <16 x i8> [[VALUE16]], ptr [[VALUE16_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i8 @_Z29intel_sub_group_block_read_ucPU3AS3Kh(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: store i8 [[CALL]], ptr [[V]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc2PU3AS3Kh(ptr addrspace(3) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: store <2 x i8> [[CALL1]], ptr [[V2]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i8> @_Z30intel_sub_group_block_read_uc4PU3AS3Kh(ptr addrspace(3) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: store <4 x i8> [[CALL2]], ptr [[V4]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i8> @_Z30intel_sub_group_block_read_uc8PU3AS3Kh(ptr addrspace(3) 
noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: store <8 x i8> [[CALL3]], ptr [[V8]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS3Kh(ptr addrspace(3) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: store <16 x i8> [[CALL4]], ptr [[V16]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[VALUE_ADDR]], align 1 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_ucPU3AS3hh(ptr addrspace(3) noundef [[TMP5]], i8 noundef zeroext [[TMP6]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load <2 x i8>, ptr [[VALUE2_ADDR]], align 2 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_uc2PU3AS3hDv2_h(ptr addrspace(3) noundef [[TMP7]], <2 x i8> noundef [[TMP8]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[VALUE4_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_uc4PU3AS3hDv4_h(ptr addrspace(3) noundef [[TMP9]], <4 x i8> noundef [[TMP10]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i8>, ptr [[VALUE8_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_uc8PU3AS3hDv8_h(ptr addrspace(3) noundef [[TMP11]], <8 x i8> noundef [[TMP12]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP13:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr [[VALUE16_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS3hDv16_h(ptr addrspace(3) noundef [[TMP13]], <16 x i8> noundef [[TMP14]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// uchar variants: block_read_uc, uc2, uc4, uc8 and uc16 from the __local uchar pointer in, then block_write_uc, uc2, uc4, uc8 and uc16 of the value arguments through out. +void
test_block_read_uc_local(const __local uchar *in, __local uchar *out, + uchar value, uchar2 value2, uchar4 value4, + uchar8 value8, uchar16 value16) { + uchar v = intel_sub_group_block_read_uc(in); + uchar2 v2 = intel_sub_group_block_read_uc2(in); + uchar4 v4 = intel_sub_group_block_read_uc4(in); + uchar8 v8 = intel_sub_group_block_read_uc8(in); + uchar16 v16 = intel_sub_group_block_read_uc16(in); + + intel_sub_group_block_write_uc(out, value); + intel_sub_group_block_write_uc2(out, value2); + intel_sub_group_block_write_uc4(out, value4); + intel_sub_group_block_write_uc8(out, value8); + intel_sub_group_block_write_uc16(out, value16); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_us_local( +// CHECK-SAME: ptr addrspace(3) noundef [[IN:%.*]], ptr addrspace(3) noundef [[OUT:%.*]], i16 noundef zeroext [[VALUE:%.*]], <2 x i16> noundef [[VALUE2:%.*]], <4 x i16> noundef [[VALUE4:%.*]], <8 x i16> noundef [[VALUE8:%.*]], <16 x i16> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i16>, align 4 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i16>, align 8 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: [[V:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[V2:%.*]] = alloca <2 x i16>, align 4 +// CHECK-NEXT: [[V4:%.*]] = alloca <4 x i16>, align 8 +// CHECK-NEXT: [[V8:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[V16:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: store ptr addrspace(3) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: store ptr addrspace(3) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i16 [[VALUE]], ptr [[VALUE_ADDR]], align 2 +// CHECK-NEXT: store <2 x i16>
[[VALUE2]], ptr [[VALUE2_ADDR]], align 4 +// CHECK-NEXT: store <4 x i16> [[VALUE4]], ptr [[VALUE4_ADDR]], align 8 +// CHECK-NEXT: store <8 x i16> [[VALUE8]], ptr [[VALUE8_ADDR]], align 16 +// CHECK-NEXT: store <16 x i16> [[VALUE16]], ptr [[VALUE16_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i16 @_Z29intel_sub_group_block_read_usPU3AS3Kt(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: store i16 [[CALL]], ptr [[V]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PU3AS3Kt(ptr addrspace(3) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: store <2 x i16> [[CALL1]], ptr [[V2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i16> @_Z30intel_sub_group_block_read_us4PU3AS3Kt(ptr addrspace(3) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: store <4 x i16> [[CALL2]], ptr [[V4]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i16> @_Z30intel_sub_group_block_read_us8PU3AS3Kt(ptr addrspace(3) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: store <8 x i16> [[CALL3]], ptr [[V8]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS3Kt(ptr addrspace(3) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: store <16 x i16> [[CALL4]], ptr [[V16]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[VALUE_ADDR]], align 2 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_usPU3AS3tt(ptr addrspace(3) noundef [[TMP5]], i16 noundef zeroext 
[[TMP6]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load <2 x i16>, ptr [[VALUE2_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_us2PU3AS3tDv2_t(ptr addrspace(3) noundef [[TMP7]], <2 x i16> noundef [[TMP8]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[VALUE4_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_us4PU3AS3tDv4_t(ptr addrspace(3) noundef [[TMP9]], <4 x i16> noundef [[TMP10]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[VALUE8_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_us8PU3AS3tDv8_t(ptr addrspace(3) noundef [[TMP11]], <8 x i16> noundef [[TMP12]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP13:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load <16 x i16>, ptr [[VALUE16_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us16PU3AS3tDv16_t(ptr addrspace(3) noundef [[TMP13]], <16 x i16> noundef [[TMP14]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Covers the ushort block read/write builtins (scalar and 2/4/8/16-wide) on __local pointers; each read is stored to a local, hence the V* allocas checked above. +void test_block_read_us_local(const __local ushort *in, __local ushort *out, + ushort value, ushort2 value2, ushort4 value4, + ushort8 value8, ushort16 value16) { + ushort v = intel_sub_group_block_read_us(in); + ushort2 v2 = intel_sub_group_block_read_us2(in); + ushort4 v4 = intel_sub_group_block_read_us4(in); + ushort8 v8 = intel_sub_group_block_read_us8(in); + ushort16 v16 = intel_sub_group_block_read_us16(in); + + intel_sub_group_block_write_us(out, value); + intel_sub_group_block_write_us2(out, value2); + intel_sub_group_block_write_us4(out, value4); + intel_sub_group_block_write_us8(out, value8); + intel_sub_group_block_write_us16(out, value16); +} + +// 
CHECK-LABEL: define dso_local spir_func void @test_block_read_ul_local( +// CHECK-SAME: ptr addrspace(3) noundef [[IN:%.*]], ptr addrspace(3) noundef [[OUT:%.*]], i64 noundef [[VALUE:%.*]], <2 x i64> noundef [[VALUE2:%.*]], <4 x i64> noundef [[VALUE4:%.*]], <8 x i64> noundef [[VALUE8:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(3), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i64>, align 32 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-NEXT: [[V:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[V2:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[V4:%.*]] = alloca <4 x i64>, align 32 +// CHECK-NEXT: [[V8:%.*]] = alloca <8 x i64>, align 64 +// CHECK-NEXT: store ptr addrspace(3) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: store ptr addrspace(3) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i64 [[VALUE]], ptr [[VALUE_ADDR]], align 8 +// CHECK-NEXT: store <2 x i64> [[VALUE2]], ptr [[VALUE2_ADDR]], align 16 +// CHECK-NEXT: store <4 x i64> [[VALUE4]], ptr [[VALUE4_ADDR]], align 32 +// CHECK-NEXT: store <8 x i64> [[VALUE8]], ptr [[VALUE8_ADDR]], align 64 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i64 @_Z29intel_sub_group_block_read_ulPU3AS3Km(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: store i64 [[CALL]], ptr [[V]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS3Km(ptr addrspace(3) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: store <2 x i64> [[CALL1]], ptr [[V2]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(3), ptr 
[[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i64> @_Z30intel_sub_group_block_read_ul4PU3AS3Km(ptr addrspace(3) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: store <4 x i64> [[CALL2]], ptr [[V4]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i64> @_Z30intel_sub_group_block_read_ul8PU3AS3Km(ptr addrspace(3) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: store <8 x i64> [[CALL3]], ptr [[V8]], align 64 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUE_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_ulPU3AS3mm(ptr addrspace(3) noundef [[TMP4]], i64 noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr [[VALUE2_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m(ptr addrspace(3) noundef [[TMP6]], <2 x i64> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, ptr [[VALUE4_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul4PU3AS3mDv4_m(ptr addrspace(3) noundef [[TMP8]], <4 x i64> noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(3), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, ptr [[VALUE8_ADDR]], align 64 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul8PU3AS3mDv8_m(ptr addrspace(3) noundef [[TMP10]], <8 x i64> noundef [[TMP11]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Covers the ulong block read/write builtins on __local pointers; only the scalar and 2/4/8-wide forms are called (no 16-wide ulong call appears here). +void test_block_read_ul_local(const __local ulong *in, __local ulong *out, + ulong value, ulong2 value2, ulong4 value4, + ulong8 value8) { + ulong v = intel_sub_group_block_read_ul(in); + ulong2 v2 
= intel_sub_group_block_read_ul2(in); + ulong4 v4 = intel_sub_group_block_read_ul4(in); + ulong8 v8 = intel_sub_group_block_read_ul8(in); + + intel_sub_group_block_write_ul(out, value); + intel_sub_group_block_write_ul2(out, value2); + intel_sub_group_block_write_ul4(out, value4); + intel_sub_group_block_write_ul8(out, value8); +} diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-builtins.cl b/clang/test/CodeGenOpenCL/intel-subgroups-builtins.cl new file mode 100644 index 0000000000000..c0fb444d375de --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-subgroups-builtins.cl @@ -0,0 +1,239 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// CHECK-LABEL: define dso_local spir_func void @test_shuffle( +// CHECK-SAME: i32 noundef [[SCALAR:%.*]], <8 x i32> noundef [[V8:%.*]], <3 x float> noundef [[F3:%.*]], half noundef [[H:%.*]], double noundef [[D:%.*]], i64 noundef [[L:%.*]], i64 noundef [[UL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SCALAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[V8_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[F3_ADDR:%.*]] = alloca <3 x float>, align 16 +// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[UL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i32 [[SCALAR]], ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: store <8 x i32> [[V8]], ptr [[V8_ADDR]], align 32 +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[F3]], <3 x float> undef, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr [[F3_ADDR]], align 16 +// CHECK-NEXT: store 
half [[H]], ptr [[H_ADDR]], align 2 +// CHECK-NEXT: store double [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 +// CHECK-NEXT: store i64 [[UL]], ptr [[UL_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i32 @_Z23intel_sub_group_shufflejj(i32 noundef [[TMP0]], i32 noundef 1) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[V8_ADDR]], align 32 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <8 x i32> @_Z23intel_sub_group_shuffleDv8_jj(<8 x i32> noundef [[TMP1]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr [[F3_ADDR]], align 16 +// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <3 x float> @_Z23intel_sub_group_shuffleDv3_fj(<3 x float> noundef [[EXTRACTVEC2]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[H_ADDR]], align 2 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func half @_Z23intel_sub_group_shuffleDhj(half noundef [[TMP2]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[CALL5:%.*]] = call spir_func double @_Z23intel_sub_group_shuffledj(double noundef [[TMP3]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[L_ADDR]], align 8 +// CHECK-NEXT: [[CALL6:%.*]] = call spir_func i64 @_Z23intel_sub_group_shufflelj(i64 noundef [[TMP4]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[UL_ADDR]], align 8 +// CHECK-NEXT: [[CALL7:%.*]] = call spir_func i64 @_Z23intel_sub_group_shufflemj(i64 noundef [[TMP5]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls intel_sub_group_shuffle once per argument type (uint, uint8, float3, half, double, long, ulong), always with 1u as the second argument; results are discarded. +void test_shuffle(uint scalar, uint8 v8, float3 f3, half h, double d, + long l, ulong ul) { + (void)intel_sub_group_shuffle(scalar, 1u); + (void)intel_sub_group_shuffle(v8, 1u); + 
(void)intel_sub_group_shuffle(f3, 1u); + (void)intel_sub_group_shuffle(h, 1u); + (void)intel_sub_group_shuffle(d, 1u); + (void)intel_sub_group_shuffle(l, 1u); + (void)intel_sub_group_shuffle(ul, 1u); +} + +// CHECK-LABEL: define dso_local spir_func void @test_shuffle_xor_down_up( +// CHECK-SAME: i32 noundef [[SCALAR:%.*]], <8 x i32> noundef [[V8:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SCALAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[V8_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: store i32 [[SCALAR]], ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: store <8 x i32> [[V8]], ptr [[V8_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i32 @_Z27intel_sub_group_shuffle_xorjj(i32 noundef [[TMP0]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func i32 @_Z28intel_sub_group_shuffle_downjjj(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[SCALAR_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func i32 @_Z26intel_sub_group_shuffle_upjjj(i32 noundef [[TMP3]], i32 noundef [[TMP4]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[V8_ADDR]], align 32 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i32> @_Z27intel_sub_group_shuffle_xorDv8_jj(<8 x i32> noundef [[TMP5]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls shuffle_xor (uint and uint8) plus shuffle_down and shuffle_up (uint only); down/up pass the same value for both data operands. +void test_shuffle_xor_down_up(uint scalar, uint8 v8) { + (void)intel_sub_group_shuffle_xor(scalar, 1u); + (void)intel_sub_group_shuffle_down(scalar, scalar, 1u); + (void)intel_sub_group_shuffle_up(scalar, scalar, 1u); + (void)intel_sub_group_shuffle_xor(v8, 1u); +} + +// CHECK-LABEL: define 
dso_local spir_func void @test_block_read_global( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i32 @_Z26intel_sub_group_block_readPU3AS1Kj(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i32> @_Z27intel_sub_group_block_read2PU3AS1Kj(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i32> @_Z27intel_sub_group_block_read4PU3AS1Kj(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i32> @_Z27intel_sub_group_block_read8PU3AS1Kj(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func i32 @_Z29intel_sub_group_block_read_uiPU3AS1Kj(ptr addrspace(1) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls the unsuffixed block_read/2/4/8 builtins and the _ui-suffixed scalar form on a const __global uint pointer; results are discarded. +void test_block_read_global(const __global uint *in) { + (void)intel_sub_group_block_read(in); + (void)intel_sub_group_block_read2(in); + (void)intel_sub_group_block_read4(in); + (void)intel_sub_group_block_read8(in); + + (void)intel_sub_group_block_read_ui(in); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[ROIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG]], ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i32 @_Z26intel_sub_group_block_read14ocl_image2d_roDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[TMP0]], <2 x i32> noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[TMP2]], <2 x i32> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i32> @_Z27intel_sub_group_block_read414ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP4]], <2 x i32> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i32> 
@_Z27intel_sub_group_block_read814ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP6]], <2 x i32> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func i32 @_Z29intel_sub_group_block_read_ui14ocl_image2d_roDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[TMP8]], <2 x i32> noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP10:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL5:%.*]] = call spir_func <4 x i32> @_Z30intel_sub_group_block_read_ui414ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP10]], <2 x i32> noundef [[TMP11]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Image overloads of block_read: the scalar, 2-wide and _ui calls use the read_only image; the 4-wide, 8-wide and _ui4 calls use the read_write image. +void test_block_read_image(read_only image2d_t roimg, + read_write image2d_t rwimg, int2 coord) { + (void)intel_sub_group_block_read(roimg, coord); + (void)intel_sub_group_block_read2(roimg, coord); + (void)intel_sub_group_block_read4(rwimg, coord); + (void)intel_sub_group_block_read8(rwimg, coord); + + (void)intel_sub_group_block_read_ui(roimg, coord); + (void)intel_sub_group_block_read_ui4(rwimg, coord); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_global( +// CHECK-SAME: ptr addrspace(1) noundef [[OUT:%.*]], i32 noundef [[VALUE:%.*]], <2 x i32> noundef [[VALUE2:%.*]], <4 x i32> noundef [[VALUE4:%.*]], <8 x i32> noundef [[VALUE8:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 
x i32>, align 32 +// CHECK-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i32 [[VALUE]], ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[VALUE2]], ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[VALUE4]], ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: store <8 x i32> [[VALUE8]], ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z27intel_sub_group_block_writePU3AS1jj(ptr addrspace(1) noundef [[TMP0]], i32 noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write2PU3AS1jDv2_j(ptr addrspace(1) noundef [[TMP2]], <2 x i32> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write4PU3AS1jDv4_j(ptr addrspace(1) noundef [[TMP4]], <4 x i32> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i32>, ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write8PU3AS1jDv8_j(ptr addrspace(1) noundef [[TMP6]], <8 x i32> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_uiPU3AS1jj(ptr addrspace(1) noundef [[TMP8]], i32 noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls the unsuffixed block_write/2/4/8 builtins and the _ui-suffixed scalar form on a __global uint pointer. +void test_block_write_global(__global uint *out, uint value, 
uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write(out, value); + intel_sub_group_block_write2(out, value2); + intel_sub_group_block_write4(out, value4); + intel_sub_group_block_write8(out, value8); + + intel_sub_group_block_write_ui(out, value); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]], i32 noundef [[VALUE:%.*]], <2 x i32> noundef [[VALUE2:%.*]], <4 x i32> noundef [[VALUE4:%.*]], <8 x i32> noundef [[VALUE8:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WOIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG]], ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE]], ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[VALUE2]], ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[VALUE4]], ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: store <8 x i32> [[VALUE8]], ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z27intel_sub_group_block_write14ocl_image2d_woDv2_ij(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[TMP0]], <2 x i32> noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[VALUE2_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[TMP3]], <2 x i32> noundef [[TMP4]], <2 x i32> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write414ocl_image2d_rwDv2_iDv4_j(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP6]], <2 x i32> noundef [[TMP7]], <4 x i32> noundef [[TMP8]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP9:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load <8 x i32>, ptr [[VALUE8_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z28intel_sub_group_block_write814ocl_image2d_rwDv2_iDv8_j(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP9]], <2 x i32> noundef [[TMP10]], <8 x i32> noundef [[TMP11]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP12:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 +// 
CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_ui14ocl_image2d_woDv2_ij(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[TMP12]], <2 x i32> noundef [[TMP13]], i32 noundef [[TMP14]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP15:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr [[VALUE4_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ui414ocl_image2d_rwDv2_iDv4_j(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP15]], <2 x i32> noundef [[TMP16]], <4 x i32> noundef [[TMP17]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Image overloads of block_write: the scalar, 2-wide and _ui calls use the write_only image; the 4-wide, 8-wide and _ui4 calls use the read_write image. +void test_block_write_image(write_only image2d_t woimg, + read_write image2d_t rwimg, int2 coord, + uint value, uint2 value2, uint4 value4, + uint8 value8) { + intel_sub_group_block_write(woimg, coord, value); + intel_sub_group_block_write2(woimg, coord, value2); + intel_sub_group_block_write4(rwimg, coord, value4); + intel_sub_group_block_write8(rwimg, coord, value8); + + intel_sub_group_block_write_ui(woimg, coord, value); + intel_sub_group_block_write_ui4(rwimg, coord, value4); +} diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-char-builtins.cl b/clang/test/CodeGenOpenCL/intel-subgroups-char-builtins.cl new file mode 100644 index 0000000000000..f5600b16ba22c --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-subgroups-char-builtins.cl @@ -0,0 +1,185 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +// CHECK-LABEL: define dso_local spir_func void @test_broadcast_shuffle( +// CHECK-SAME: <3 x i8> noundef [[C3:%.*]], <8 x i8> noundef [[UC8:%.*]], <16 x i8> noundef [[C16:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] 
+// CHECK-NEXT: [[C3_ADDR:%.*]] = alloca <3 x i8>, align 4 +// CHECK-NEXT: [[UC8_ADDR:%.*]] = alloca <8 x i8>, align 8 +// CHECK-NEXT: [[C16_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[C3]], <3 x i8> undef, <4 x i32> +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[C3_ADDR]], align 4 +// CHECK-NEXT: store <8 x i8> [[UC8]], ptr [[UC8_ADDR]], align 8 +// CHECK-NEXT: store <16 x i8> [[C16]], ptr [[C16_ADDR]], align 16 +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[C3_ADDR]], align 4 +// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <3 x i8> @_Z25intel_sub_group_broadcastDv3_cj(<3 x i8> noundef [[EXTRACTVEC1]], i32 noundef 1) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[UC8_ADDR]], align 8 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <8 x i8> @_Z25intel_sub_group_broadcastDv8_hj(<8 x i8> noundef [[TMP0]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[C16_ADDR]], align 16 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <16 x i8> @_Z23intel_sub_group_shuffleDv16_cj(<16 x i8> noundef [[TMP1]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[UC8_ADDR]], align 8 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func <8 x i8> @_Z27intel_sub_group_shuffle_xorDv8_hj(<8 x i8> noundef [[TMP2]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[C16_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[C16_ADDR]], align 16 +// CHECK-NEXT: [[CALL5:%.*]] = call spir_func <16 x i8> @_Z28intel_sub_group_shuffle_downDv16_cS_j(<16 x i8> noundef [[TMP3]], <16 x i8> noundef [[TMP4]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr [[UC8_ADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i8>, ptr [[UC8_ADDR]], align 8 +// CHECK-NEXT: [[CALL6:%.*]] = call spir_func <8 
x i8> @_Z26intel_sub_group_shuffle_upDv8_hS_j(<8 x i8> noundef [[TMP5]], <8 x i8> noundef [[TMP6]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls broadcast and the shuffle family on char/uchar vectors (char3, uchar8, char16), each with the constant 1u as the last argument. +void test_broadcast_shuffle(char3 c3, uchar8 uc8, char16 c16) { + (void)intel_sub_group_broadcast(c3, 1u); + (void)intel_sub_group_broadcast(uc8, 1u); + (void)intel_sub_group_shuffle(c16, 1u); + (void)intel_sub_group_shuffle_xor(uc8, 1u); + (void)intel_sub_group_shuffle_down(c16, c16, 1u); + (void)intel_sub_group_shuffle_up(uc8, uc8, 1u); +} + +// CHECK-LABEL: define dso_local spir_func void @test_collectives( +// CHECK-SAME: i8 noundef signext [[C:%.*]], i8 noundef zeroext [[UC:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[UC_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: store i8 [[C]], ptr [[C_ADDR]], align 1 +// CHECK-NEXT: store i8 [[UC]], ptr [[UC_ADDR]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[C_ADDR]], align 1 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func signext i8 @_Z26intel_sub_group_reduce_addc(i8 noundef signext [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[UC_ADDR]], align 1 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func zeroext i8 @_Z26intel_sub_group_reduce_minh(i8 noundef zeroext [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[C_ADDR]], align 1 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func signext i8 @_Z26intel_sub_group_reduce_maxc(i8 noundef signext [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[C_ADDR]], align 1 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func signext i8 @_Z34intel_sub_group_scan_exclusive_addc(i8 noundef signext [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[UC_ADDR]], align 1 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func zeroext i8 @_Z34intel_sub_group_scan_inclusive_maxh(i8 noundef zeroext [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls a sample of the reduce/scan builtins on char and uchar scalars: reduce_add/min/max, scan_exclusive_add and scan_inclusive_max. +void test_collectives(char c, uchar uc) { + 
(void)intel_sub_group_reduce_add(c); + (void)intel_sub_group_reduce_min(uc); + (void)intel_sub_group_reduce_max(c); + (void)intel_sub_group_scan_exclusive_add(c); + (void)intel_sub_group_scan_inclusive_max(uc); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_uc_global( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i8 @_Z29intel_sub_group_block_read_ucPU3AS1Kh(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i8> @_Z30intel_sub_group_block_read_uc2PU3AS1Kh(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i8> @_Z30intel_sub_group_block_read_uc4PU3AS1Kh(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i8> @_Z30intel_sub_group_block_read_uc8PU3AS1Kh(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(ptr addrspace(1) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Calls every block_read_uc width (scalar, 2, 4, 8, 16) on a const __global uchar pointer; results are discarded. +void test_block_read_uc_global(const __global uchar *in) { + (void)intel_sub_group_block_read_uc(in); + (void)intel_sub_group_block_read_uc2(in); + (void)intel_sub_group_block_read_uc4(in); + (void)intel_sub_group_block_read_uc8(in); + (void)intel_sub_group_block_read_uc16(in); +} + +// CHECK-LABEL: define 
dso_local spir_func void @test_block_read_uc_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ROIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG]], ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i8 @_Z29intel_sub_group_block_read_uc14ocl_image2d_roDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[TMP0]], <2 x i32> noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <4 x i8> @_Z30intel_sub_group_block_read_uc414ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP2]], <2 x i32> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// Image overloads of block_read_uc: the scalar form on the read_only image and the 4-wide form on the read_write image. +void test_block_read_uc_image(read_only image2d_t roimg, + read_write image2d_t rwimg, int2 coord) { + (void)intel_sub_group_block_read_uc(roimg, coord); + (void)intel_sub_group_block_read_uc4(rwimg, coord); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_uc_global( +// CHECK-SAME: ptr addrspace(1) noundef 
[[OUT:%.*]], i8 noundef zeroext [[VALUE:%.*]], <2 x i8> noundef [[VALUE2:%.*]], <4 x i8> noundef [[VALUE4:%.*]], <8 x i8> noundef [[VALUE8:%.*]], <16 x i8> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i8>, align 2 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i8>, align 4 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i8>, align 8 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i8 [[VALUE]], ptr [[VALUE_ADDR]], align 1 +// CHECK-NEXT: store <2 x i8> [[VALUE2]], ptr [[VALUE2_ADDR]], align 2 +// CHECK-NEXT: store <4 x i8> [[VALUE4]], ptr [[VALUE4_ADDR]], align 4 +// CHECK-NEXT: store <8 x i8> [[VALUE8]], ptr [[VALUE8_ADDR]], align 8 +// CHECK-NEXT: store <16 x i8> [[VALUE16]], ptr [[VALUE16_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[VALUE_ADDR]], align 1 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_ucPU3AS1hh(ptr addrspace(1) noundef [[TMP0]], i8 noundef zeroext [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr [[VALUE2_ADDR]], align 2 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_uc2PU3AS1hDv2_h(ptr addrspace(1) noundef [[TMP2]], <2 x i8> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[VALUE4_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_uc4PU3AS1hDv4_h(ptr addrspace(1) noundef [[TMP4]], <4 x i8> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load 
ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr [[VALUE8_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_uc8PU3AS1hDv8_h(ptr addrspace(1) noundef [[TMP6]], <8 x i8> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr [[VALUE16_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(ptr addrspace(1) noundef [[TMP8]], <16 x i8> noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_write_uc_global(__global uchar *out, uchar value, + uchar2 value2, uchar4 value4, uchar8 value8, + uchar16 value16) { + intel_sub_group_block_write_uc(out, value); + intel_sub_group_block_write_uc2(out, value2); + intel_sub_group_block_write_uc4(out, value4); + intel_sub_group_block_write_uc8(out, value8); + intel_sub_group_block_write_uc16(out, value16); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_uc_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]], i8 noundef zeroext [[VALUE:%.*]], <16 x i8> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WOIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG]], ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: 
store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: store i8 [[VALUE]], ptr [[VALUE_ADDR]], align 1 +// CHECK-NEXT: store <16 x i8> [[VALUE16]], ptr [[VALUE16_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[VALUE_ADDR]], align 1 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_uc14ocl_image2d_woDv2_ih(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[TMP0]], <2 x i32> noundef [[TMP1]], i8 noundef zeroext [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VALUE16_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_rwDv2_iDv16_h(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP3]], <2 x i32> noundef [[TMP4]], <16 x i8> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_write_uc_image(write_only image2d_t woimg, + read_write image2d_t rwimg, int2 coord, + uchar value, uchar16 value16) { + intel_sub_group_block_write_uc(woimg, coord, value); + intel_sub_group_block_write_uc16(rwimg, coord, value16); +} diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-long-builtins.cl b/clang/test/CodeGenOpenCL/intel-subgroups-long-builtins.cl new file mode 100644 index 0000000000000..5796234803507 --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-subgroups-long-builtins.cl @@ -0,0 +1,143 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +// 
CHECK-LABEL: define dso_local spir_func void @test_shuffle_long( +// CHECK-SAME: i64 noundef [[L:%.*]], i64 noundef [[UL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[UL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 +// CHECK-NEXT: store i64 [[UL]], ptr [[UL_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[L_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i64 @_Z23intel_sub_group_shufflelj(i64 noundef [[TMP0]], i32 noundef 1) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[UL_ADDR]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func i64 @_Z27intel_sub_group_shuffle_xormj(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[L_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[L_ADDR]], align 8 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func i64 @_Z28intel_sub_group_shuffle_downllj(i64 noundef [[TMP2]], i64 noundef [[TMP3]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[UL_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[UL_ADDR]], align 8 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func i64 @_Z26intel_sub_group_shuffle_upmmj(i64 noundef [[TMP4]], i64 noundef [[TMP5]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_shuffle_long(long l, ulong ul) { + (void)intel_sub_group_shuffle(l, 1u); + (void)intel_sub_group_shuffle_xor(ul, 1u); + (void)intel_sub_group_shuffle_down(l, l, 1u); + (void)intel_sub_group_shuffle_up(ul, ul, 1u); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_ul_global( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr 
addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i64 @_Z29intel_sub_group_block_read_ulPU3AS1Km(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i64> @_Z30intel_sub_group_block_read_ul4PU3AS1Km(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i64> @_Z30intel_sub_group_block_read_ul8PU3AS1Km(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_read_ul_global(const __global ulong *in) { + (void)intel_sub_group_block_read_ul(in); + (void)intel_sub_group_block_read_ul2(in); + (void)intel_sub_group_block_read_ul4(in); + (void)intel_sub_group_block_read_ul8(in); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_ul_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ROIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG]], ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: 
[[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func i64 @_Z29intel_sub_group_block_read_ul14ocl_image2d_roDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[TMP0]], <2 x i32> noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <4 x i64> @_Z30intel_sub_group_block_read_ul414ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP2]], <2 x i32> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <16 x i64> @_Z31intel_sub_group_block_read_ul1614ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP4]], <2 x i32> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_read_ul_image(read_only image2d_t roimg, + read_write image2d_t rwimg, int2 coord) { + (void)intel_sub_group_block_read_ul(roimg, coord); + (void)intel_sub_group_block_read_ul4(rwimg, coord); + (void)intel_sub_group_block_read_ul16(rwimg, coord); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_ul_global( +// CHECK-SAME: ptr addrspace(1) noundef [[OUT:%.*]], i64 noundef [[VALUE:%.*]], <2 x i64> noundef [[VALUE2:%.*]], <4 x i64> noundef [[VALUE4:%.*]], <8 x i64> noundef [[VALUE8:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: 
[[VALUE4_ADDR:%.*]] = alloca <4 x i64>, align 32 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i64 [[VALUE]], ptr [[VALUE_ADDR]], align 8 +// CHECK-NEXT: store <2 x i64> [[VALUE2]], ptr [[VALUE2_ADDR]], align 16 +// CHECK-NEXT: store <4 x i64> [[VALUE4]], ptr [[VALUE4_ADDR]], align 32 +// CHECK-NEXT: store <8 x i64> [[VALUE8]], ptr [[VALUE8_ADDR]], align 64 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VALUE_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_ulPU3AS1mm(ptr addrspace(1) noundef [[TMP0]], i64 noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[VALUE2_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1) noundef [[TMP2]], <2 x i64> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[VALUE4_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul4PU3AS1mDv4_m(ptr addrspace(1) noundef [[TMP4]], <4 x i64> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, ptr [[VALUE8_ADDR]], align 64 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul8PU3AS1mDv8_m(ptr addrspace(1) noundef [[TMP6]], <8 x i64> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_write_ul_global(__global ulong *out, ulong value, + ulong2 value2, ulong4 value4, + ulong8 value8) { + intel_sub_group_block_write_ul(out, value); + intel_sub_group_block_write_ul2(out, value2); + intel_sub_group_block_write_ul4(out, 
value4); + intel_sub_group_block_write_ul8(out, value8); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_ul_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]], i64 noundef [[VALUE:%.*]], <16 x i64> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WOIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i64>, align 128 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG]], ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: store i64 [[VALUE]], ptr [[VALUE_ADDR]], align 8 +// CHECK-NEXT: store <16 x i64> [[VALUE16]], ptr [[VALUE16_ADDR]], align 128 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VALUE_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_ul14ocl_image2d_woDv2_im(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[TMP0]], <2 x i32> noundef [[TMP1]], i64 noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i64>, ptr [[VALUE16_ADDR]], align 128 +// 
CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_ul1614ocl_image2d_rwDv2_iDv16_m(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP3]], <2 x i32> noundef [[TMP4]], <16 x i64> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_write_ul_image(write_only image2d_t woimg, + read_write image2d_t rwimg, int2 coord, + ulong value, ulong16 value16) { + intel_sub_group_block_write_ul(woimg, coord, value); + intel_sub_group_block_write_ul16(rwimg, coord, value16); +} diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-short-builtins.cl b/clang/test/CodeGenOpenCL/intel-subgroups-short-builtins.cl new file mode 100644 index 0000000000000..5217faf066e12 --- /dev/null +++ b/clang/test/CodeGenOpenCL/intel-subgroups-short-builtins.cl @@ -0,0 +1,182 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -fdeclare-opencl-builtins -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + +// CHECK-LABEL: define dso_local spir_func void @test_broadcast_shuffle( +// CHECK-SAME: <3 x i16> noundef [[S3:%.*]], <8 x i16> noundef [[US8:%.*]], <16 x i16> noundef [[S16:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S3_ADDR:%.*]] = alloca <3 x i16>, align 8 +// CHECK-NEXT: [[US8_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[S16_ADDR:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[S3]], <3 x i16> undef, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr [[S3_ADDR]], align 8 +// CHECK-NEXT: store <8 x i16> [[US8]], ptr [[US8_ADDR]], align 16 +// CHECK-NEXT: store <16 x i16> [[S16]], ptr [[S16_ADDR]], align 32 +// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i16>, ptr [[S3_ADDR]], align 8 +// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i16> [[LOADVECN]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: [[CALL:%.*]] = call spir_func <3 
x i16> @_Z25intel_sub_group_broadcastDv3_sj(<3 x i16> noundef [[EXTRACTVEC1]], i32 noundef 1) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[US8_ADDR]], align 16 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <8 x i16> @_Z25intel_sub_group_broadcastDv8_tj(<8 x i16> noundef [[TMP0]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr [[S16_ADDR]], align 32 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <16 x i16> @_Z23intel_sub_group_shuffleDv16_sj(<16 x i16> noundef [[TMP1]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[US8_ADDR]], align 16 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func <8 x i16> @_Z27intel_sub_group_shuffle_xorDv8_tj(<8 x i16> noundef [[TMP2]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr [[S16_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr [[S16_ADDR]], align 32 +// CHECK-NEXT: [[CALL5:%.*]] = call spir_func <16 x i16> @_Z28intel_sub_group_shuffle_downDv16_sS_j(<16 x i16> noundef [[TMP3]], <16 x i16> noundef [[TMP4]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr [[US8_ADDR]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[US8_ADDR]], align 16 +// CHECK-NEXT: [[CALL6:%.*]] = call spir_func <8 x i16> @_Z26intel_sub_group_shuffle_upDv8_tS_j(<8 x i16> noundef [[TMP5]], <8 x i16> noundef [[TMP6]], i32 noundef 1) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_broadcast_shuffle(short3 s3, ushort8 us8, short16 s16) { + (void)intel_sub_group_broadcast(s3, 1u); + (void)intel_sub_group_broadcast(us8, 1u); + (void)intel_sub_group_shuffle(s16, 1u); + (void)intel_sub_group_shuffle_xor(us8, 1u); + (void)intel_sub_group_shuffle_down(s16, s16, 1u); + (void)intel_sub_group_shuffle_up(us8, us8, 1u); +} + +// CHECK-LABEL: define dso_local spir_func void @test_collectives( +// CHECK-SAME: i16 noundef signext [[S:%.*]], i16 noundef zeroext [[US:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: 
[[ENTRY:.*:]] +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[US_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: store i16 [[S]], ptr [[S_ADDR]], align 2 +// CHECK-NEXT: store i16 [[US]], ptr [[US_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[S_ADDR]], align 2 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func signext i16 @_Z26intel_sub_group_reduce_adds(i16 noundef signext [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[US_ADDR]], align 2 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func zeroext i16 @_Z26intel_sub_group_reduce_mint(i16 noundef zeroext [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[S_ADDR]], align 2 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func signext i16 @_Z34intel_sub_group_scan_exclusive_adds(i16 noundef signext [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[US_ADDR]], align 2 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func zeroext i16 @_Z34intel_sub_group_scan_inclusive_maxt(i16 noundef zeroext [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_collectives(short s, ushort us) { + (void)intel_sub_group_reduce_add(s); + (void)intel_sub_group_reduce_min(us); + (void)intel_sub_group_scan_exclusive_add(s); + (void)intel_sub_group_scan_inclusive_max(us); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_us_global( +// CHECK-SAME: ptr addrspace(1) noundef [[IN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i16 @_Z29intel_sub_group_block_read_usPU3AS1Kt(ptr addrspace(1) noundef [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <2 x i16> 
@_Z30intel_sub_group_block_read_us2PU3AS1Kt(ptr addrspace(1) noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call spir_func <4 x i16> @_Z30intel_sub_group_block_read_us4PU3AS1Kt(ptr addrspace(1) noundef [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL3:%.*]] = call spir_func <8 x i16> @_Z30intel_sub_group_block_read_us8PU3AS1Kt(ptr addrspace(1) noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(ptr addrspace(1) noundef [[TMP4]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_read_us_global(const __global ushort *in) { + (void)intel_sub_group_block_read_us(in); + (void)intel_sub_group_block_read_us2(in); + (void)intel_sub_group_block_read_us4(in); + (void)intel_sub_group_block_read_us8(in); + (void)intel_sub_group_block_read_us16(in); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_read_us_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ROIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[ROIMG]], ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", 
void, 1, 0, 0, 0, 0, 0, 0), ptr [[ROIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call spir_func zeroext i16 @_Z29intel_sub_group_block_read_us14ocl_image2d_roDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[TMP0]], <2 x i32> noundef [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call spir_func <4 x i16> @_Z30intel_sub_group_block_read_us414ocl_image2d_rwDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP2]], <2 x i32> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_read_us_image(read_only image2d_t roimg, + read_write image2d_t rwimg, int2 coord) { + (void)intel_sub_group_block_read_us(roimg, coord); + (void)intel_sub_group_block_read_us4(rwimg, coord); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_us_global( +// CHECK-SAME: ptr addrspace(1) noundef [[OUT:%.*]], i16 noundef zeroext [[VALUE:%.*]], <2 x i16> noundef [[VALUE2:%.*]], <4 x i16> noundef [[VALUE4:%.*]], <8 x i16> noundef [[VALUE8:%.*]], <16 x i16> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[VALUE2_ADDR:%.*]] = alloca <2 x i16>, align 4 +// CHECK-NEXT: [[VALUE4_ADDR:%.*]] = alloca <4 x i16>, align 8 +// CHECK-NEXT: [[VALUE8_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: store i16 [[VALUE]], ptr [[VALUE_ADDR]], align 2 +// CHECK-NEXT: store <2 x i16> [[VALUE2]], ptr [[VALUE2_ADDR]], align 4 +// CHECK-NEXT: store <4 x i16> [[VALUE4]], ptr [[VALUE4_ADDR]], 
align 8 +// CHECK-NEXT: store <8 x i16> [[VALUE8]], ptr [[VALUE8_ADDR]], align 16 +// CHECK-NEXT: store <16 x i16> [[VALUE16]], ptr [[VALUE16_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[VALUE_ADDR]], align 2 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_usPU3AS1tt(ptr addrspace(1) noundef [[TMP0]], i16 noundef zeroext [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr [[VALUE2_ADDR]], align 4 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(ptr addrspace(1) noundef [[TMP2]], <2 x i16> noundef [[TMP3]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[VALUE4_ADDR]], align 8 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_us4PU3AS1tDv4_t(ptr addrspace(1) noundef [[TMP4]], <4 x i16> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr [[VALUE8_ADDR]], align 16 +// CHECK-NEXT: call spir_func void @_Z31intel_sub_group_block_write_us8PU3AS1tDv8_t(ptr addrspace(1) noundef [[TMP6]], <8 x i16> noundef [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load <16 x i16>, ptr [[VALUE16_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(ptr addrspace(1) noundef [[TMP8]], <16 x i16> noundef [[TMP9]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_write_us_global(__global ushort *out, ushort value, + ushort2 value2, ushort4 value4, + ushort8 value8, ushort16 value16) { + intel_sub_group_block_write_us(out, value); + intel_sub_group_block_write_us2(out, value2); + 
intel_sub_group_block_write_us4(out, value4); + intel_sub_group_block_write_us8(out, value8); + intel_sub_group_block_write_us16(out, value16); +} + +// CHECK-LABEL: define dso_local spir_func void @test_block_write_us_image( +// CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG:%.*]], target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG:%.*]], <2 x i32> noundef [[COORD:%.*]], i16 noundef zeroext [[VALUE:%.*]], <16 x i16> noundef [[VALUE16:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WOIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), align 4 +// CHECK-NEXT: [[RWIMG_ADDR:%.*]] = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), align 4 +// CHECK-NEXT: [[COORD_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[VALUE16_ADDR:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[WOIMG]], ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[RWIMG]], ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[COORD]], ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: store i16 [[VALUE]], ptr [[VALUE_ADDR]], align 2 +// CHECK-NEXT: store <16 x i16> [[VALUE16]], ptr [[VALUE16_ADDR]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), ptr [[WOIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[VALUE_ADDR]], align 2 +// CHECK-NEXT: call spir_func void @_Z30intel_sub_group_block_write_us14ocl_image2d_woDv2_it(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[TMP0]], <2 x i32> noundef [[TMP1]], i16 noundef zeroext [[TMP2]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP3:%.*]] = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), ptr [[RWIMG_ADDR]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr 
[[COORD_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr [[VALUE16_ADDR]], align 32 +// CHECK-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_rwDv2_iDv16_t(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[TMP3]], <2 x i32> noundef [[TMP4]], <16 x i16> noundef [[TMP5]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +void test_block_write_us_image(write_only image2d_t woimg, + read_write image2d_t rwimg, int2 coord, + ushort value, ushort16 value16) { + intel_sub_group_block_write_us(woimg, coord, value); + intel_sub_group_block_write_us16(rwimg, coord, value16); +} diff --git a/clang/test/SemaOpenCL/intel-bfloat16-conversions-builtins.cl b/clang/test/SemaOpenCL/intel-bfloat16-conversions-builtins.cl index 42883789905e0..bb4b45bdc6edb 100644 --- a/clang/test/SemaOpenCL/intel-bfloat16-conversions-builtins.cl +++ b/clang/test/SemaOpenCL/intel-bfloat16-conversions-builtins.cl @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s -// expected-no-diagnostics // Keep this test header-free so it exercises OpenCLBuiltins.td instead of // declarations from opencl-c.h. 
@@ -63,3 +62,25 @@ float8 test_convert_as_bfloat168_float8(ushort8 source) { float16 test_convert_as_bfloat1616_float16(ushort16 source) { return intel_convert_as_bfloat1616_float16(source); } + +struct S { int x; }; + +void test_convert_bfloat16_as_ushort_invalid(float source, struct S s, + float4 f4) { + intel_convert_bfloat16_as_ushort(); // expected-error{{too few arguments to function call, expected 1, have 0}} + // expected-note@-1 0+{{'intel_convert_bfloat16_as_ushort' declared here}} + intel_convert_bfloat16_as_ushort(source, source); // expected-error{{too many arguments to function call, expected 1, have 2}} + // expected-note@-1 0+{{'intel_convert_bfloat16_as_ushort' declared here}} + intel_convert_bfloat16_as_ushort(s); // expected-error{{passing '__private struct S' to parameter of incompatible type 'float'}} + intel_convert_bfloat162_as_ushort2(f4); // expected-error{{passing '__private float4' (vector of 4 'float' values) to parameter of incompatible type 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} +} // Negative cases above: missing/extra argument, a struct S argument, and a float4 given to the 2-element builtin. + +void test_convert_as_bfloat16_float_invalid(ushort source, struct S s, + ushort4 u4) { // Negative cases: missing/extra argument, a struct S argument, and a ushort4 given to the 2-element builtin. + intel_convert_as_bfloat16_float(); // expected-error{{too few arguments to function call, expected 1, have 0}} + // expected-note@-1 0+{{'intel_convert_as_bfloat16_float' declared here}} + intel_convert_as_bfloat16_float(source, source); // expected-error{{too many arguments to function call, expected 1, have 2}} + // expected-note@-1 0+{{'intel_convert_as_bfloat16_float' declared here}} + intel_convert_as_bfloat16_float(s); // expected-error{{passing '__private struct S' to parameter of incompatible type 'unsigned short'}} + intel_convert_as_bfloat162_float2(u4); // expected-error{{passing '__private ushort4' (vector of 4 'ushort' values) to parameter of incompatible type 'unsigned short __attribute__((ext_vector_type(2)))' (vector of 2 'unsigned short' values)}} +} diff --git 
a/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl b/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl index 5a1244fc76511..2cd929da332cf 100644 --- a/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl +++ b/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl @@ -45,3 +45,45 @@ void test_block_prefetch_ui16_rejected(const __global uint *in) { void test_block_prefetch_ul16_rejected(const __global ulong *in) { intel_sub_group_block_prefetch_ul16(in); // expected-error{{use of undeclared identifier 'intel_sub_group_block_prefetch_ul16'}} } + +void test_block_prefetch_ui_invalid(const __global uint *in, + const __local uint *local_in, + const __global ushort *us_in, uint v) { // Negative cases: wrong arity, integer instead of pointer, __local pointer, ushort pointer. + intel_sub_group_block_prefetch_ui(); // expected-error{{too few arguments to function call, expected 1, have 0}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_ui' declared here}} + intel_sub_group_block_prefetch_ui(in, in); // expected-error{{too many arguments to function call, expected 1, have 2}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_ui' declared here}} + intel_sub_group_block_prefetch_ui(v); // expected-error{{incompatible integer to pointer conversion passing '__private uint'}} + intel_sub_group_block_prefetch_ui(local_in); // expected-error{{changes address space of pointer}} + intel_sub_group_block_prefetch_ui(us_in); // expected-error{{incompatible pointer types passing 'const __global ushort *__private'}} +} + +void test_block_prefetch_us_invalid(const __global ushort *in, + const __global uint *u_in, ushort v) { // Negative cases: wrong arity, integer instead of pointer, uint pointer. + intel_sub_group_block_prefetch_us(); // expected-error{{too few arguments to function call, expected 1, have 0}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_us' declared here}} + intel_sub_group_block_prefetch_us(in, in); // expected-error{{too many arguments to function call, expected 1, have 2}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_us' declared here}} 
+ intel_sub_group_block_prefetch_us(v); // expected-error{{incompatible integer to pointer conversion passing '__private ushort'}} + intel_sub_group_block_prefetch_us(u_in); // expected-error{{incompatible pointer types passing 'const __global uint *__private'}} +} + +void test_block_prefetch_uc_invalid(const __global uchar *in, + const __global uint *u_in, uchar v) { // Negative cases: wrong arity, integer instead of pointer, uint pointer. + intel_sub_group_block_prefetch_uc(); // expected-error{{too few arguments to function call, expected 1, have 0}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_uc' declared here}} + intel_sub_group_block_prefetch_uc(in, in); // expected-error{{too many arguments to function call, expected 1, have 2}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_uc' declared here}} + intel_sub_group_block_prefetch_uc(v); // expected-error{{incompatible integer to pointer conversion passing '__private uchar'}} + intel_sub_group_block_prefetch_uc(u_in); // expected-error{{incompatible pointer types passing 'const __global uint *__private'}} +} + +void test_block_prefetch_ul_invalid(const __global ulong *in, + const __global uint *u_in, ulong v) { // Negative cases: wrong arity, integer instead of pointer, uint pointer. + intel_sub_group_block_prefetch_ul(); // expected-error{{too few arguments to function call, expected 1, have 0}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_ul' declared here}} + intel_sub_group_block_prefetch_ul(in, in); // expected-error{{too many arguments to function call, expected 1, have 2}} + // expected-note@-1 0+{{'intel_sub_group_block_prefetch_ul' declared here}} + intel_sub_group_block_prefetch_ul(v); // expected-error{{incompatible integer to pointer conversion passing '__private ulong'}} + intel_sub_group_block_prefetch_ul(u_in); // expected-error{{incompatible pointer types passing 'const __global uint *__private'}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl index ddd35810f85f0..23285d84ce166 100644 --- 
a/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl +++ b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl @@ -163,3 +163,60 @@ void test_block_read_ul16_local_rejected(const __local ulong *in) { intel_sub_group_block_read_ul16(in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul16'}} // expected-note@-1 0+{{candidate function not viable}} } + +void test_block_read_local_invalid(const __local uint *in, + const __local ushort *us_in, uint v) { // Negative cases: wrong arity, integer instead of pointer, __local ushort pointer. + intel_sub_group_block_read(); // expected-error{{no matching function for call to 'intel_sub_group_block_read'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read(us_in); // expected-error{{incompatible pointer types passing 'const __local ushort *__private'}} +} + +void test_block_write_local_invalid(__local uint *out, __local ushort *us_out, + uint value, ushort us_value) { // Negative cases: wrong arity, non-pointer destination, __local ushort destination. + intel_sub_group_block_write(); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write(out); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write(out, value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write(value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate 
function not viable}} + intel_sub_group_block_write(us_out, value); // expected-error{{incompatible pointer types passing '__local ushort *__private'}} +} + +void test_block_read_uc_local_invalid(const __local uchar *in, + const __local ushort *us_in, uchar v) { // Negative cases: wrong arity, integer instead of pointer, __local ushort pointer. + intel_sub_group_block_read_uc(); // expected-error{{no matching function for call to 'intel_sub_group_block_read_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_uc(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_uc(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_uc(us_in); // expected-error{{incompatible pointer types passing 'const __local ushort *__private'}} +} + +void test_block_write_us_local_invalid(__local ushort *out, __local uint *ui_out, + ushort value) { // Negative cases: wrong arity, __local uint destination. + intel_sub_group_block_write_us(); // expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_us(out); // expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_us(out, value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_us(ui_out, value); // expected-error{{incompatible pointer types passing '__local uint *__private'}} +} + +void test_block_read_ul_local_invalid(const __local ulong *in, + const __local uint *ui_in, ulong v) { // Negative cases: wrong arity, integer instead of pointer, __local uint pointer. + intel_sub_group_block_read_ul(); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul'}} + // expected-note@-1 
0+{{candidate function not viable}} + intel_sub_group_block_read_ul(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_ul(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_ul(ui_in); // expected-error{{incompatible pointer types passing 'const __local uint *__private'}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-builtins.cl index be77471e89f80..20f5e2c258119 100644 --- a/clang/test/SemaOpenCL/intel-subgroups-builtins.cl +++ b/clang/test/SemaOpenCL/intel-subgroups-builtins.cl @@ -137,3 +137,42 @@ void test_long_vectors_rejected(long2 value) { (void)intel_sub_group_shuffle(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}} // expected-note@-1 0+{{candidate function not viable}} } + +void test_shuffle_invalid(uint value, __global uint *ptr) { // Negative cases: zero, one, and three arguments, plus a pointer as the shuffled value. + intel_sub_group_shuffle(); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_shuffle(value); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_shuffle(value, value, value); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_shuffle(ptr, 1u); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_shuffle_down_invalid(uint value) { // Negative cases: zero and two arguments. + intel_sub_group_shuffle_down(); // expected-error{{no matching function for call to 'intel_sub_group_shuffle_down'}} + // 
expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_shuffle_down(value, value); // expected-error{{no matching function for call to 'intel_sub_group_shuffle_down'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_block_read_invalid(const __global uint *in, uint v) { // Negative cases: wrong arity and an integer instead of a pointer. + intel_sub_group_block_read(); // expected-error{{no matching function for call to 'intel_sub_group_block_read'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_block_write_invalid(__global uint *out, read_only image2d_t roimg, + int2 coord, uint value) { // Negative cases: wrong arity and a read_only image as the destination. + intel_sub_group_block_write(); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write(out); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write(out, value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write(roimg, coord, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write'}} + // expected-note@-1 0+{{candidate function not viable}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl index bad1128a10f6a..1594054e2b9e2 100644 --- a/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl +++ b/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl @@ -104,3 +104,35 
@@ void test_broadcast_char16_rejected(char16 value) { (void)intel_sub_group_broadcast(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} // expected-note@-1 0+{{candidate function not viable}} } + +void test_broadcast_invalid(uchar value, __global uchar *ptr) { // Negative cases: zero, one, and three arguments, plus a pointer as the broadcast value. + intel_sub_group_broadcast(); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_broadcast(value); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_broadcast(value, value, value); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_broadcast(ptr, 1u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_block_read_uc_invalid(const __global uchar *in, uchar v) { // Negative cases: wrong arity and an integer instead of a pointer. + intel_sub_group_block_read_uc(); // expected-error{{no matching function for call to 'intel_sub_group_block_read_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_uc(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_uc(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read_uc'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_block_write_uc_invalid(__global uchar *out, read_only image2d_t roimg, + int2 coord, uchar value) { // Negative cases: wrong arity and a read_only image as the destination. + intel_sub_group_block_write_uc(); // expected-error{{no matching function for call to 'intel_sub_group_block_write_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_uc(out); // expected-error{{no 
matching function for call to 'intel_sub_group_block_write_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_uc(out, value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_uc'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_uc(roimg, coord, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_uc'}} + // expected-note@-1 0+{{candidate function not viable}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl index dce694f8635fe..4c1c6c9b8d522 100644 --- a/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl +++ b/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s -// expected-no-diagnostics // Keep this test header-free so it exercises OpenCLBuiltins.td instead of // declarations from opencl-c.h. 
@@ -62,3 +61,24 @@ void test_block_write_ul(__global ulong *out, write_only image2d_t image, intel_sub_group_block_write_ul8(rw, coord, value8); intel_sub_group_block_write_ul16(rw, coord, value16); } + +void test_block_read_ul_invalid(const __global ulong *in, ulong v) { + intel_sub_group_block_read_ul(); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_ul(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_ul(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul'}} + // expected-note@-1 0+{{candidate function not viable}} +} // Negative cases above: wrong arity and an integer instead of a pointer. + +void test_block_write_ul_invalid(__global ulong *out, read_only image2d_t roimg, + int2 coord, ulong value) { // Negative cases: wrong arity and a read_only image as the destination. + intel_sub_group_block_write_ul(); // expected-error{{no matching function for call to 'intel_sub_group_block_write_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_ul(out); // expected-error{{no matching function for call to 'intel_sub_group_block_write_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_ul(out, value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_ul'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_ul(roimg, coord, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_ul'}} + // expected-note@-1 0+{{candidate function not viable}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl index 237974733335d..d4a0d69760332 100644 --- a/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl +++ b/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl @@ 
-104,3 +104,35 @@ void test_broadcast_short16_rejected(short16 value) { (void)intel_sub_group_broadcast(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} // expected-note@-1 0+{{candidate function not viable}} } + +void test_broadcast_invalid(ushort value, __global ushort *ptr) { // Negative cases: zero, one, and three arguments, plus a pointer as the broadcast value. + intel_sub_group_broadcast(); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_broadcast(value); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_broadcast(value, value, value); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_broadcast(ptr, 1u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_block_read_us_invalid(const __global ushort *in, ushort v) { // Negative cases: wrong arity and an integer instead of a pointer. + intel_sub_group_block_read_us(); // expected-error{{no matching function for call to 'intel_sub_group_block_read_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_us(in, in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_read_us(v); // expected-error{{no matching function for call to 'intel_sub_group_block_read_us'}} + // expected-note@-1 0+{{candidate function not viable}} +} + +void test_block_write_us_invalid(__global ushort *out, read_only image2d_t roimg, + int2 coord, ushort value) { // Negative cases: wrong arity and a read_only image as the destination. + intel_sub_group_block_write_us(); // expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_us(out); 
// expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_us(out, value, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} + intel_sub_group_block_write_us(roimg, coord, value); // expected-error{{no matching function for call to 'intel_sub_group_block_write_us'}} + // expected-note@-1 0+{{candidate function not viable}} +}