diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..93f6026464f91 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2221,6 +2221,15 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; } +// +// Multi-vector saturating rounding shift right narrow and interleave +// +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { + def SVSQRSHRN_X2_S8 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "s", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVUQRSHRN_X2_U8 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Us", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVSQRSHRUN_X2_S8 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "s", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>; def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>; @@ -2300,6 +2309,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme-f16f16" in { def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>; } +// +// Multi-vector saturating shift right narrow and interleave +// +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { + def SVSQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "h2i", "is", MergeNone, "aarch64_sve_sqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, 
ImmCheckShiftRightNarrow, 0>]>; + def SVUQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "e2i", "UiUs", MergeNone, "aarch64_sve_uqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVSQSHRUN_X2 : SInst<"svqshrun[_n]_{0}[_{d}_x2]", "e2i", "is", MergeNone, "aarch64_sve_sqshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + // // Multi-vector saturating extract narrow // diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c new file mode 100644 index 0000000000000..4523a55b9201a --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c @@ -0,0 +1,141 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck 
%s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#include + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: define dso_local @test_svqrshrn_n_s8_s16_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqrshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svqrshrn_n_s8_s16_x211svint16x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call 
@llvm.aarch64.sve.sqrshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8_t test_svqrshrn_n_s8_s16_x2(svint16x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2)(zn, 8); +} + +// CHECK-LABEL: define dso_local @test_svqrshrn_n_u8_u16_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.uqrshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svqrshrn_n_u8_u16_x212svuint16x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.uqrshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8_t test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2)(zn, 8); +} + +// CHECK-LABEL: define dso_local @test_svqrshrun_n_u8_s16_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqrshrun.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svqrshrun_n_u8_s16_x211svint16x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqrshrun.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2)(zn, 8); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c new file mode 100644 index 0000000000000..417d8d1bb77cb --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c @@ -0,0 +1,252 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#include + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: define dso_local @test_svqshrn_n_s8_s16_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z24test_svqshrn_n_s8_s16_x211svint16x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call 
@llvm.aarch64.sve.sqshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8_t test_svqshrn_n_s8_s16_x2(svint16x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_s8,_s16_x2)(zn, 8); +} + +// CHECK-LABEL: define dso_local @test_svqshrn_n_s16_s32_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrn.x2.nxv4i32( [[TMP3]], [[TMP4]], i32 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svqshrn_n_s16_s32_x211svint32x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrn.x2.nxv4i32( [[TMP3]], [[TMP4]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint16_t test_svqshrn_n_s16_s32_x2(svint32x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_s16,_s32_x2)(zn, 16); +} + +// CHECK-LABEL: define dso_local @test_svqshrn_n_u8_u16_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.uqshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z24test_svqshrn_n_u8_u16_x212svuint16x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.uqshrn.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8_t test_svqshrn_n_u8_u16_x2(svuint16x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_u8,_u16_x2)(zn, 8); +} + +// CHECK-LABEL: define dso_local @test_svqshrn_n_u16_u32_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.uqshrn.x2.nxv4i32( [[TMP3]], [[TMP4]], i32 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svqshrn_n_u16_u32_x212svuint32x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { 
, }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.uqshrn.x2.nxv4i32( [[TMP3]], [[TMP4]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16_t test_svqshrn_n_u16_u32_x2(svuint32x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_u16,_u32_x2)(zn, 16); +} + +// CHECK-LABEL: define dso_local @test_svqshrun_n_u16_s32_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrun.x2.nxv4i32( [[TMP3]], [[TMP4]], i32 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svqshrun_n_u16_s32_x211svint32x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } 
[[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrun.x2.nxv4i32( [[TMP3]], [[TMP4]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16_t test_svqshrun_n_u16_s32_x2(svint32x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqshrun,_n,_u16,_s32_x2)(zn, 16); +} + +// CHECK-LABEL: define dso_local @test_svqshrun_n_u8_s16_x2( +// CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrun.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svqshrun_n_u8_s16_x211svint16x2_t( +// CPP-CHECK-SAME: [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { , } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: 
[[ZN1:%.*]] = load { , }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { , } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sqshrun.x2.nxv8i16( [[TMP3]], [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8_t test_svqshrun_n_u8_s16_x2(svint16x2_t zn) ATTR +{ + return SVE_ACLE_FUNC(svqshrun,_n,_u8,_s16_x2)(zn, 8); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c new file mode 100644 index 0000000000000..ce269348809a5 --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c @@ -0,0 +1,88 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include + +// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent" + +void test(void) { + svint16x2_t svint16x2_t_val; + svint32x2_t svint32x2_t_val; + svuint16x2_t svuint16x2_t_val; + svuint32x2_t svuint32x2_t_val; + + svqrshrn_n_s8_s16_x2(svint16x2_t_val, 2); + svqrshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + svqrshrn_s8(svint16x2_t_val, 2); + svqrshrn_u8(svuint16x2_t_val, 2); + svqrshrun_n_u8_s16_x2(svint16x2_t_val, 2); + svqrshrun_u8(svint16x2_t_val, 2); + 
svqshrn_n_s8_s16_x2(svint16x2_t_val, 2); + svqshrn_n_s16_s32_x2(svint32x2_t_val, 2); + svqshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + svqshrn_n_u16_u32_x2(svuint32x2_t_val, 2); + svqshrn_s8(svint16x2_t_val, 2); + svqshrn_s16(svint32x2_t_val, 2); + svqshrn_u8(svuint16x2_t_val, 2); + svqshrn_u16(svuint32x2_t_val, 2); + svqshrun_n_u8_s16_x2(svint16x2_t_val, 2); + svqshrun_n_u16_s32_x2(svint32x2_t_val, 2); + svqshrun_u8(svint16x2_t_val, 2); + svqshrun_u16(svint32x2_t_val, 2); +} + +void test_streaming(void) __arm_streaming{ + svint16x2_t svint16x2_t_val; + svint32x2_t svint32x2_t_val; + svuint16x2_t svuint16x2_t_val; + svuint32x2_t svuint32x2_t_val; + + svqrshrn_n_s8_s16_x2(svint16x2_t_val, 2); + svqrshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + svqrshrn_s8(svint16x2_t_val, 2); + svqrshrn_u8(svuint16x2_t_val, 2); + svqrshrun_n_u8_s16_x2(svint16x2_t_val, 2); + svqrshrun_u8(svint16x2_t_val, 2); + svqshrn_n_s8_s16_x2(svint16x2_t_val, 2); + svqshrn_n_s16_s32_x2(svint32x2_t_val, 2); + svqshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + svqshrn_n_u16_u32_x2(svuint32x2_t_val, 2); + svqshrn_s8(svint16x2_t_val, 2); + svqshrn_s16(svint32x2_t_val, 2); + svqshrn_u8(svuint16x2_t_val, 2); + svqshrn_u16(svuint32x2_t_val, 2); + svqshrun_n_u8_s16_x2(svint16x2_t_val, 2); + svqshrun_n_u16_s32_x2(svint32x2_t_val, 2); + svqshrun_u8(svint16x2_t_val, 2); + svqshrun_u16(svint32x2_t_val, 2); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svint16x2_t svint16x2_t_val; + svint32x2_t svint32x2_t_val; + svuint16x2_t svuint16x2_t_val; + svuint32x2_t svuint32x2_t_val; + + svqrshrn_n_s8_s16_x2(svint16x2_t_val, 2); + svqrshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + svqrshrn_s8(svint16x2_t_val, 2); + svqrshrn_u8(svuint16x2_t_val, 2); + svqrshrun_n_u8_s16_x2(svint16x2_t_val, 2); + svqrshrun_u8(svint16x2_t_val, 2); + svqshrn_n_s8_s16_x2(svint16x2_t_val, 2); + svqshrn_n_s16_s32_x2(svint32x2_t_val, 2); + svqshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + svqshrn_n_u16_u32_x2(svuint32x2_t_val, 2); + 
svqshrn_s8(svint16x2_t_val, 2); + svqshrn_s16(svint32x2_t_val, 2); + svqshrn_u8(svuint16x2_t_val, 2); + svqshrn_u16(svuint32x2_t_val, 2); + svqshrun_n_u8_s16_x2(svint16x2_t_val, 2); + svqshrun_n_u16_s32_x2(svint32x2_t_val, 2); + svqshrun_u8(svint16x2_t_val, 2); + svqshrun_u16(svint32x2_t_val, 2); +} diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp new file mode 100644 index 0000000000000..0909a46158942 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp @@ -0,0 +1,51 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -verify=overload -verify-ignore-unexpected=error,note -emit-llvm -o - %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +void test(svint32x2_t s32x2, svint16x2_t s16x2, svint8x2_t s8x2, svuint32x2_t u32x2, svuint16x2_t u16x2, svuint8x2_t u8x2) +{ + // expected-error@+2 {{'svqshrn_n_s8_s16_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqshrn_s8' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqshrn,_n,_s8,_s16_x2)(s16x2, 8); + + // expected-error@+2 {{'svqshrn_n_s16_s32_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqshrn_s16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqshrn,_n,_s16,_s32_x2)(s32x2, 16); + + // expected-error@+2 {{'svqshrn_n_u8_u16_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqshrn_u8' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqshrn,_n,_u8,_u16_x2)(u16x2, 8); + + // expected-error@+2 {{'svqshrn_n_u16_u32_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqshrn_u16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqshrn,_n,_u16,_u32_x2)(u32x2, 16); + + // expected-error@+2 {{'svqshrun_n_u16_s32_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqshrun_u16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqshrun,_n,_u16,_s32_x2)(s32x2, 16); + + // expected-error@+2 {{'svqshrun_n_u8_s16_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqshrun_u8' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqshrun,_n,_u8,_s16_x2)(s16x2, 8); + + // expected-error@+2 {{'svqrshrn_n_s8_s16_x2' needs target feature 
(sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqrshrn_s8' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2)(s16x2, 8); + + // expected-error@+2 {{'svqrshrn_n_u8_u16_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqrshrn_u8' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2)(u16x2, 8); + + // expected-error@+2 {{'svqrshrun_n_u8_s16_x2' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + // overload-error@+1 {{'svqrshrun_u8' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}} + SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2)(s16x2, 8); +} diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp new file mode 100644 index 0000000000000..e0d21aed716d2 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp @@ -0,0 +1,87 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s + +#include <arm_sve.h> + + + +void test_svqshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) +{ + svqshrn_n_s8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_s8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_s8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqshrn_n_s8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrn_n_s16_s32_x2(svint32x2_t zn, uint64_t imm) +{ + svqshrn_n_s16_s32_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_s16_s32_x2(zn, 
17); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_s16_s32_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + + svqshrn_n_s16_s32_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) +{ + svqshrn_n_u8_u16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_u8_u16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_u8_u16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqshrn_n_u8_u16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrn_n_u16_u32_x2(svuint32x2_t zn, uint64_t imm) +{ + svqshrn_n_u16_u32_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_u16_u32_x2(zn, 17); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_u16_u32_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + + svqshrn_n_u16_u32_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrun_n_u16_s32_x2(svint32x2_t zn, uint64_t imm) +{ + svqshrun_n_u16_s32_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrun_n_u16_s32_x2(zn, 17); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrun_n_u16_s32_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + + svqshrun_n_u16_s32_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) +{ 
+ svqshrun_n_u8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrun_n_u8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrun_n_u8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqrshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) { + svqrshrn_n_s8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_s8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_s8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqrshrn_n_s8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) +{ + svqrshrn_n_u8_u16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_u8_u16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_u8_u16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqrshrn_n_u8_u16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) +{ + svqrshrun_n_u8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrun_n_u8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrun_n_u8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqrshrun_n_u8_s16_x2(zn, imm); // 
expected-error-re {{argument to {{.+}} must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 63500beaa6521..9e956a7762dfc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3781,6 +3781,13 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_uqcvtn_x4 : SVE2_CVT_VG4_SINGLE_Intrinsic; def int_aarch64_sve_sqcvtun_x4 : SVE2_CVT_VG4_SINGLE_Intrinsic; + // + // Multi-vector saturating shift right narrow and interleave + // + def int_aarch64_sve_sqshrn_x2 : SVE2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqshrn_x2 : SVE2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqshrun_x2 : SVE2_VG2_Multi_Imm_Intrinsic; + // // Multi-Single add/sub // @@ -4296,4 +4303,3 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty], [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; } - diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c5a3bd504adf9..5f56f156cfdb1 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4827,15 +4827,15 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; // SVE2 saturating shift right narrow by immediate and interleave - defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>; - defm SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001>; - defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000>; - defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100>; - defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111>; - defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010>; - defm SQSHRUN_Z2ZI_StoH : 
sve_multi_vec_shift_narrow<"sqshrun", 0b100, null_frag>; - defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, null_frag>; - defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, null_frag>; + defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; + defm SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>; + defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000, int_aarch64_sve_sqshrn_x2>; + defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100, int_aarch64_sve_sqshrun_x2>; + defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>; + defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010, int_aarch64_sve_uqshrn_x2>; + defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, int_aarch64_sve_sqshrun_x2>; + defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, int_aarch64_sve_sqshrn_x2>; + defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, int_aarch64_sve_uqshrn_x2>; defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">; } // End HasSME2p3orSVE2p3 diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 8a3f52090ab4c..031cfdd049e30 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -720,7 +720,7 @@ class SVE_Shift_Add_All_Active_Pat; -class SVE2p1_Sat_Shift_VG2_Pat +class SVE_Sat_Shift_VG2_Pat : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), (!cast(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; @@ -10118,7 +10118,7 @@ multiclass sve2p1_multi_vec_extract_narrow opc, SDPatte } // SVE2 multi-vec shift narrow -class sve2p1_multi_vec_shift_narrow opc, bits<2> tsz> : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn, immtype:$imm), mnemonic, "\t$Zd, $Zn, 
$imm", @@ -10142,17 +10142,19 @@ class sve2p1_multi_vec_shift_narrow opc, SDPatternOperator intrinsic> { - def NAME : sve2p1_multi_vec_shift_narrow { + def NAME : sve_multi_vec_shift_narrow { let Inst{19} = imm{3}; // imm4 } - def : SVE2p1_Sat_Shift_VG2_Pat; + def : SVE_Sat_Shift_VG2_Pat; } -multiclass sve_multi_vec_round_shift_narrow opc> { - def NAME : sve2p1_multi_vec_shift_narrow { +multiclass sve_multi_vec_round_shift_narrow opc, SDPatternOperator intrinsic> { + def NAME : sve_multi_vec_shift_narrow { let Inst{19} = 0b1; // always 1 for imm3 version } + + def : SVE_Sat_Shift_VG2_Pat; } // SME2 multi-vec contiguous load (scalar plus scalar, two registers) diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics/sve2p3-intrinsics-qrshr.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics/sve2p3-intrinsics-qrshr.ll new file mode 100644 index 0000000000000..dd922bce8b2da --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics/sve2p3-intrinsics-qrshr.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 -enable-subreg-liveness -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s + +define @test_svqrshrn_n_s8_s16_x2( %zn, %zm) { +; CHECK-LABEL: test_svqrshrn_n_s8_s16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrshrn z0.b, { z0.h, z1.h }, #8 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrn.x2.nxv8i16( %zn, %zm, i32 8) + ret %res +} + +define @test_svqrshrn_n_u8_u16_x2( %zn, %zm) { +; CHECK-LABEL: test_svqrshrn_n_u8_u16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: uqrshrn z0.b, { z0.h, z1.h }, #8 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshrn.x2.nxv8i16( %zn, %zm, i32 8) + ret %res +} + 
+define <vscale x 16 x i8> @test_svqrshrun_n_u8_s16_x2(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) { +; CHECK-LABEL: test_svqrshrun_n_u8_s16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrshrun z0.b, { z0.h, z1.h }, #8 +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 8) + ret <vscale x 16 x i8> %res +} diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics/sve2p3-intrinsics-qshr.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics/sve2p3-intrinsics-qshr.ll new file mode 100644 index 0000000000000..1185580631866 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics/sve2p3-intrinsics-qshr.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 -enable-subreg-liveness -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s + +define <vscale x 16 x i8> @test_svqshrn_n_s8_s16_x2(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) { +; CHECK-LABEL: test_svqshrn_n_s8_s16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sqshrn z0.b, { z0.h, z1.h }, #8 +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 8) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @test_svqshrn_n_s16_s32_x2(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) { +; CHECK-LABEL: test_svqshrn_n_s16_s32_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sqshrn z0.h, { z0.s, z1.s }, #16 +; CHECK-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrn.x2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 16) + ret <vscale x 8 x i16> %res +} + +define <vscale x 16 x i8> @test_svqshrn_n_u8_u16_x2(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) { +; CHECK-LABEL: test_svqshrn_n_u8_u16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: uqshrn z0.b, { z0.h, z1.h }, #8 +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrn.x2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 8) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @test_svqshrn_n_u16_u32_x2(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) { +; CHECK-LABEL: test_svqshrn_n_u16_u32_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: uqshrn z0.h, { z0.s, z1.s }, 
#16 +; CHECK-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrn.x2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 16) + ret <vscale x 8 x i16> %res +} + +define <vscale x 8 x i16> @test_svqshrun_n_u16_s32_x2(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) { +; CHECK-LABEL: test_svqshrun_n_u16_s32_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sqshrun z0.h, { z0.s, z1.s }, #16 +; CHECK-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrun.x2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 16) + ret <vscale x 8 x i16> %res +} + +define <vscale x 16 x i8> @test_svqshrun_n_u8_s16_x2(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) { +; CHECK-LABEL: test_svqshrun_n_u8_s16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sqshrun z0.b, { z0.h, z1.h }, #8 +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrun.x2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 8) + ret <vscale x 16 x i8> %res +}