diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 85656c00c5b3e..62dc1c9e5b3a5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2258,3 +2258,14 @@ let TargetGuard = "sme2" in { def SVQCVTN_U16_U64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "Ul", MergeNone, "aarch64_sve_uqcvtn_x4", [IsStreaming], []>; def SVQCVTN_U16_S64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "l", MergeNone, "aarch64_sve_sqcvtun_x4", [IsStreaming], []>; } + +// +// Multi-vector unpack +// + +let TargetGuard = "sme2" in { + def SVSUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "sil", MergeNone, "aarch64_sve_sunpk_x2", [IsStreaming], []>; + def SVUUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x2", [IsStreaming], []>; + def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; + def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c new file mode 100644 index 0000000000000..2f427689323b4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c @@ -0,0 +1,150 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svunpk_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_s16,_s8_x2)(zn); +} + +// CHECK-LABEL: @test_svunpk_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x2u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_u16,_u8_x2)(zn); +} + +// CHECK-LABEL: @test_svunpk_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x2u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_s32,_s16_x2)(zn); +} + +// CHECK-LABEL: @test_svunpk_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x2u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_u32,_u16_x2)(zn); +} + +// CHECK-LABEL: @test_svunpk_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x2u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_s64,_s32_x2)(zn); +} + +// CHECK-LABEL: @test_svunpk_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x2u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svunpk_u64_x2(svuint32_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_u64,_u32_x2)(zn); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c new file mode 100644 index 0000000000000..cdcc62d5405e6 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c @@ -0,0 +1,222 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svunpk_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x410svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_s16,_s8_x4)(zn); +} + +// CHECK-LABEL: @test_svunpk_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x411svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_u16,_u8_x4)(zn); +} + +// CHECK-LABEL: @test_svunpk_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x411svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_s32,_s16_x4)(zn); +} + +// CHECK-LABEL: @test_svunpk_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x412svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_u32,_u16_x4)(zn); +} + +// CHECK-LABEL: @test_svunpk_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x411svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_s64,_s32_x4)(zn); +} + +// CHECK-LABEL: @test_svunpk_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x412svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svuint64x4_t test_svunpk_u64_x4(svuint32x2_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svunpk_u64,_u32_x4)(zn); +}