diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index e81b75dfe31130..fdc7878c617f0f 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -651,6 +651,43 @@ multiclass vshll_imm<int top> {
 defm vshllbq : vshll_imm<0>;
 defm vshlltq : vshll_imm<1>;
 
+multiclass DyadicImmShift<Type outtype, Immediate imm, string intname = NAME,
+                          dag extraargs = (?)> {
+  foreach intparams = [!if(!eq(!cast<string>(outtype), !cast<string>(Vector)),
+                           [Vector], [outtype, Vector])] in {
+    def q_n: Intrinsic<
+        outtype, (args outtype:$a, Vector:$b, imm:$sh),
+        !con((IRInt<intname, intparams> $a, $b, $sh), extraargs)>;
+
+    def q_m_n: Intrinsic<
+        outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred),
+        !con((IRInt<intname # "_predicated", intparams # [Predicate]>
+                 $a, $b, $sh), extraargs, (? $pred))>;
+  }
+}
+
+multiclass VSHRN<Type outtype, Immediate imm, dag extraargs> {
+  defm b: DyadicImmShift<outtype, imm, "vshrn", !con(extraargs, (? (u32 0)))>;
+  defm t: DyadicImmShift<outtype, imm, "vshrn", !con(extraargs, (? (u32 1)))>;
+}
+
+let params = [s16, s32, u16, u32], pnt = PNT_NType in {
+  foreach U = [(unsignedflag Scalar)] in {
+    defm vshrn   : VSHRN<HalfVector, imm_1toHalfN, (? (u32 0), (u32 0), U, U)>;
+    defm vqshrn  : VSHRN<HalfVector, imm_1toHalfN, (? (u32 1), (u32 0), U, U)>;
+    defm vrshrn  : VSHRN<HalfVector, imm_1toHalfN, (? (u32 0), (u32 1), U, U)>;
+    defm vqrshrn : VSHRN<HalfVector, imm_1toHalfN, (? (u32 1), (u32 1), U, U)>;
+  }
+}
+let params = [s16, s32], pnt = PNT_NType in {
+  defm vqshrun  : VSHRN<UHalfVector, imm_1toHalfN, (? (u32 1), (u32 0), (u32 1), (u32 0))>;
+  defm vqrshrun : VSHRN<UHalfVector, imm_1toHalfN, (? (u32 1), (u32 1), (u32 1), (u32 0))>;
+}
+let params = T.Int, pnt = PNT_NType in {
+  defm vsli : DyadicImmShift<Vector, imm_0toNm1>;
+  defm vsri : DyadicImmShift<Vector, imm_1toN>;
+}
+
 // Base class for the scalar shift intrinsics.
 class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
   Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 6fba88df34bf56..2bd769c220db6d 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -190,7 +190,10 @@ def CTO_Pred: ComplexTypeOp;
 class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
 class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
 def CTO_CopyKind: ComplexTypeOp;
-def CTO_DoubleSize: ComplexTypeOp;
+class CTO_ScaleSize<int num_, int denom_>: ComplexTypeOp {
+  int num = num_;
+  int denom = denom_;
+}
 
 // -----------------------------------------------------------------------------
 // Instances of Type intended to be used directly in the specification of an
@@ -268,7 +271,8 @@ class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>;
 // DoubleSize<k> expects k to be a scalar type. It returns a scalar type
 // whose kind (signed, unsigned or float) matches that of k, and whose size
 // is double that of k, if possible.
-class DoubleSize<Type k>: ComplexType<(CTO_DoubleSize k)>;
+class DoubleSize<Type k> : ComplexType<(CTO_ScaleSize<2, 1> k)>;
+class HalfSize<Type k>   : ComplexType<(CTO_ScaleSize<1, 2> k)>;
 
 // Unsigned<t> expects t to be a scalar type, and expands to the unsigned
 // integer scalar of the same size. So it returns u16 if you give it s16 or
@@ -280,9 +284,12 @@ class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;
 def UScalar: Unsigned<Scalar>;
 def UVector: VecOf<UScalar>;
 
-// DblVector expands to a vector of scalars of size twice the size of
-// Scalar.
+// DblVector expands to a vector of scalars of size twice the size of Scalar.
+// HalfVector, similarly, expands to a vector of half-sized scalars. And
+// UHalfVector is a vector of half-sized _unsigned integers_.
 def DblVector: VecOf<DoubleSize<Scalar>>;
+def HalfVector: VecOf<HalfSize<Scalar>>;
+def UHalfVector: VecOf<Unsigned<HalfSize<Scalar>>>;
 
 // Expands to the 32-bit integer of the same signedness as Scalar.
 def Scalar32: CopyKind<u32, Scalar>;
@@ -305,7 +312,10 @@ class IB_ConstRange<int lo_, int hi_> : ImmediateBounds {
 }
 def IB_UEltValue : ImmediateBounds;
 def IB_LaneIndex : ImmediateBounds;
-class IB_EltBit<int base_> : ImmediateBounds { int base = base_; }
+class IB_EltBit<int base_, Type type_ = Scalar> : ImmediateBounds {
+  int base = base_;
+  Type type = type_;
+}
 
 // -----------------------------------------------------------------------------
 // End-user definitions for immediate arguments.
@@ -327,8 +337,12 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
 //
 // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
 // inclusive.
+//
+// imm_1toHalfN is like imm_1toN, but applied to a half-width type.
+// (So if Scalar is s16, for example, it'll give you the range 1 to 8.)
 def imm_1toN : Immediate<u32, IB_EltBit<1>>;
 def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
+def imm_1toHalfN : Immediate<u32, IB_EltBit<1, HalfSize<Scalar>>>;
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c
new file mode 100644
index 00000000000000..3d4f77b99d744c
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c
@@ -0,0 +1,1565 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vshrnbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, i32 0, i32 0, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vshrnbq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vshrnbq_n_s16(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 9, i32 0, i32 0, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshrnbq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq(a, b, 9);
+#else /* POLYMORPHIC */
+    return vshrnbq_n_s32(a, b, 9);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, i32 0, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vshrnbq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vshrnbq_n_u16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 0, i32 0, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshrnbq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vshrnbq_n_u32(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, i32 0, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vshrntq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrntq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vshrntq_n_s16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 0, i32 0, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshrntq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrntq(a, b, 10);
+#else /* POLYMORPHIC */
+    return vshrntq_n_s32(a, b, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 6, i32 0, i32 0, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vshrntq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrntq(a, b, 6);
+#else /* POLYMORPHIC */
+    return vshrntq_n_u16(a, b, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 0, i32 0, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshrntq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vshrntq(a, b, 10);
+#else /* POLYMORPHIC */
+    return vshrntq_n_u32(a, b, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vshrnbq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq_m(a, b, 4, p);
+#else /* POLYMORPHIC */
+    return vshrnbq_m_n_s16(a, b, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshrnbq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq_m(a, b, 13, p);
+#else /* POLYMORPHIC */
+    return vshrnbq_m_n_s32(a, b, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 0, i32 0, i32 1, i32 1, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vshrnbq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vshrnbq_m_n_u16(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrnbq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 15, i32 0, i32 0, i32 1, i32 1, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshrnbq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrnbq_m(a, b, 15, p);
+#else /* POLYMORPHIC */
+    return vshrnbq_m_n_u32(a, b, 15, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 6, i32 0, i32 0, i32 0, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vshrntq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrntq_m(a, b, 6, p);
+#else /* POLYMORPHIC */
+    return vshrntq_m_n_s16(a, b, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 0, i32 0, i32 0, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshrntq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrntq_m(a, b, 13, p);
+#else /* POLYMORPHIC */
+    return vshrntq_m_n_s32(a, b, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, i32 0, i32 1, i32 1, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vshrntq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrntq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vshrntq_m_n_u16(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshrntq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 0, i32 0, i32 1, i32 1, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshrntq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshrntq_m(a, b, 10, p);
+#else /* POLYMORPHIC */
+    return vshrntq_m_n_u32(a, b, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 5, i32 0, i32 1, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vrshrnbq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq(a, b, 5);
+#else /* POLYMORPHIC */
+    return vrshrnbq_n_s16(a, b, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 0, i32 1, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vrshrnbq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq(a, b, 10);
+#else /* POLYMORPHIC */
+    return vrshrnbq_n_s32(a, b, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 2, i32 0, i32 1, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vrshrnbq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq(a, b, 2);
+#else /* POLYMORPHIC */
+    return vrshrnbq_n_u16(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 12, i32 0, i32 1, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vrshrnbq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq(a, b, 12);
+#else /* POLYMORPHIC */
+    return vrshrnbq_n_u32(a, b, 12);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 0, i32 1, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vrshrntq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq(a, b, 4);
+#else /* POLYMORPHIC */
+    return vrshrntq_n_s16(a, b, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 11, i32 0, i32 1, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vrshrntq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq(a, b, 11);
+#else /* POLYMORPHIC */
+    return vrshrntq_n_s32(a, b, 11);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, i32 1, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vrshrntq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vrshrntq_n_u16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 6, i32 0, i32 1, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vrshrntq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq(a, b, 6);
+#else /* POLYMORPHIC */
+    return vrshrntq_n_u32(a, b, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrshrnbq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vrshrnbq_m_n_s16(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 14, i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrshrnbq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq_m(a, b, 14, p);
+#else /* POLYMORPHIC */
+    return vrshrnbq_m_n_s32(a, b, 14, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 2, i32 0, i32 1, i32 1, i32 1, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrshrnbq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq_m(a, b, 2, p);
+#else /* POLYMORPHIC */
+    return vrshrnbq_m_n_u16(a, b, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrnbq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 12, i32 0, i32 1, i32 1, i32 1, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrshrnbq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrnbq_m(a, b, 12, p);
+#else /* POLYMORPHIC */
+    return vrshrnbq_m_n_u32(a, b, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 0, i32 1, i32 0, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrshrntq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq_m(a, b, 4, p);
+#else /* POLYMORPHIC */
+    return vrshrntq_m_n_s16(a, b, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 6, i32 0, i32 1, i32 0, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrshrntq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq_m(a, b, 6, p);
+#else /* POLYMORPHIC */
+    return vrshrntq_m_n_s32(a, b, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 6, i32 0, i32 1, i32 1, i32 1, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrshrntq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq_m(a, b, 6, p);
+#else /* POLYMORPHIC */
+    return vrshrntq_m_n_u16(a, b, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrntq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 0, i32 1, i32 1, i32 1, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrshrntq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrntq_m(a, b, 10, p);
+#else /* POLYMORPHIC */
+    return vrshrntq_m_n_u32(a, b, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 0, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqshrnbq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq(a, b, 7);
+#else /* POLYMORPHIC */
+    return vqshrnbq_n_s16(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 15, i32 1, i32 0, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqshrnbq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq(a, b, 15);
+#else /* POLYMORPHIC */
+    return vqshrnbq_n_s32(a, b, 15);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, i32 1, i32 0, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshrnbq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vqshrnbq_n_u16(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 1, i32 0, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshrnbq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vqshrnbq_n_u32(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 5, i32 1, i32 0, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqshrntq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq(a, b, 5);
+#else /* POLYMORPHIC */
+    return vqshrntq_n_s16(a, b, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 6, i32 1, i32 0, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqshrntq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq(a, b, 6);
+#else /* POLYMORPHIC */
+    return vqshrntq_n_s32(a, b, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshrntq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vqshrntq_n_u16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 15, i32 1, i32 0, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshrntq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq(a, b, 15);
+#else /* POLYMORPHIC */
+    return vqshrntq_n_u32(a, b, 15);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqshrnbq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vqshrnbq_m_n_s16(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqshrnbq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vqshrnbq_m_n_s32(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 1, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshrnbq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vqshrnbq_m_n_u16(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrnbq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 8, i32 1, i32 0, i32 1, i32 1, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshrnbq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrnbq_m(a, b, 8, p);
+#else /* POLYMORPHIC */
+    return vqshrnbq_m_n_u32(a, b, 8, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqshrntq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vqshrntq_m_n_s16(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 11, i32 1, i32 0, i32 0, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqshrntq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq_m(a, b, 11, p);
+#else /* POLYMORPHIC */
+    return vqshrntq_m_n_s32(a, b, 11, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, i32 1, i32 0, i32 1, i32 1, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshrntq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq_m(a, b, 3, p);
+#else /* POLYMORPHIC */
+    return vqshrntq_m_n_u16(a, b, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrntq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshrntq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrntq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vqshrntq_m_n_u32(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrunbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 5, i32 1, i32 0, i32 1, i32 0, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshrunbq_n_s16(uint8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrunbq(a, b, 5);
+#else /* POLYMORPHIC */
+    return vqshrunbq_n_s16(a, b, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrunbq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 1, i32 0, i32 1, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshrunbq_n_s32(uint16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshrunbq(a, b, 13);
+#else /* POLYMORPHIC */
+    return vqshrunbq_n_s32(a, b, 13);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshruntq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 2, i32 1, i32 0, i32 1, i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshruntq_n_s16(uint8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshruntq(a, b, 2);
+#else /* POLYMORPHIC */
+    return vqshruntq_n_s16(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshruntq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 7, i32 1, i32 0, i32 1, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshruntq_n_s32(uint16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqshruntq(a, b, 7);
+#else /* POLYMORPHIC */
+    return vqshruntq_n_s32(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrunbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshrunbq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrunbq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vqshrunbq_m_n_s16(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshrunbq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshrunbq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshrunbq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vqshrunbq_m_n_s32(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshruntq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshruntq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshruntq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vqshruntq_m_n_s16(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshruntq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshruntq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshruntq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vqshruntq_m_n_s32(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 5, i32 1, i32 1, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqrshrnbq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq(a, b, 5);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_n_s16(a, b, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 1, i32 1, i32 0, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqrshrnbq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq(a, b, 13);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_n_s32(a, b, 13);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 1, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqrshrnbq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq(a, b, 7);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_n_u16(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 8, i32 1, i32 1, i32 1, i32 1, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqrshrnbq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq(a, b, 8);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_n_u32(a, b, 8);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 1, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqrshrntq_n_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq(a, b, 7);
+#else /* POLYMORPHIC */
+    return vqrshrntq_n_s16(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 2, i32 1, i32 1, i32 0, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqrshrntq_n_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq(a, b, 2);
+#else /* POLYMORPHIC */
+    return vqrshrntq_n_s32(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqrshrntq_n_u16(uint8x16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vqrshrntq_n_u16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 11, i32 1, i32 1, i32 1, i32 1, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqrshrntq_n_u32(uint16x8_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq(a, b, 11);
+#else /* POLYMORPHIC */
+    return vqrshrntq_n_u32(a, b, 11);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqrshrnbq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq_m(a, b, 2, p);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_m_n_s16(a, b, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 12, i32 1, i32 1, i32 0, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqrshrnbq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq_m(a, b, 12, p);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_m_n_s32(a, b, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 5, i32 1, i32 1, i32 1, i32 1, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqrshrnbq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq_m(a, b, 5, p);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_m_n_u16(a, b, 5, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrnbq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 11, i32 1, i32 1, i32 1, i32 1, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqrshrnbq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrnbq_m(a, b, 11, p);
+#else /* POLYMORPHIC */
+    return vqrshrnbq_m_n_u32(a, b, 11, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 1, i32 1, i32 0, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqrshrntq_m_n_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq_m(a, b, 4, p);
+#else /* POLYMORPHIC */
+    return vqrshrntq_m_n_s16(a, b, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 6, i32 1, i32 1, i32 0, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqrshrntq_m_n_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq_m(a, b, 6, p);
+#else /* POLYMORPHIC */
+    return vqrshrntq_m_n_s32(a, b, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 1, i32 1, i32 1, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqrshrntq_m_n_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq_m(a, b, 7, p);
+#else /* POLYMORPHIC */
+    return vqrshrntq_m_n_u16(a, b, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrntq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 15, i32 1, i32 1, i32 1, i32 1, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqrshrntq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrntq_m(a, b, 15, p);
+#else /* POLYMORPHIC */
+    return vqrshrntq_m_n_u32(a, b, 15, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrunbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 1, i32 1, i32 0, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrunbq(a, b, 7);
+#else /* POLYMORPHIC */
+    return vqrshrunbq_n_s16(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrunbq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshrunbq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vqrshrunbq_n_s32(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshruntq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshruntq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vqrshruntq_n_s16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshruntq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 1, i32 1, i32 1, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vqrshruntq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vqrshruntq_n_s32(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrunbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqrshrunbq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrunbq_m(a, b, 4, p);
+#else /* POLYMORPHIC */
+    return vqrshrunbq_m_n_s16(a, b, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshrunbq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 1, i32 1, i32 1, i32 0, i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqrshrunbq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshrunbq_m(a, b, 10, p);
+#else /* POLYMORPHIC */
+    return vqrshrunbq_m_n_s32(a, b, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshruntq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, i32 1, i32 1, i32 1, i32 0, i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqrshruntq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshruntq_m(a, b, 3, p);
+#else /* POLYMORPHIC */
+    return vqrshruntq_m_n_s16(a, b, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrshruntq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 1, i32 1, i32 1, i32 0, i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqrshruntq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqrshruntq_m(a, b, 13, p);
+#else /* POLYMORPHIC */
+    return vqrshruntq_m_n_s32(a, b, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 2)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsliq(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsliq_n_s8(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 10)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsliq(a, b, 10);
+#else /* POLYMORPHIC */
+    return vsliq_n_s16(a, b, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsliq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vsliq_n_s32(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsliq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vsliq_n_u8(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsliq(a, b, 1);
+#else /* POLYMORPHIC */
+    return vsliq_n_u16(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 28)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsliq(a, b, 28);
+#else /* POLYMORPHIC */
+    return vsliq_n_u32(a, b, 28);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 4, <16 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vsliq_m_n_s8(int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsliq_m(a, b, 4, p);
+#else /* POLYMORPHIC */
+    return vsliq_m_n_s8(a, b, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vsliq_m_n_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsliq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vsliq_m_n_s16(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vsliq_m_n_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsliq_m(a, b, 1, p);
+#else /* POLYMORPHIC */
+    return vsliq_m_n_s32(a, b, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_m_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 5, <16 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vsliq_m_n_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsliq_m(a, b, 5, p);
+#else /* POLYMORPHIC */
+    return vsliq_m_n_u8(a, b, 5, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, <8 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vsliq_m_n_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsliq_m(a, b, 3, p);
+#else /* POLYMORPHIC */
+    return vsliq_m_n_u16(a, b, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsliq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 9, <4 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vsliq_m_n_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsliq_m(a, b, 9, p);
+#else /* POLYMORPHIC */
+    return vsliq_m_n_u32(a, b, 9, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsriq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vsriq_n_s8(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 2)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsriq(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsriq_n_s16(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 28)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsriq(a, b, 28);
+#else /* POLYMORPHIC */
+    return vsriq_n_s32(a, b, 28);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsriq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vsriq_n_u8(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsriq(a, b, 3);
+#else /* POLYMORPHIC */
+    return vsriq_n_u16(a, b, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 26)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsriq(a, b, 26);
+#else /* POLYMORPHIC */
+    return vsriq_n_u32(a, b, 26);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 4, <16 x i1> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vsriq_m_n_s8(int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vsriq_m(a, b, 4, p);
+#else /* POLYMORPHIC */
+    return vsriq_m_n_s8(a, b, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsriq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vsriq_m_n_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vsriq_m(a, b, 1, p); +#else /* POLYMORPHIC */ + return vsriq_m_n_s16(a, b, 1, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vsriq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 27, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vsriq_m_n_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vsriq_m(a, b, 27, p); +#else /* POLYMORPHIC */ + return vsriq_m_n_s32(a, b, 27, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vsriq_m_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 7, <16 x i1> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vsriq_m_n_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vsriq_m(a, b, 7, p); +#else /* POLYMORPHIC */ + return vsriq_m_n_u8(a, b, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vsriq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 9, <8 x i1> [[TMP1]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vsriq_m_n_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vsriq_m(a, b, 9, p); +#else /* POLYMORPHIC */ + return vsriq_m_n_u16(a, b, 9, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vsriq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, <4 x i1> [[TMP1]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vsriq_m_n_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vsriq_m(a, b, 13, p); +#else /* POLYMORPHIC */ + return vsriq_m_n_u32(a, b, 13, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 6f3fd2522ce664..431e5c477c2b7e 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -1099,14 +1099,16 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) { PrintFatalError("Cannot find a type to satisfy CopyKind"); } - if (Op->getName() == "CTO_DoubleSize") { + if (Op->isSubClassOf("CTO_ScaleSize")) { const ScalarType *STKind = cast(getType(D->getArg(0), Param)); + int Num = Op->getValueAsInt("num"), Denom = Op->getValueAsInt("denom"); + unsigned DesiredSize = STKind->sizeInBits() * Num / Denom; for (const auto &kv : 
ScalarTypes) { const ScalarType *RT = kv.second.get(); - if (RT->kind() == STKind->kind() && RT->sizeInBits() == 2*STKind->sizeInBits()) + if (RT->kind() == STKind->kind() && RT->sizeInBits() == DesiredSize) return RT; } - PrintFatalError("Cannot find a type to satisfy DoubleSize"); + PrintFatalError("Cannot find a type to satisfy ScaleSize"); } PrintFatalError("Bad operator in type dag expression"); @@ -1338,7 +1340,8 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) } else if (Bounds->isSubClassOf("IB_EltBit")) { IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = Bounds->getValueAsInt("base"); - IA.i2 = IA.i1 + Param->sizeInBits() - 1; + const Type *T = ME.getType(Bounds->getValueAsDef("type"), Param); + IA.i2 = IA.i1 + T->sizeInBits() - 1; } else { PrintFatalError("unrecognised ImmediateBounds subclass"); } diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 50329ecc0e6ad7..fd263d17816123 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -944,6 +944,17 @@ defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/, llvm_i32_ty /*top-half*/]>; +defm int_arm_mve_vsli: MVEPredicated< + [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>; +defm int_arm_mve_vsri: MVEPredicated< + [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>; + +defm int_arm_mve_vshrn: MVEPredicated< + [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, + llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, + llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/, + llvm_i32_ty /*top-half*/]>; + // MVE scalar shifts. 
class ARM_MVE_qrshift_single value, list saturate = []> : diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index e25af565503b29..c98f72b053aaad 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2503,11 +2503,15 @@ foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in foreach top = [0, 1] in defm : MVE_VSHLL_patterns; +class MVE_shift_imm_partial + : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm), + iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> { + Operand immediateType = imm; +} + class MVE_VxSHRN pattern=[]> - : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops), - iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", - pattern> { + Operand imm, list pattern=[]> + : MVE_shift_imm_partial { bits<5> imm; let Inst{28} = bit_28; @@ -2520,45 +2524,35 @@ class MVE_VxSHRN { +def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> { let Inst{20-19} = 0b01; } -def MVE_VRSHRNi16th : MVE_VxSHRN< - "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> { +def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> { let Inst{20-19} = 0b01; } -def MVE_VRSHRNi32bh : MVE_VxSHRN< - "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> { +def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> { let Inst{20} = 0b1; } -def MVE_VRSHRNi32th : MVE_VxSHRN< - "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> { +def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> { let Inst{20} = 0b1; } -def MVE_VSHRNi16bh : MVE_VxSHRN< - "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> { +def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> { let Inst{20-19} = 0b01; } -def MVE_VSHRNi16th : MVE_VxSHRN< - "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> { +def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> { let Inst{20-19} = 0b01; } -def MVE_VSHRNi32bh : MVE_VxSHRN< - "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> { +def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> { let Inst{20} = 0b1; } -def MVE_VSHRNi32th : MVE_VxSHRN< - "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> { +def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> { let Inst{20} = 0b1; } -class MVE_VxQRSHRUN pattern=[]> - : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops), - iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", - pattern> { +class MVE_VxQRSHRUN pattern=[]> + : MVE_shift_imm_partial { bits<5> imm; let Inst{28} = bit_28; @@ -2572,44 +2566,42 @@ class MVE_VxQRSHRUN { + "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> { let Inst{20-19} = 0b01; } def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN< - "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> { + "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> { let Inst{20-19} = 0b01; } def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN< - "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> { + "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> { let Inst{20} = 0b1; } def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN< - "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> { + "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> { let Inst{20} = 0b1; } def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN< - "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> { + "vqshrunb", "s16", 0b0, 0b0, shr_imm8> { let Inst{20-19} = 0b01; } def MVE_VQSHRUNs16th : MVE_VxQRSHRUN< - "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> { + "vqshrunt", "s16", 0b0, 0b1, shr_imm8> { let Inst{20-19} = 0b01; } def MVE_VQSHRUNs32bh : 
MVE_VxQRSHRUN< - "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> { + "vqshrunb", "s32", 0b0, 0b0, shr_imm16> { let Inst{20} = 0b1; } def MVE_VQSHRUNs32th : MVE_VxQRSHRUN< - "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> { + "vqshrunt", "s32", 0b0, 0b1, shr_imm16> { let Inst{20} = 0b1; } class MVE_VxQRSHRN pattern=[]> - : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops), - iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", - pattern> { + Operand imm, list pattern=[]> + : MVE_shift_imm_partial { bits<5> imm; let Inst{25-23} = 0b101; @@ -2622,19 +2614,19 @@ class MVE_VxQRSHRN { - def s16 : MVE_VxQRSHRN { + def s16 : MVE_VxQRSHRN { let Inst{28} = 0b0; let Inst{20-19} = 0b01; } - def u16 : MVE_VxQRSHRN { + def u16 : MVE_VxQRSHRN { let Inst{28} = 0b1; let Inst{20-19} = 0b01; } - def s32 : MVE_VxQRSHRN { + def s32 : MVE_VxQRSHRN { let Inst{28} = 0b0; let Inst{20} = 0b1; } - def u32 : MVE_VxQRSHRN { + def u32 : MVE_VxQRSHRN { let Inst{28} = 0b1; let Inst{20} = 0b1; } @@ -2645,6 +2637,64 @@ defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>; defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>; defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>; +multiclass MVE_VSHRN_patterns { + foreach inparams = [(? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm), + (inst.immediateType:$imm), (i32 q), (i32 r), + (i32 OutVTI.Unsigned), (i32 InVTI.Unsigned), + (i32 top))] in + foreach outparams = [(inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm), + (imm:$imm))] in { + def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)), + (OutVTI.Vec outparams)>; + def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated + (InVTI.Pred VCCR:$pred)))), + (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>; + } +} + +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; +defm : MVE_VSHRN_patterns; + // end of mve_imm_shift instructions // start of mve_shift instructions @@ -2733,9 +2783,9 @@ class MVE_shift_with_imm +class MVE_VSxI_imm : MVE_shift_with_imm { bits<6> imm; let Inst{28} = 0b1; @@ -2744,32 +2794,56 @@ class MVE_VSxI_imm let Inst{10-9} = 0b10; let Inst{8} = bit_8; let validForTailPredication = 1; + + Operand immediateType = immType; } -def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> { +def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> { let Inst{21-19} = 0b001; } -def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> { +def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", 
"16", 0b0, shr_imm16> { let Inst{21-20} = 0b01; } -def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> { +def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> { let Inst{21} = 0b1; } -def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> { +def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> { let Inst{21-19} = 0b001; } -def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> { +def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> { let Inst{21-20} = 0b01; } -def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> { +def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> { let Inst{21} = 0b1; } +multiclass MVE_VSxI_patterns { + foreach inparams = [(? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm), + (inst.immediateType:$imm))] in + foreach outparams = [(inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm), + (inst.immediateType:$imm))] in + foreach unpred_int = [!cast("int_arm_mve_" # name)] in + foreach pred_int = [!cast("int_arm_mve_" # name # "_predicated")] in { + def : Pat<(VTI.Vec !setop(inparams, unpred_int)), + (VTI.Vec outparams)>; + def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))), + (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>; + } +} + +defm : MVE_VSxI_patterns; +defm : MVE_VSxI_patterns; +defm : MVE_VSxI_patterns; +defm : MVE_VSxI_patterns; +defm : MVE_VSxI_patterns; +defm : MVE_VSxI_patterns; + class MVE_VQSHL_imm : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm-dyadic.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm-dyadic.ll new file mode 100644 index 00000000000000..6173cb5520b741 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm-dyadic.ll @@ -0,0 +1,1270 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vshrnbq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnb.i16 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vshrnbq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnb.i32 q0, q1, #9 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vshrnbq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnb.i16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vshrnbq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnb.i32 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 0, i32 0, i32 1, i32 1, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> 
@test_vshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vshrntq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnt.i16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vshrntq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnt.i32 q0, q1, #10 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 0, i32 0, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vshrntq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnt.i16 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 0, i32 1, i32 1, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vshrntq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshrnt.i32 q0, q1, #10 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 1, i32 1, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrnbq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrnbt.i16 q0, q1, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrnbq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrnbt.i32 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrnbq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrnbt.i16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 0, i32 0, i32 1, i32 1, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrnbq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrnbt.i32 q0, q1, #15 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 15, i32 0, i32 0, i32 1, i32 1, i32 0, 
<4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrntq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrntt.i16 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 0, i32 0, i32 0, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrntq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrntt.i32 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 0, i32 0, i32 0, i32 0, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrntq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrntt.i16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 1, i32 1, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vshrntq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrntt.i32 q0, q1, #10 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 1, i32 1, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vrshrnbq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnb.i16 q0, q1, #5 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 0, i32 1, i32 0, i32 0, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vrshrnbq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnb.i32 q0, q1, #10 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 1, i32 0, i32 0, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vrshrnbq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnb.i16 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 2, i32 0, i32 1, i32 1, i32 1, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vrshrnbq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnb.i32 q0, q1, #12 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> 
@llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 12, i32 0, i32 1, i32 1, i32 1, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vrshrntq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnt.i16 q0, q1, #4 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 1, i32 0, i32 0, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vrshrntq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnt.i32 q0, q1, #11 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 11, i32 0, i32 1, i32 0, i32 0, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vrshrntq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnt.i16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vrshrntq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrshrnt.i32 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 6, i32 0, i32 1, i32 1, i32 1, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrnbq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrnbt.i16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrnbq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrnbt.i32 q0, q1, #14 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 14, i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrnbq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrnbt.i16 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 2, i32 0, i32 1, i32 1, i32 1, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrnbq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrnbt.i32 q0, q1, #12 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x 
i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 12, i32 0, i32 1, i32 1, i32 1, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrntq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrntt.i16 q0, q1, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 1, i32 0, i32 0, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrntq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrntt.i32 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 6, i32 0, i32 1, i32 0, i32 0, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrntq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrntt.i16 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 1, i32 1, i32 1, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vrshrntq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrshrntt.i32 q0, q1, #10 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 1, i32 1, i32 1, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqshrnbq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnb.s16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 0, i32 0, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqshrnbq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnb.s32 q0, q1, #15 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 0, i32 0, i32 0, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqshrnbq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnb.u16 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_n_u32(<8 
x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqshrnbq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnb.u32 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqshrntq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnt.s16 q0, q1, #5 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 0, i32 0, i32 0, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqshrntq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnt.s32 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 6, i32 1, i32 0, i32 0, i32 0, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqshrntq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnt.u16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqshrntq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrnt.u32 q0, q1, #15 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 0, i32 1, i32 1, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrnbq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrnbt.s16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrnbq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrnbt.s32 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrnbq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrnbt.u16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrnbq_m_n_u32: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrnbt.u32 q0, q1, #8 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 8, i32 1, i32 0, i32 1, i32 1, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrntq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrntt.s16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrntq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrntt.s32 q0, q1, #11 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 0, i32 0, i32 0, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrntq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrntt.u16 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrntq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrntt.u32 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrunbq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqshrunbq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrunb.s16 q0, q1, #5 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 0, i32 1, i32 0, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrunbq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqshrunbq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrunb.s32 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 0, i32 1, i32 0, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshruntq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqshruntq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrunt.s16 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = call 
<16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 2, i32 1, i32 0, i32 1, i32 0, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshruntq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqshruntq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqshrunt.s32 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshrunbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrunbq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrunbt.s16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshrunbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshrunbq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshrunbt.s32 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqshruntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshruntq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshruntt.s16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqshruntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqshruntq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqshruntt.s32 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqrshrnbq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnb.s16 q0, q1, #5 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 1, i32 0, i32 0, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqrshrnbq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnb.s32 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 1, i32 0, i32 0, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: 
test_vqrshrnbq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnb.u16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 1, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqrshrnbq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnb.u32 q0, q1, #8 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 8, i32 1, i32 1, i32 1, i32 1, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqrshrntq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnt.s16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 0, i32 0, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqrshrntq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnt.s32 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 2, i32 1, i32 1, i32 0, i32 0, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqrshrntq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnt.u16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqrshrntq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrnt.u32 q0, q1, #11 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 1, i32 1, i32 1, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrnbq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrnbt.s16 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrnbq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrnbt.s32 q0, q1, #12 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 12, i32 1, i32 1, i32 0, i32 0, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrnbq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrnbt.u16 q0, q1, #5 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 1, i32 1, i32 1, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrnbq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrnbt.u32 q0, q1, #11 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 1, i32 1, i32 1, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrntq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrntt.s16 q0, q1, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 1, i32 1, i32 0, i32 0, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrntq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrntt.s32 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 6, i32 1, i32 1, i32 0, i32 0, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrntq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrntt.u16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 1, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrntq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrntt.u32 q0, q1, #15 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 1, i32 1, i32 1, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrunbq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqrshrunbq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrunb.s16 q0, q1, #7 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 0, i32 0) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrunbq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqrshrunbq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrunb.s32 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0
= call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshruntq_n_s16(<16 x i8> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vqrshruntq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrunt.s16 q0, q1, #1 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, i32 1, i32 0, i32 1) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshruntq_n_s32(<8 x i16> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vqrshruntq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrshrunt.s32 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 1, i32 1, i32 1, i32 0, i32 1) + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshrunbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrunbq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrunbt.s16 q0, q1, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshrunbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshrunbq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshrunbt.s32 q0, q1, #10 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 1, i32 1, i32 1, i32 0, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqrshruntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshruntq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshruntt.s16 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 1, i32 1, i32 0, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqrshruntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vqrshruntq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqrshruntt.s32 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 1, i32 1, i32 0, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vsliq_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vsli.8 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 2) + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vsliq_n_s16: +; CHECK: @ %bb.0: @ %entry +;
+; CHECK-NEXT: vsli.16 q0, q1, #10
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 10)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsliq_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsli.32 q0, q1, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsliq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsli.8 q0, q1, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsliq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsli.16 q0, q1, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsliq_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsli.32 q0, q1, #28
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 28)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsliq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsliq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vslit.8 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 4, <16 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsliq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsliq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vslit.16 q0, q1, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsliq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsliq_m_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vslit.32 q0, q1, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsliq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsliq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vslit.8 q0, q1, #5
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 5, <16 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsliq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsliq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vslit.16 q0, q1, #3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 3, <8 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsliq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsliq_m_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vslit.32 q0, q1, #9
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 9, <4 x i1> %1)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsriq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsri.8 q0, q1, #3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsriq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsri.16 q0, q1, #2
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 2)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsriq_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsri.32 q0, q1, #28
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 28)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsriq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsri.8 q0, q1, #3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsriq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsri.16 q0, q1, #3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsriq_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsri.32 q0, q1, #26
+; CHECK-NEXT: bx lr
+entry:
+  %0 = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 26)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsriq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsriq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsrit.8 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 4, <16 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsriq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsriq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsrit.16 q0, q1, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsriq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsriq_m_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsrit.32 q0, q1, #27
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 27, <4 x i1> %1)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vsriq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsriq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsrit.8 q0, q1, #7
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 7, <16 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vsriq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsriq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsrit.16 q0, q1, #9
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 9, <8 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vsriq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vsriq_m_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsrit.32 q0, q1, #13
+; CHECK-NEXT: bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 13, <4 x i1> %1)
+  ret <4 x i32> %2
+}
+
+declare <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32, <8 x i1>)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32, <4 x i1>)
+declare <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32>, <4 x i32>, i32)
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
+declare <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
+declare <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
+declare <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32>, <4 x i32>, i32)
+declare <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
+declare <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
+declare <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
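+
+; A summary of the intrinsic shapes exercised above, inferred from the call
+; sites in this file rather than quoted from the TableGen definitions:
+; @llvm.arm.mve.pred.i2v.* converts the i32 predicate mask held in a GPR
+; (as moved into p0 by vmsr) into a vector of i1, one lane per element.
+; The vsli/vsri intrinsics take (insertee, source, shift immediate), with
+; the predicated forms appending the i1 vector. The six trailing i32
+; operands of the vshrn intrinsics appear to encode, in order: the shift
+; count, a saturate flag, a rounding flag, input and output unsignedness
+; (0 = signed, 1 = unsigned), and a bottom/top-half selector; the exact
+; ordering of those flags is an assumption based on the patterns here.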