diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c94073a32c652..07d9fc6e04f1b 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -298,6 +298,29 @@ let TargetGuard = "sve,bf16" in {
   def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
+let TargetGuard = "sve2p1" in {
+  // Contiguous zero-extending load to quadword (single vector).
+  def SVLD1UWQ : MInst<"svld1uwq[_{d}]", "dPc", "iUif", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1uwq">;
+  def SVLD1UWQ_VNUM : MInst<"svld1uwq_vnum[_{d}]", "dPcl", "iUif", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1uwq">;
+
+  def SVLD1UDQ : MInst<"svld1udq[_{d}]", "dPc", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">;
+  def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">;
+
+  // Load one vector (vector base + scalar offset)
+  def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
+  def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
+
+  // Load N-element structure into N vectors (scalar base)
+  defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">;
+  defm SVLD3Q : StructLoad<"svld3q[_{2}]", "3Pc", "aarch64_sve_ld3q_sret">;
+  defm SVLD4Q : StructLoad<"svld4q[_{2}]", "4Pc", "aarch64_sve_ld4q_sret">;
+
+  // Load N-element structure into N vectors (scalar base, VL displacement)
+  defm SVLD2Q_VNUM : StructLoad<"svld2q_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2q_sret">;
+  defm SVLD3Q_VNUM : StructLoad<"svld3q_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3q_sret">;
+  defm SVLD4Q_VNUM : StructLoad<"svld4q_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4q_sret">;
+}
+
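For readers skimming the TableGen: the prototype strings decode roughly as 'd' = result vector of the overloaded element type, 'P' = svbool_t, 'c' = const pointer, 'l' = int64_t scalar, 'g' = svuint64_t vector base, and a leading digit ("2Pc") = a vector tuple result; that reading follows SveEmitter.cpp and is worth double-checking. A minimal sketch of the generated C intrinsics in use (the wrapper functions and the -march spelling are illustrative assumptions, not part of the patch):

  #include <arm_sve.h>

  // Build with an SVE2.1-enabled compiler, e.g. -march=armv9.4-a+sve2p1
  // (flag spelling is an assumption).
  svuint32_t head_words(svbool_t pg, const uint32_t *base) {
    // One 32-bit element is loaded per 128-bit quadword of the result
    // and zero-extended into that quadword (the "uwq" in the name).
    return svld1uwq_u32(pg, base);
  }

  svuint64x2_t two_way_quadword_deinterleave(svbool_t pg, const uint64_t *base) {
    // ld2q de-interleaves at quadword (128-bit) granularity, not per element.
    return svld2q_u64(pg, base);
  }
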
 
 ////////////////////////////////////////////////////////////////////////////////
 // Stores
 
@@ -420,6 +443,29 @@ let TargetGuard = "sve,bf16" in {
   def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;
 }
 
+let TargetGuard = "sve2p1" in {
+  // Contiguous truncating store from quadword (single vector).
+  def SVST1UWQ : MInst<"svst1uwq[_{d}]", "vPcd", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1uwq">;
+  def SVST1UWQ_VNUM : MInst<"svst1uwq_vnum[_{d}]", "vPcld", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1uwq">;
+
+  def SVST1UDQ : MInst<"svst1udq[_{d}]", "vPcd", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1udq">;
+  def SVST1UDQ_VNUM : MInst<"svst1udq_vnum[_{d}]", "vPcld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1udq">;
+
+  // Store one vector (vector base + scalar offset)
+  def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
+  def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
+
+  // Store N vectors into N-element structure (scalar base)
+  defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">;
+  defm SVST3Q : StructStore<"svst3q[_{d}]", "vPc3", "aarch64_sve_st3q">;
+  defm SVST4Q : StructStore<"svst4q[_{d}]", "vPc4", "aarch64_sve_st4q">;
+
+  // Store N vectors into N-element structure (scalar base, VL displacement)
+  defm SVST2Q_VNUM : StructStore<"svst2q_vnum[_{d}]", "vPcl2", "aarch64_sve_st2q">;
+  defm SVST3Q_VNUM : StructStore<"svst3q_vnum[_{d}]", "vPcl3", "aarch64_sve_st3q">;
+  defm SVST4Q_VNUM : StructStore<"svst4q_vnum[_{d}]", "vPcl4", "aarch64_sve_st4q">;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Prefetches
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 33b1fa758eb9d..710e4c162103b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9652,14 +9652,17 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
   case Intrinsic::aarch64_sve_ld2_sret:
   case Intrinsic::aarch64_sve_ld1_pn_x2:
   case Intrinsic::aarch64_sve_ldnt1_pn_x2:
+  case Intrinsic::aarch64_sve_ld2q_sret:
     N = 2;
     break;
   case Intrinsic::aarch64_sve_ld3_sret:
+  case Intrinsic::aarch64_sve_ld3q_sret:
     N = 3;
     break;
   case Intrinsic::aarch64_sve_ld4_sret:
   case Intrinsic::aarch64_sve_ld1_pn_x4:
   case Intrinsic::aarch64_sve_ldnt1_pn_x4:
+  case Intrinsic::aarch64_sve_ld4q_sret:
     N = 4;
     break;
   default:
@@ -9697,14 +9700,17 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   case Intrinsic::aarch64_sve_st2:
   case Intrinsic::aarch64_sve_st1_pn_x2:
   case Intrinsic::aarch64_sve_stnt1_pn_x2:
+  case Intrinsic::aarch64_sve_st2q:
     N = 2;
     break;
   case Intrinsic::aarch64_sve_st3:
+  case Intrinsic::aarch64_sve_st3q:
     N = 3;
     break;
   case Intrinsic::aarch64_sve_st4:
   case Intrinsic::aarch64_sve_st1_pn_x4:
   case Intrinsic::aarch64_sve_stnt1_pn_x4:
+  case Intrinsic::aarch64_sve_st4q:
     N = 4;
     break;
   default:
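The EmitSVEStructLoad/EmitSVEStructStore changes are mechanical: the q-forms reuse the existing sret struct lowering and only contribute their vector count N. A round-trip smoke test of the kind these hunks enable might look like this (hypothetical helper, assuming an SVE2.1 target; not from the patch):

  #include <arm_sve.h>

  // Load four quadword-interleaved vectors and store them back:
  // exercises aarch64.sve.ld4q.sret and aarch64.sve.st4q end to end.
  void roundtrip4(svbool_t pg, const int32_t *src, int32_t *dst) {
    svint32x4_t v = svld4q_s32(pg, src);
    svst4q_s32(pg, dst, v);
  }

  // The _vnum forms add a vector-length-scaled displacement; note the
  // data operand comes last on the store side.
  void roundtrip4_next(svbool_t pg, const int32_t *src, int32_t *dst) {
    svst4q_vnum_s32(pg, dst, 1, svld4q_vnum_s32(pg, src, 1));
  }
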
@@ -9780,7 +9786,7 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                           llvm::Type *ReturnTy,
                                           SmallVectorImpl<Value *> &Ops,
-                                          unsigned BuiltinID,
+                                          unsigned IntrinsicID,
                                           bool IsZExtReturn) {
   QualType LangPTy = E->getArg(1)->getType();
   llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
       LangPTy->castAs<PointerType>()->getPointeeType());
@@ -9789,28 +9795,46 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
 
   // The vector type that is returned may be different from the
   // eventual type loaded from memory.
   auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
-  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+  llvm::ScalableVectorType *MemoryTy = nullptr;
+  llvm::ScalableVectorType *PredTy = nullptr;
+  bool IsQuadLoad = false;
+  switch (IntrinsicID) {
+  case Intrinsic::aarch64_sve_ld1uwq:
+  case Intrinsic::aarch64_sve_ld1udq:
+    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
+    PredTy = llvm::ScalableVectorType::get(
+        llvm::Type::getInt1Ty(getLLVMContext()), 1);
+    IsQuadLoad = true;
+    break;
+  default:
+    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+    PredTy = MemoryTy;
+    break;
+  }
 
-  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
   Value *BasePtr = Ops[1];
 
   // Does the load have an offset?
   if (Ops.size() > 2)
     BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
 
-  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
   auto *Load =
       cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
   auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
   CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
 
+  if (IsQuadLoad)
+    return Load;
+
   return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
-                     : Builder.CreateSExt(Load, VectorTy);
+                      : Builder.CreateSExt(Load, VectorTy);
 }
 
 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
                                            SmallVectorImpl<Value *> &Ops,
-                                           unsigned BuiltinID) {
+                                           unsigned IntrinsicID) {
   QualType LangPTy = E->getArg(1)->getType();
   llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
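The same pattern recurs on the store side below: for st1uwq/st1udq the predicate is narrowed to one bit per 128-bit container, the _vnum GEP is computed on the single-element-per-quadword memory type, and the usual CreateTrunc is skipped because the stored value is used as-is. At the source level that means (illustrative snippet, assuming SVE2.1):

  #include <arm_sve.h>

  // Each active predicate bit governs a whole quadword here, not a
  // single 64-bit element.
  void spill_heads(svbool_t pg, uint64_t *dst, svuint64_t v) {
    svst1udq_u64(pg, dst, v);      // truncating store from quadword
  }

  svuint64_t reload_heads(svbool_t pg, const uint64_t *src) {
    return svld1udq_u64(pg, src);  // matching zero-extending load
  }
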
@@ -9820,17 +9844,34 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
   auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
   auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
 
-  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  auto PredTy = MemoryTy;
+  auto AddrMemoryTy = MemoryTy;
+  bool IsQuadStore = false;
+
+  switch (IntrinsicID) {
+  case Intrinsic::aarch64_sve_st1uwq:
+  case Intrinsic::aarch64_sve_st1udq:
+    AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
+    PredTy =
+        llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
+    IsQuadStore = true;
+    break;
+  default:
+    break;
+  }
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
   Value *BasePtr = Ops[1];
 
   // Does the store have an offset?
   if (Ops.size() == 4)
-    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
+    BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
 
   // Last value is always the data
-  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
+  Value *Val =
+      IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
 
-  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  Function *F =
+      CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
   auto *Store =
       cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
   auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1_single.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1_single.c
new file mode 100644
index 0000000000000..16361ecc987d3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1_single.c
@@ -0,0 +1,255 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// LD1W
+
+// CHECK-LABEL: define dso_local @test_svld1uwq_u32
+// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT: ret [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local @_Z17test_svld1uwq_u32u10__SVBool_tPKj
+// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT: ret [[TMP1]]
+//
+svuint32_t test_svld1uwq_u32(svbool_t pred, uint32_t const * base) {
+  return SVE_ACLE_FUNC(svld1uwq, _u32, , )(pred, base);
+}
+
+// CHECK-LABEL: define dso_local @test_svld1uwq_vnum_u32
+// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT: ret [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local @_Z22test_svld1uwq_vnum_u32u10__SVBool_tPKj
+// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call
@llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint32_t test_svld1uwq_vnum_u32(svbool_t pred, uint32_t const * base) { + return SVE_ACLE_FUNC(svld1uwq_vnum, _u32, , )(pred, base, -8); +} + +// CHECK-LABEL: define dso_local @test_svld1uwq_s32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: define dso_local @_Z17test_svld1uwq_s32u10__SVBool_tPKi +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svld1uwq_s32(svbool_t pred, int32_t const * base) { + return SVE_ACLE_FUNC(svld1uwq, _s32, , )(pred, base); +} + +// CHECK-LABEL: define dso_local @test_svld1uwq_vnum_s32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svld1uwq_vnum_s32u10__SVBool_tPKi +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint32_t test_svld1uwq_vnum_s32(svbool_t pred, int32_t const * base) { + return SVE_ACLE_FUNC(svld1uwq_vnum, _s32, , )(pred, base, 7); +} + +// CHECK-LABEL: define dso_local @test_svld1uwq_f32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4f32( [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: define dso_local @_Z17test_svld1uwq_f32u10__SVBool_tPKf +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4f32( [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svld1uwq_f32(svbool_t pred, float32_t const * base) { + return SVE_ACLE_FUNC(svld1uwq, _f32, , )(pred, base); +} + +// CHECK-LABEL: define dso_local @test_svld1uwq_vnum_f32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svld1uwq_vnum_f32u10__SVBool_tPKf +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1uwq.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat32_t test_svld1uwq_vnum_f32(svbool_t pred, float32_t const * base) { + return SVE_ACLE_FUNC(svld1uwq_vnum, _f32, , )(pred, base, -8); +} + + +// LD1D + +// CHECK-LABEL: define dso_local @test_svld1udq_u64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: define dso_local @_Z17test_svld1udq_u64u10__SVBool_tPKm +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svld1udq_u64(svbool_t pred, uint64_t const * base) { + return SVE_ACLE_FUNC(svld1udq, _u64, , )(pred, base); +} + +// CHECK-LABEL: define dso_local @test_svld1udq_vnum_u64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: define dso_local @_Z22test_svld1udq_vnum_u64u10__SVBool_tPKm +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint64_t test_svld1udq_vnum_u64(svbool_t pred, uint64_t const * base) { + return SVE_ACLE_FUNC(svld1udq_vnum, _u64, , )(pred, base, 7); +} + +// CHECK-LABEL: define dso_local @test_svld1udq_s64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: define dso_local @_Z17test_svld1udq_s64u10__SVBool_tPKl +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT: ret [[TMP1]]
+//
+svint64_t test_svld1udq_s64(svbool_t pred, int64_t const * base) {
+  return SVE_ACLE_FUNC(svld1udq, _s64, , )(pred, base);
+}
+
+// CHECK-LABEL: define dso_local @test_svld1udq_vnum_s64
+// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT: ret [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local @_Z22test_svld1udq_vnum_s64u10__SVBool_tPKl
+// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2i64( [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT: ret [[TMP2]]
+//
+svint64_t test_svld1udq_vnum_s64(svbool_t pred, int64_t const * base) {
+  return SVE_ACLE_FUNC(svld1udq_vnum, _s64, , )(pred, base, -8);
+}
+
+// CHECK-LABEL: define dso_local @test_svld1udq_f64
+// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2f64( [[TMP0]], ptr [[BASE]])
+// CHECK-NEXT: ret [[TMP1]]
+//
+// CPP-CHECK-LABEL: define dso_local @_Z17test_svld1udq_f64u10__SVBool_tPKd
+// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2f64( [[TMP0]], ptr [[BASE]])
+// CPP-CHECK-NEXT: ret [[TMP1]]
+//
+svfloat64_t test_svld1udq_f64(svbool_t pred, float64_t const * base) {
+  return SVE_ACLE_FUNC(svld1udq, _f64, , )(pred, base);
+}
+
+// CHECK-LABEL: define dso_local @test_svld1udq_vnum_f64
+// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 7
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2f64( [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT: ret [[TMP2]]
+//
+// CPP-CHECK-LABEL: define dso_local @_Z22test_svld1udq_vnum_f64u10__SVBool_tPKd
+// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 7
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld1udq.nxv2f64( [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT: ret [[TMP2]]
+//
+svfloat64_t test_svld1udq_vnum_f64(svbool_t pred, float64_t const * base) {
+  return SVE_ACLE_FUNC(svld1udq_vnum, _f64, , )(pred, base, 7);
+}
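The second test file below exercises the structured q-loads and the svld1q_gather forms. For the gathers, the base is a vector of 64-bit addresses and the _offset variant adds a scalar byte offset (the IsByteIndexed flag in the TableGen defs); in source terms, something like this (illustrative, assuming SVE2.1, and my reading of the generated names):

  #include <arm_sve.h>

  // One 128-bit quadword is gathered per active 64-bit address in 'bases'.
  svuint8_t gather_q(svbool_t pg, svuint64_t bases) {
    return svld1q_gather_u64base_u8(pg, bases);
  }

  // Same, displaced by a scalar byte offset.
  svuint8_t gather_q_off(svbool_t pg, svuint64_t bases, int64_t off) {
    return svld1q_gather_u64base_offset_u8(pg, bases, off);
  }
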
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c
new file mode 100644
index 0000000000000..35e0069e17c13
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c
@@ -0,0 +1,2530 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svld2q_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CHECK-NEXT: ret [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld2q_u8u10__SVBool_tPKh(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: ret [[TMP4]]
+//
+svuint8x2_t test_svld2q_u8(svbool_t pg, const uint8_t *base)
+{
+  return SVE_ACLE_FUNC(svld2q,,_u8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CHECK-NEXT: ret [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld2q_s8u10__SVBool_tPKa(
+//
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint8x2_t test_svld2q_s8(svbool_t pg, const int8_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_s8,)(pg, base); +} +// CHECK-LABEL: @test_svld2q_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_u16u10__SVBool_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16x2_t test_svld2q_u16(svbool_t pg, const uint16_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_u16,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_s16u10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint16x2_t test_svld2q_s16(svbool_t pg, const int16_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_s16,)(pg, base); +} + +// 
CHECK-LABEL: @test_svld2q_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_u32u10__SVBool_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint32x2_t test_svld2q_u32(svbool_t pg, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_u32,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_s32u10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint32x2_t test_svld2q_s32(svbool_t pg, const int32_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_s32,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: 
@_Z15test_svld2q_u64u10__SVBool_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint64x2_t test_svld2q_u64(svbool_t pg, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_u64,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_s64u10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint64x2_t test_svld2q_s64(svbool_t pg, const int64_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_s64,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_f16u10__SVBool_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret 
[[TMP5]] +// +svfloat16x2_t test_svld2q_f16(svbool_t pg, const float16_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_f16,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z16test_svld2q_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svbfloat16x2_t test_svld2q_bf16(svbool_t pg, const bfloat16_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_bf16,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_f32u10__SVBool_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat32x2_t test_svld2q_f32(svbool_t pg, const float32_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_f32,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 
1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_f64u10__SVBool_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_f64,)(pg, base); +} + +// CHECK-LABEL: @test_svld2q_vnum_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_u8u10__SVBool_tPKhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8x2_t test_svld2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum_,,u8,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_s8u10__SVBool_tPKal( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8x2_t test_svld2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_s8,)(pg, base, vnum); +} +// CHECK-LABEL: @test_svld2q_vnum_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u16u10__SVBool_tPKtl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint16x2_t test_svld2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_u16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s16u10__SVBool_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint16x2_t test_svld2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + return 
SVE_ACLE_FUNC(svld2q_vnum,,_s16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u32u10__SVBool_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint32x2_t test_svld2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_u32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s32u10__SVBool_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint32x2_t test_svld2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_s32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr 
, ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u64u10__SVBool_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint64x2_t test_svld2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_u64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s64u10__SVBool_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint64x2_t test_svld2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_s64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f16u10__SVBool_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat16x2_t test_svld2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_f16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z21test_svld2q_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svbfloat16x2_t test_svld2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_bf16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], 
i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f32u10__SVBool_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat32x2_t test_svld2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_f32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld2q_vnum_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f64u10__SVBool_tPKdl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_f64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z14test_svld3q_u8u10__SVBool_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint8x3_t test_svld3q_u8(svbool_t pg, const uint8_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_u8,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z14test_svld3q_s8u10__SVBool_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint8x3_t test_svld3q_s8(svbool_t pg, const int8_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_s8,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_u16u10__SVBool_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( 
poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svuint16x3_t test_svld3q_u16(svbool_t pg, const uint16_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_u16,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_s16u10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svint16x3_t test_svld3q_s16(svbool_t pg, const int16_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_s16,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_u32u10__SVBool_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svuint32x3_t test_svld3q_u32(svbool_t pg, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_u32,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_s32u10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svint32x3_t test_svld3q_s32(svbool_t pg, const int32_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_s32,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_u64u10__SVBool_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svuint64x3_t test_svld3q_u64(svbool_t pg, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_u64,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_s64u10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svint64x3_t test_svld3q_s64(svbool_t pg, const int64_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_s64,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_f16u10__SVBool_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } 
@llvm.aarch64.sve.ld3q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svfloat16x3_t test_svld3q_f16(svbool_t pg, const float16_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_f16,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z16test_svld3q_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svbfloat16x3_t test_svld3q_bf16(svbool_t pg, const bfloat16_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_bf16,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_f32u10__SVBool_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svfloat32x3_t test_svld3q_f32(svbool_t pg, const float32_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_f32,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_f64u10__SVBool_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_f64,)(pg, base); +} + +// CHECK-LABEL: @test_svld3q_vnum_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_u8u10__SVBool_tPKhl( +// CPP-CHECK-NEXT: 
entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svuint8x3_t test_svld3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_u8,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: ret [[TMP7]] +// +// CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_s8u10__SVBool_tPKal( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP7]] +// +svint8x3_t test_svld3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_s8,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u16u10__SVBool_tPKtl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint16x3_t test_svld3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_u16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s16u10__SVBool_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint16x3_t test_svld3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_s16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } 
@llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u32u10__SVBool_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x3_t test_svld3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_u32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s32u10__SVBool_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// 
CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x3_t test_svld3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_s32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u64u10__SVBool_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x3_t test_svld3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_u64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s64u10__SVBool_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x3_t test_svld3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_s64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f16u10__SVBool_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x3_t test_svld3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_f16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: ret 
[[TMP8]] +// +// CPP-CHECK-LABEL: @_Z21test_svld3q_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svbfloat16x3_t test_svld3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_bf16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f32u10__SVBool_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x3_t test_svld3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_f32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld3q_vnum_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f64u10__SVBool_tPKdl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_f64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z14test_svld4q_u8u10__SVBool_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint8x4_t test_svld4q_u8(svbool_t pg, const uint8_t *base) +{ + return 
SVE_ACLE_FUNC(svld4q,,_u8,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_u16u10__SVBool_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint16x4_t test_svld4q_u16(svbool_t pg, const uint16_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_u16,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_s16u10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint16x4_t test_svld4q_s16(svbool_t pg, const int16_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_s16,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_u32u10__SVBool_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint32x4_t test_svld4q_u32(svbool_t pg, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_u32,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// 
CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_s32u10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint32x4_t test_svld4q_s32(svbool_t pg, const int32_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_s32,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_u64u10__SVBool_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } 
[[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint64x4_t test_svld4q_u64(svbool_t pg, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_u64,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_s64u10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint64x4_t test_svld4q_s64(svbool_t pg, const int64_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_s64,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_f16u10__SVBool_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat16x4_t test_svld4q_f16(svbool_t pg, const float16_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_f16,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z16test_svld4q_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svbfloat16x4_t test_svld4q_bf16(svbool_t pg, const bfloat16_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_bf16,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], 
ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_f32u10__SVBool_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat32x4_t test_svld4q_f32(svbool_t pg, const float32_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_f32,)(pg, base); +} + +// CHECK-LABEL: @test_svld4q_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_f64u10__SVBool_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP3]], [[TMP4]], i64 2)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6)
+// CPP-CHECK-NEXT: ret [[TMP9]]
+//
+svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base)
+{
+ return SVE_ACLE_FUNC(svld4q,,_f64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48)
+// CHECK-NEXT: ret [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48)
+// CPP-CHECK-NEXT: ret [[TMP9]]
+//
+svuint8x4_t test_svld4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+{
+ return SVE_ACLE_FUNC(svld4q_vnum,,_u8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48)
+// CHECK-NEXT: ret [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48)
+// CPP-CHECK-NEXT: ret [[TMP9]]
+//
+svint8x4_t test_svld4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+{
+ return SVE_ACLE_FUNC(svld4q_vnum,,_s8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2
+// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16)
+// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3
+// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24)
+// CHECK-NEXT: ret [[TMP10]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16)
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24)
+// CPP-CHECK-NEXT: ret
[[TMP10]] +// +svuint16x4_t test_svld4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_u16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s16u10__SVBool_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svint16x4_t test_svld4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_s16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u32u10__SVBool_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svuint32x4_t test_svld4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_u32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s32u10__SVBool_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svint32x4_t test_svld4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_s32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u64u10__SVBool_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svuint64x4_t test_svld4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_u64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } 
[[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s64u10__SVBool_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svint64x4_t test_svld4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_s64,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 24) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f16u10__SVBool_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// 
CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svfloat16x4_t test_svld4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_f16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 24) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z21test_svld4q_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svbfloat16x4_t test_svld4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_bf16,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 12) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f32u10__SVBool_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svfloat32x4_t test_svld4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_f32,)(pg, base, vnum); +} + +// CHECK-LABEL: @test_svld4q_vnum_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 6) +// CHECK-NEXT: ret [[TMP10]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f64u10__SVBool_tPKdl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP10]] +// +svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_f64,)(pg, base, vnum); +} + + +// Gather for 128 bits +// vector base + scalar offset +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svld1q_gather_u64base_offset_u64(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_u64,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svld1q_gather_u64base_offset_s64(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_s64,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_u32u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svld1q_gather_u64base_offset_u32(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_u32,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_s32u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svld1q_gather_u64base_offset_s32(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_s32,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_u16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svld1q_gather_u64base_offset_u16(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_u16,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_s16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint16_t test_svld1q_gather_u64base_offset_s16(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_s16,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_offset_u8u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint8_t test_svld1q_gather_u64base_offset_u8(svbool_t pg, svuint64_t base, int64_t offset) +{ + return 
SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_u8,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_offset_s8u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint8_t test_svld1q_gather_u64base_offset_s8(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_s8,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_f64u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svld1q_gather_u64base_offset_f64(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_f64,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_f32u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svld1q_gather_u64base_offset_f32(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_f32,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_offset_f16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svld1q_gather_u64base_offset_f16(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_f16,)(pg, base, offset); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z38test_svld1q_gather_u64base_offset_bf16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svld1q_gather_u64base_offset_bf16(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_bf16,)(pg, base, offset); +} + +// Vector base and no offset +// CHECK-LABEL: @test_svld1q_gather_u64base_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_u64u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svld1q_gather_u64base_u64(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_u64,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_s64u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svld1q_gather_u64base_s64(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_s64,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_u32u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svld1q_gather_u64base_u32(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_u32,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_s32u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svld1q_gather_u64base_s32(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_s32,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_u16u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svld1q_gather_u64base_u16(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_u16,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_s16u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint16_t test_svld1q_gather_u64base_s16(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_s16,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z29test_svld1q_gather_u64base_u8u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint8_t test_svld1q_gather_u64base_u8(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_u8,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z29test_svld1q_gather_u64base_s8u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint8_t test_svld1q_gather_u64base_s8(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_s8,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_f64u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svld1q_gather_u64base_f64(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_f64,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_f32u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svld1q_gather_u64base_f32(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_f32,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_f16u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svld1q_gather_u64base_f16(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_f16,)(pg, base); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64base_bf16u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svld1q_gather_u64base_bf16(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_bf16,)(pg, base); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c new file mode 100644 index 0000000000000..52c16faec7f32 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1_single.c @@ -0,0 +1,255 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin.
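+// For example, the call SVE_ACLE_FUNC(svst1uwq, _u32, , ) below pastes +// A1##A3 and resolves to the overloaded name svst1uwq; without +// SVE_OVERLOADED_FORMS it pastes A1##A2##A3##A4 instead, yielding the fully +// suffixed builtin svst1uwq_u32.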
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +// ST1W + +// CHECK-LABEL: define dso_local void @test_svst1uwq_u32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1uwq_u32u10__SVBool_tPKju12__SVUint32_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1uwq_u32(svbool_t pred, uint32_t const * base, svuint32_t zt) { + SVE_ACLE_FUNC(svst1uwq, _u32, , )(pred, base, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1uwq_vnum_u32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1uwq_vnum_u32u10__SVBool_tPKju12__SVUint32_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1uwq_vnum_u32(svbool_t pred, uint32_t const * base, svuint32_t zt) { + SVE_ACLE_FUNC(svst1uwq_vnum, _u32, , )(pred, base, 1, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1uwq_s32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1uwq_s32u10__SVBool_tPKiu11__SVInt32_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1uwq_s32(svbool_t pred, int32_t const * base, svint32_t zt) { + SVE_ACLE_FUNC(svst1uwq, _s32, , )(pred, base, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1uwq_vnum_s32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 
1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1uwq_vnum_s32u10__SVBool_tPKiu11__SVInt32_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4i32( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1uwq_vnum_s32(svbool_t pred, int32_t const * base, svint32_t zt) { + SVE_ACLE_FUNC(svst1uwq_vnum, _s32, , )(pred, base, 1, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1uwq_f32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4f32( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1uwq_f32u10__SVBool_tPKfu13__SVFloat32_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4f32( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1uwq_f32(svbool_t pred, float32_t const * base, svfloat32_t zt) { + SVE_ACLE_FUNC(svst1uwq, _f32, , )(pred, base, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1uwq_vnum_f32 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4f32( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1uwq_vnum_f32u10__SVBool_tPKfu13__SVFloat32_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1uwq.nxv4f32( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1uwq_vnum_f32(svbool_t pred, float32_t const * base, svfloat32_t zt) { + SVE_ACLE_FUNC(svst1uwq_vnum, _f32, , )(pred, base, 1, zt); +} + + +// ST1D + +// CHECK-LABEL: define dso_local void @test_svst1udq_u64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1udq_u64u10__SVBool_tPKmu12__SVUint64_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1udq_u64(svbool_t pred, uint64_t const * base, svuint64_t zt) { + SVE_ACLE_FUNC(svst1udq, _u64, , )(pred, base, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1udq_vnum_u64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1udq_vnum_u64u10__SVBool_tPKmu12__SVUint64_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1udq_vnum_u64(svbool_t pred, uint64_t const * base, svuint64_t zt) { + SVE_ACLE_FUNC(svst1udq_vnum, _u64, , )(pred, base, -8, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1udq_s64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1udq_s64u10__SVBool_tPKlu11__SVInt64_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1udq_s64(svbool_t pred, int64_t const * base, svint64_t zt) { + SVE_ACLE_FUNC(svst1udq, _s64, , )(pred, base, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1udq_vnum_s64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1udq_vnum_s64u10__SVBool_tPKlu11__SVInt64_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2i64( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1udq_vnum_s64(svbool_t pred, int64_t const * base, svint64_t zt) { + SVE_ACLE_FUNC(svst1udq_vnum, _s64, , )(pred, base, -8, 
zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1udq_f64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2f64( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z17test_svst1udq_f64u10__SVBool_tPKdu13__SVFloat64_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2f64( [[ZT]], [[TMP0]], ptr [[BASE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1udq_f64(svbool_t pred, float64_t const * base, svfloat64_t zt) { + SVE_ACLE_FUNC(svst1udq, _f64, , )(pred, base, zt); +} + +// CHECK-LABEL: define dso_local void @test_svst1udq_vnum_f64 +// CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2f64( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svst1udq_vnum_f64u10__SVBool_tPKdu13__SVFloat64_t +// CPP-CHECK-SAME: ( [[PRED:%.*]], ptr noundef [[BASE:%.*]], [[ZT:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PRED]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE]], i64 -8 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1udq.nxv2f64( [[ZT]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1udq_vnum_f64(svbool_t pred, float64_t const * base, svfloat64_t zt) { + SVE_ACLE_FUNC(svst1udq_vnum, _f64, , )(pred, base, -8, zt); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c new file mode 100644 index 0000000000000..1fb5933ce75e1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c @@ -0,0 +1,2132 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svst2q_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst2q_u8u10__SVBool_tPKh11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_u8(svbool_t pg, const uint8_t *base, svuint8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_u8,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst2q_s8u10__SVBool_tPKa10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_s8(svbool_t pg, const int8_t *base, svint8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_s8,)(pg, base, zt); +} +// CHECK-LABEL: @test_svst2q_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_u16u10__SVBool_tPKt12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_u16(svbool_t pg, const uint16_t *base, svuint16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_u16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_s16u10__SVBool_tPKs11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_s16(svbool_t pg, const int16_t *base, svint16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_s16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_u32u10__SVBool_tPKj12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_u32(svbool_t pg, const uint32_t *base, svuint32x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_u32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_s32u10__SVBool_tPKi11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_s32(svbool_t pg, const int32_t *base, svint32x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_s32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], 
i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_u64u10__SVBool_tPKm12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_u64(svbool_t pg, const uint64_t *base, svuint64x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_u64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_s64u10__SVBool_tPKl11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_s64(svbool_t pg, const int64_t *base, svint64x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_s64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_f16u10__SVBool_tPKDh13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_f16(svbool_t pg, const float16_t *base, svfloat16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_f16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = 
tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst2q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_bf16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_f32u10__SVBool_tPKf13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_f32(svbool_t pg, const float32_t *base, svfloat32x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_f32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_f64u10__SVBool_tPKd13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_u8u10__SVBool_tPKhl11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum_,,u8,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_s8u10__SVBool_tPKal10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_s8,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u16u10__SVBool_tPKtl12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_u16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_s16( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s16u10__SVBool_tPKsl11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_s16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u32u10__SVBool_tPKjl12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_u32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s32u10__SVBool_tPKil11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_s32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u64u10__SVBool_tPKml12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_u64,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s64u10__SVBool_tPKll11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_s64,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8) +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f16u10__SVBool_tPKDhl13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_f16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst2q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_bf16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f32u10__SVBool_tPKfl13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], 
i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_f32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst2q_vnum_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f64u10__SVBool_tPKdl13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt); +} + +// +// ST3Q +// CHECK-LABEL: @test_svst3q_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst3q_u8u10__SVBool_tPKh11svuint8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_u8(svbool_t pg, const uint8_t *base, svuint8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_u8,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst3q_s8u10__SVBool_tPKa10svint8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_s8(svbool_t pg, const int8_t *base, svint8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_s8,)(pg, base, zt); +} +// CHECK-LABEL: @test_svst3q_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_u16u10__SVBool_tPKt12svuint16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_u16(svbool_t pg, const uint16_t *base, svuint16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_u16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_s16u10__SVBool_tPKs11svint16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_s16(svbool_t pg, const int16_t *base, svint16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_s16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], 
i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_u32u10__SVBool_tPKj12svuint32x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_u32(svbool_t pg, const uint32_t *base, svuint32x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_u32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_s32u10__SVBool_tPKi11svint32x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_s32(svbool_t pg, const int32_t *base, svint32x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_s32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_u64u10__SVBool_tPKm12svuint64x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail 
call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_u64(svbool_t pg, const uint64_t *base, svuint64x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_u64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_s64u10__SVBool_tPKl11svint64x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_s64(svbool_t pg, const int64_t *base, svint64x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_s64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_f16u10__SVBool_tPKDh13svfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_f16(svbool_t pg, const float16_t *base, svfloat16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_f16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst3q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_bf16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_f32u10__SVBool_tPKf13svfloat32x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_f32(svbool_t pg, const float32_t *base, svfloat32x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_f32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_f64u10__SVBool_tPKd13svfloat64x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_u8u10__SVBool_tPKhl11svuint8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum_,,u8,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_s8u10__SVBool_tPKal10svint8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_s8,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// 
CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u16u10__SVBool_tPKtl12svuint16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_u16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s16u10__SVBool_tPKsl11svint16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_s16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u32u10__SVBool_tPKjl12svuint32x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_u32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s32u10__SVBool_tPKil11svint32x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_s32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u64u10__SVBool_tPKml12svuint64x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: 
tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_u64,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s64u10__SVBool_tPKll11svint64x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_s64,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f16u10__SVBool_tPKDhl13svfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_f16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst3q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_bf16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f32u10__SVBool_tPKfl13svfloat32x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_f32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst3q_vnum_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f64u10__SVBool_tPKdl13svfloat64x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt); +} + +// +// ST4Q +// CHECK-LABEL: @test_svst4q_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst4q_u8u10__SVBool_tPKh11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_u8(svbool_t pg, const uint8_t *base, svuint8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_u8,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst4q_s8u10__SVBool_tPKa10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_s8(svbool_t pg, const int8_t *base, svint8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_s8,)(pg, base, zt); +} +// CHECK-LABEL: @test_svst4q_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_u16u10__SVBool_tPKt12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_u16(svbool_t pg, const uint16_t *base, svuint16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_u16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_s16u10__SVBool_tPKs11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_s16(svbool_t 
pg, const int16_t *base, svint16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_s16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_u32u10__SVBool_tPKj12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_u32(svbool_t pg, const uint32_t *base, svuint32x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_u32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_s32u10__SVBool_tPKi11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_s32(svbool_t pg, const int32_t *base, svint32x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_s32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_u64u10__SVBool_tPKm12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_u64(svbool_t pg, const uint64_t *base, svuint64x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_u64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_s64u10__SVBool_tPKl11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_s64(svbool_t pg, const int64_t *base, svint64x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_s64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_f16u10__SVBool_tPKDh13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_f16(svbool_t pg, const float16_t *base, svfloat16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_f16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst4q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_bf16,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_f32u10__SVBool_tPKf13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_f32(svbool_t pg, const float32_t *base, svfloat32x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_f32,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_f64u10__SVBool_tPKd13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_u8u10__SVBool_tPKhl11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum_,,u8,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_s8u10__SVBool_tPKal10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_s8,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u16u10__SVBool_tPKtl12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// 
+void test_svst4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_u16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s16u10__SVBool_tPKsl11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_s16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u32u10__SVBool_tPKjl12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], 
[[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_u32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s32u10__SVBool_tPKil11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_s32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u64u10__SVBool_tPKml12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_u64,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s64u10__SVBool_tPKll11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_s64,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f16u10__SVBool_tPKDhl13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail 
call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_f16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst4q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_bf16,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f32u10__SVBool_tPKfl13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_f32,)(pg, base, vnum, zt); +} + +// CHECK-LABEL: @test_svst4q_vnum_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f64u10__SVBool_tPKdl13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt); +} + +// Scatter for 128 bits +// vector base + scalar offset +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_u64(svbool_t pg, svuint64_t base, int64_t offset, svuint64_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u64)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// 
CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_s64(svbool_t pg, svuint64_t base, int64_t offset, svint64_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s64)(pg, base, offset, data); +} + + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_u32u10__SVBool_tu12__SVUint64_tlu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_u32(svbool_t pg, svuint64_t base, int64_t offset, svuint32_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u32)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_s32u10__SVBool_tu12__SVUint64_tlu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_s32(svbool_t pg, svuint64_t base, int64_t offset, svint32_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s32)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_u16u10__SVBool_tu12__SVUint64_tlu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_u16(svbool_t pg, svuint64_t base, int64_t offset, svuint16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, 
_u64base, _offset, _u16)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_s16u10__SVBool_tu12__SVUint64_tlu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_s16(svbool_t pg, svuint64_t base, int64_t offset, svint16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s16)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_offset_u8u10__SVBool_tu12__SVUint64_tlu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_u8(svbool_t pg, svuint64_t base, int64_t offset, svuint8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u8)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_offset_s8u10__SVBool_tu12__SVUint64_tlu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_s8(svbool_t pg, svuint64_t base, int64_t offset, svint8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s8)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_f64(svbool_t pg, svuint64_t base, int64_t offset, svfloat64_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _f64)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_f32u10__SVBool_tu12__SVUint64_tlu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_f32(svbool_t pg, svuint64_t base, int64_t offset, svfloat32_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _f32)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_f16u10__SVBool_tu12__SVUint64_tlu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_f16(svbool_t pg, svuint64_t base, int64_t offset, svfloat16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _f16)(pg, base, offset, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z39test_svst1q_scatter_u64base_offset_bf16u10__SVBool_tu12__SVUint64_tlu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_bf16(svbool_t pg, svuint64_t base, int64_t offset, svbfloat16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _bf16)(pg, base, offset, data); +} + +// Vector Base and no Offset +// CHECK-LABEL: @test_svst1q_scatter_u64base_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = 
tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_u64u10__SVBool_tu12__SVUint64_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_u64(svbool_t pg, svuint64_t base, svuint64_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u64)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_s64u10__SVBool_tu12__SVUint64_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_s64(svbool_t pg, svuint64_t base, svint64_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s64)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_u32u10__SVBool_tu12__SVUint64_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_u32(svbool_t pg, svuint64_t base, svuint32_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u32)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_s32u10__SVBool_tu12__SVUint64_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_s32(svbool_t pg, svuint64_t base, svint32_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s32)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_u16( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_u16u10__SVBool_tu12__SVUint64_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_u16(svbool_t pg, svuint64_t base, svuint16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u16)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_s16u10__SVBool_tu12__SVUint64_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_s16(svbool_t pg, svuint64_t base, svint16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s16)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svst1q_scatter_u64base_u8u10__SVBool_tu12__SVUint64_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_u8(svbool_t pg, svuint64_t base, svuint8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u8)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svst1q_scatter_u64base_s8u10__SVBool_tu12__SVUint64_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_s8(svbool_t pg, svuint64_t base, svint8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s8)(pg, base, data); +} 
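
A minimal usage sketch (not part of the patch) of the two scatter forms exercised by these tests. The non-overloaded names below are exactly what SVE_ACLE_FUNC expands to in the tests; building it assumes a compiler that provides the sve2p1 ACLE intrinsics:

#include <arm_sve.h>

void store_quadwords(svbool_t pg, svuint64_t addrs, svuint64_t vals) {
  // Vector base + scalar offset: each active 128-bit chunk of vals is stored
  // at the matching 64-bit address from addrs, displaced by 16 bytes.
  svst1q_scatter_u64base_offset_u64(pg, addrs, 16, vals);
  // No-offset form; as the CHECK lines above show, it lowers to the same
  // st1q.scatter.scalar.offset intrinsic with an offset of 0.
  svst1q_scatter_u64base_u64(pg, addrs, vals);
}
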
+ +// CHECK-LABEL: @test_svst1q_scatter_u64base_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_f64u10__SVBool_tu12__SVUint64_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_f64(svbool_t pg, svuint64_t base, svfloat64_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _f64)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_f32u10__SVBool_tu12__SVUint64_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_f32(svbool_t pg, svuint64_t base, svfloat32_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _f32)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_f16u10__SVBool_tu12__SVUint64_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_f16(svbool_t pg, svuint64_t base, svfloat16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _f16)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64base_bf16u10__SVBool_tu12__SVUint64_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_bf16(svbool_t pg, svuint64_t base, 
svbfloat16_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_bf16)(pg, base, data); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 4815dde06afdc..35129d6b6c16b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1457,6 +1457,15 @@ class AdvSIMD_GatherLoad_VS_Intrinsic ], [IntrReadMem]>; +class AdvSIMD_GatherLoadQ_VS_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [ + llvm_nxv1i1_ty, + llvm_anyvector_ty, + llvm_i64_ty + ], + [IntrReadMem]>; + class AdvSIMD_GatherLoad_VS_WriteFFR_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [ @@ -1495,6 +1504,15 @@ class AdvSIMD_ScatterStore_VS_Intrinsic ], [IntrWriteMem]>; +class AdvSIMD_ScatterStoreQ_VS_Intrinsic + : DefaultAttrsIntrinsic<[], + [ + llvm_anyvector_ty, + llvm_nxv1i1_ty, + llvm_anyvector_ty, + llvm_i64_ty + ], + [IntrWriteMem]>; class SVE_gather_prf_SV : DefaultAttrsIntrinsic<[], @@ -2132,6 +2150,8 @@ def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_In def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic; +// 128-bit loads, unscaled offsets +def int_aarch64_sve_ld1q_gather_scalar_offset : AdvSIMD_GatherLoadQ_VS_Intrinsic; // // First-faulting gather loads: scalar base + vector offsets @@ -2208,6 +2228,9 @@ def int_aarch64_sve_st1_scatter_uxtw_index def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic; +// 128-bit stores, unscaled offsets +def int_aarch64_sve_st1q_scatter_scalar_offset : AdvSIMD_ScatterStoreQ_VS_Intrinsic; + // // Non-temporal scatter stores: scalar base + vector offsets // @@ -2614,6 +2637,37 @@ def int_aarch64_sve_ld1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic; def int_aarch64_sve_ldnt1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic; def int_aarch64_sve_ldnt1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic; +// +// SVE2.1 - Contiguous loads to quadword (single vector) +// + +class SVE2p1_Single_Load_Quadword + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_nxv1i1_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_aarch64_sve_ld1uwq : SVE2p1_Single_Load_Quadword; +def int_aarch64_sve_ld1udq : SVE2p1_Single_Load_Quadword; + +// +// SVE2.1 - Contiguous store from quadword (single vector) +// + +class SVE2p1_Single_Store_Quadword + : DefaultAttrsIntrinsic<[], + [llvm_anyvector_ty, llvm_nxv1i1_ty, llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_aarch64_sve_st1uwq : SVE2p1_Single_Store_Quadword; +def int_aarch64_sve_st1udq : SVE2p1_Single_Store_Quadword; + + +def int_aarch64_sve_ld2q_sret : AdvSIMD_2Vec_PredLoad_Intrinsic; +def int_aarch64_sve_ld3q_sret : AdvSIMD_3Vec_PredLoad_Intrinsic; +def int_aarch64_sve_ld4q_sret : AdvSIMD_4Vec_PredLoad_Intrinsic; + +def int_aarch64_sve_st2q : AdvSIMD_2Vec_PredStore_Intrinsic; +def int_aarch64_sve_st3q : AdvSIMD_3Vec_PredStore_Intrinsic; +def int_aarch64_sve_st4q : AdvSIMD_4Vec_PredStore_Intrinsic; + // // SVE2.1 - Contiguous stores to multiple consecutive vectors // diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7617dccdeee39..136512db123b3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1787,7 +1787,7 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_ri, unsigned Opc_rr, bool IsIntr) { - assert(Scale < 4 && 
"Invalid scaling value."); + assert(Scale < 5 && "Invalid scaling value."); SDLoc DL(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); @@ -4692,6 +4692,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_ld64b: SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); return; + case Intrinsic::aarch64_sve_ld2q_sret: { + SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true); + return; + } + case Intrinsic::aarch64_sve_ld3q_sret: { + SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true); + return; + } + case Intrinsic::aarch64_sve_ld4q_sret: { + SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true); + return; + } case Intrinsic::aarch64_sve_ld2_sret: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, @@ -5904,6 +5916,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } break; } + case Intrinsic::aarch64_sve_st2q: { + SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM); + return; + } + case Intrinsic::aarch64_sve_st3q: { + SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM); + return; + } + case Intrinsic::aarch64_sve_st4q: { + SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM); + return; + } case Intrinsic::aarch64_sve_st2: { if (VT == MVT::nxv16i8) { SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); @@ -6685,14 +6709,32 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1); case Intrinsic::aarch64_sve_ld2_sret: + case Intrinsic::aarch64_sve_ld2q_sret: return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2); + case Intrinsic::aarch64_sve_st2q: + return getPackedVectorTypeFromPredicateType( + Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2); case Intrinsic::aarch64_sve_ld3_sret: + case Intrinsic::aarch64_sve_ld3q_sret: return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3); + case Intrinsic::aarch64_sve_st3q: + return getPackedVectorTypeFromPredicateType( + Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3); case Intrinsic::aarch64_sve_ld4_sret: + case Intrinsic::aarch64_sve_ld4q_sret: return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4); + case Intrinsic::aarch64_sve_st4q: + return getPackedVectorTypeFromPredicateType( + Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4); + case Intrinsic::aarch64_sve_ld1udq: + case Intrinsic::aarch64_sve_st1udq: + return EVT(MVT::nxv1i64); + case Intrinsic::aarch64_sve_ld1uwq: + case Intrinsic::aarch64_sve_st1uwq: + return EVT(MVT::nxv1i32); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d42ae4ff93a44..f744643a9d9b3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2578,6 +2578,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1Q_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) @@ -2602,6 +2603,7 @@ const char 
*AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
+    MAKE_CASE(AArch64ISD::SST1Q_PRED)
     MAKE_CASE(AArch64ISD::ST1_PRED)
     MAKE_CASE(AArch64ISD::SST1_PRED)
     MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
@@ -22759,8 +22761,11 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // For FPs, ACLE only supports _packed_ single and double precision types.
+  // SST1Q_PRED is the ST1Q scatter store for SVE2.1; it also accepts the packed f16/bf16 types.
   if (SrcElVT.isFloatingPoint())
-    if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
+    if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) &&
+        (Opcode != AArch64ISD::SST1Q_PRED ||
+         ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16))))
       return SDValue();
 
   // Depending on the addressing mode, this is either a pointer or a vector of
@@ -23729,6 +23734,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
       return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
     case Intrinsic::aarch64_sve_ld1_gather:
       return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
+    case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+      return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1Q_MERGE_ZERO);
     case Intrinsic::aarch64_sve_ld1_gather_index:
       return performGatherLoadCombine(N, DAG,
                                       AArch64ISD::GLD1_SCALED_MERGE_ZERO);
@@ -23772,6 +23779,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
       return performGatherLoadCombine(N, DAG,
                                       AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
+    case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+      return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_PRED);
     case Intrinsic::aarch64_sve_st1_scatter:
       return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
     case Intrinsic::aarch64_sve_st1_scatter_index:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2a039488f2a9a..b4e89297ad587 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -375,6 +375,7 @@ enum NodeType : unsigned {
   GLD1_UXTW_SCALED_MERGE_ZERO,
   GLD1_SXTW_SCALED_MERGE_ZERO,
   GLD1_IMM_MERGE_ZERO,
+  GLD1Q_MERGE_ZERO,
 
   // Signed gather loads
   GLD1S_MERGE_ZERO,
@@ -419,6 +420,7 @@ enum NodeType : unsigned {
   SST1_UXTW_SCALED_PRED,
   SST1_SXTW_SCALED_PRED,
   SST1_IMM_PRED,
+  SST1Q_PRED,
 
   // Non-temporal scatter store
   SSTNT1_PRED,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e69287a5307ce..21cafe9b6c445 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -100,6 +100,8 @@ def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_Z
 def AArch64ldnt1_gather_z  : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
 def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
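
An aside on the performScatterStoreCombine change above: the floating-point gating can be restated as a plain predicate. This is a sketch only; the enum stands in for the MVT checks in the real code:

#include <stdbool.h>

enum FpVecVT { NXV4F32, NXV2F64, NXV8F16, NXV8BF16, OTHER_FP_VT };

static bool scatter_accepts_fp(enum FpVecVT vt, bool is_sst1q_pred) {
  // Packed f32/f64 vectors are accepted for every scatter-store opcode.
  if (vt == NXV4F32 || vt == NXV2F64)
    return true;
  // Only SST1Q_PRED (the SVE2.1 ST1Q) also accepts packed f16 and bf16.
  return is_sst1q_pred && (vt == NXV8F16 || vt == NXV8BF16);
}
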
+// Scatter vector base + scalar offset
+def AArch64st1q_scatter : SDNode<"AArch64ISD::SST1Q_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
 
 // AArch64 SVE/SVE2 - the remaining node definitions
 //
 
@@ -1157,7 +1162,7 @@ let Predicates = [HasSVE] in {
   defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>;
   defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>;
   let Predicates = [HasSVE2p1] in {
-  defm GLD1Q : sve_mem_128b_gld_64_unscaled<"ld1q">;
+  defm GLD1Q : sve_mem_128b_gld_64_unscaled<"ld1q", AArch64ld1q_gather_z>;
   }
 
   // Gathers using scaled 64-bit offsets, e.g.
@@ -1338,6 +1343,49 @@ let Predicates = [HasSVEorSME] in {
   let Predicates = [HasSVE2p1] in {
   defm ST1D_Q : sve_mem_cst_ss<0b1110, "st1d", Z_q, ZPR128, GPR64NoXZRshifted64>;
   }
+
+  multiclass sve_ld1q_pat<ValueType Ty, ValueType PredTy, SDPatternOperator Load1qOp, Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
+    let AddedComplexity = 2 in {
+      def _reg_imm : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$imm))),
+                         (RegImmInst PPR3bAny:$Pg, GPR64sp:$base, simm4s1:$imm)>;
+    }
+
+    let AddedComplexity = 1 in {
+      def _reg_reg : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (AddrCP GPR64sp:$base, GPR64:$offset))),
+                         (RegRegInst PPR3bAny:$Pg, GPR64sp:$base, GPR64:$offset)>;
+    }
+
+    def _default : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (i64 GPR64sp:$base))),
+                       (RegImmInst PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
+  }
+
+  multiclass sve_st1q_pat<ValueType DataType, ValueType PredTy, SDPatternOperator Store1qOp, Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
+    let AddedComplexity = 2 in {
+      def _reg_imm : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$imm)),
+                         (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, simm4s1:$imm)>;
+    }
+
+    let AddedComplexity = 1 in {
+      def _reg_reg : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (AddrCP GPR64sp:$base, GPR64:$offset)),
+                         (RegRegInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, GPR64:$offset)>;
+    }
+
+    def _default : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (i64 GPR64sp:$base)),
+                       (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
+  }
+
+  // ld1uwq/st1uwq
+  defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+
+  // ld1udq/st1udq
+  defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+
 } // End HasSVEorSME
 
 let Predicates = [HasSVE] in {
@@ -1385,7 +1433,7 @@ let Predicates = [HasSVE] in {
   defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
   defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;
   let Predicates = [HasSVE2p1] in {
-  defm SST1Q : sve_mem_sst_128b_64_unscaled<"st1q">;
+  defm SST1Q : sve_mem_sst_128b_64_unscaled<"st1q", AArch64st1q_scatter>;
   }
 
   // Scatters using scaled 64-bit offsets, e.g.
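
The patterns just added give the contiguous quadword loads/stores reg+imm, reg+reg, and plain-base addressing. A short C sketch of the intrinsics they serve; the svld1uwq/svst1uwq spellings follow this patch's naming scheme but are assumptions here, since only the IR-level names appear in the tests that follow:

#include <arm_sve.h>

void copy_quadwords(svbool_t pg, const float *src, float *dst, int64_t vnum) {
  // Plain scalar base: matched by the _default pattern (immediate #0 form).
  svfloat32_t v = svld1uwq_f32(pg, src);
  // VL-scaled displacement: folds into the reg+imm form while the
  // displacement fits the simm4 range [-8, 7], as the ld1-single tests below
  // show; out-of-range displacements fall back to addvl plus the plain base.
  svfloat32_t w = svld1uwq_vnum_f32(pg, src, vnum);
  svst1uwq_f32(pg, dst, v);
  svst1uwq_vnum_f32(pg, dst, vnum, w);
}
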
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 60c87edfbfee6..e765926d8a635 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9801,11 +9801,30 @@ class sve_mem_128b_gld_64_unscaled
 }
 
-multiclass sve_mem_128b_gld_64_unscaled<string asm> {
+multiclass sve_mem_128b_gld_64_unscaled<string asm, SDPatternOperator op> {
   def NAME : sve_mem_128b_gld_64_unscaled<asm>;
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
                  (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+
+
+  def : Pat<(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2i64)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4i32)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(nxv8i16 (op (nxv8i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8i16)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(nxv16i8 (op (nxv16i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv16i8)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+
+  def : Pat<(nxv2f64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2f64)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(nxv4f32 (op (nxv4i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4f32)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(nxv8f16 (op (nxv8i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8f16)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(nxv8bf16 (op (nxv8i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8bf16)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
 }
 
 class sve_mem_sst_128b_64_unscaled
@@ -9828,11 +9847,29 @@ class sve_mem_sst_128b_64_unscaled
 }
 
-multiclass sve_mem_sst_128b_64_unscaled<string asm> {
+multiclass sve_mem_sst_128b_64_unscaled<string asm, SDPatternOperator op> {
   def NAME : sve_mem_sst_128b_64_unscaled<asm>;
   def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
                  (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+
+  def : Pat<(op (nxv2i64 Z_q:$Zt), (nxv2i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2i64),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(op (nxv4i32 Z_q:$Zt), (nxv4i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4i32),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(op (nxv8i16 Z_q:$Zt), (nxv8i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8i16),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(op (nxv16i8 Z_q:$Zt), (nxv16i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv16i8),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+
+  def : Pat<(op (nxv2f64 Z_q:$Zt), (nxv2i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv2f64),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(op (nxv4f32 Z_q:$Zt), (nxv4i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv4f32),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(op (nxv8f16 Z_q:$Zt), (nxv8i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8f16),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
+  def : Pat<(op (nxv8bf16 Z_q:$Zt), (nxv8i1 PPR3bAny:$gp), (nxv2i64 ZPR64:$Zn), (i64 GPR64sp:$Rm), nxv8bf16),
+            (!cast<Instruction>(NAME) Z_q:$Zt, PPR3bAny:$gp, ZPR64:$Zn, GPR64:$Rm)>;
 }
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll new file mode 100644 index 0000000000000..64f15897ebb9a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +; +; LD1Q: vector base + unscaled offset +; e.g. ld1q { z0.q }, p0/z, [z0.d, x0] +; +define @ld1q_gather_u64base_i8( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @ld1q_gather_u64base_i16( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @ld1q_gather_u64base_i32( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @ld1q_gather_u64base_i64( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @ld1q_gather_u64base_f16( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @ld1q_gather_u64base_f32( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + + +define @ld1q_gather_u64base_f64( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @ld1q_gather_u64base_bf16( %pg, %base, i64 %offset) { +; CHECK-LABEL: ld1q_gather_u64base_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(, , i64) +declare 
@llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(, , i64) + diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-ld1-single.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-ld1-single.ll new file mode 100644 index 0000000000000..1fbbab875403b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-ld1-single.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s + +; LD1W + +define @test_svld1uwq_i32_ss( %pred, ptr %base, i64 %offset) { +; CHECK-LABEL: test_svld1uwq_i32_ss: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.q }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %gep = getelementptr i32, ptr %base, i64 %offset + %res = call @llvm.aarch64.sve.ld1uwq.nxv4i32( %pred, ptr %gep) + ret %res +} + +define @test_svld1uwq_i32_si( %pred, * %base) { +; CHECK-LABEL: test_svld1uwq_i32_si: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.q }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: ld1w { z1.q }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %gep1 = getelementptr inbounds , * %base, i64 -8 + %res1 = call @llvm.aarch64.sve.ld1uwq.nxv4i32( %pred, ptr %gep1) + + %gep2 = getelementptr inbounds , * %base, i64 7 + %res2 = call @llvm.aarch64.sve.ld1uwq.nxv4i32( %pred, ptr %gep2) + + %res = add %res1, %res2 + ret %res +} + +define @test_svld1uwq_i32_out_of_bound( %pred, * %base) { +; CHECK-LABEL: test_svld1uwq_i32_out_of_bound: +; CHECK: // %bb.0: +; CHECK-NEXT: addvl x8, x0, #2 +; CHECK-NEXT: ld1w { z0.q }, p0/z, [x8] +; CHECK-NEXT: ret + %gep = getelementptr inbounds , * %base, i64 8 + %res = call @llvm.aarch64.sve.ld1uwq.nxv4i32( %pred, ptr %gep) + + ret %res +} + +define @test_svld1uwq_f32_ss( %pred, ptr %base, i64 %offset) { +; CHECK-LABEL: test_svld1uwq_f32_ss: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.q }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %gep = getelementptr float, ptr %base, i64 %offset + %res = call @llvm.aarch64.sve.ld1uwq.nxv4f32( %pred, ptr %gep) + ret %res +} + +define @test_svld1uwq_f32_si( %pred, * %base) { +; CHECK-LABEL: test_svld1uwq_f32_si: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.q }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: ld1w { z1.q }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %gep1 = getelementptr inbounds , * %base, i64 -8 + %res1 = call @llvm.aarch64.sve.ld1uwq.nxv4f32( %pred, ptr %gep1) + + %gep2 = getelementptr inbounds , * %base, i64 7 + %res2 = call @llvm.aarch64.sve.ld1uwq.nxv4f32( %pred, ptr %gep2) + + %res = fadd %res1, %res2 + ret %res +} + +; LD1D + +define @test_svld1udq_i64_ss( %pred, ptr %base, i64 %offset) { +; CHECK-LABEL: test_svld1udq_i64_ss: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1d { z0.q }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %gep = getelementptr i64, ptr %base, i64 %offset + %res = call @llvm.aarch64.sve.ld1udq.nxv2i64( %pred, ptr %gep) + ret %res +} + +define @test_svld1udq_i64_si( %pred, * %base) { +; CHECK-LABEL: test_svld1udq_i64_si: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1d { z0.q }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: ld1d { z1.q }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %gep1 = getelementptr inbounds , * %base, i64 -8 + %res1 = call @llvm.aarch64.sve.ld1udq.nxv2i64( %pred, ptr %gep1) + + %gep2 = getelementptr inbounds , * %base, i64 7 + %res2 = call @llvm.aarch64.sve.ld1udq.nxv2i64( %pred, ptr %gep2) + + %res = add %res1, %res2 + ret %res +} + +define 
@test_svld1udq_i64_out_of_bound( %pred, * %base) { +; CHECK-LABEL: test_svld1udq_i64_out_of_bound: +; CHECK: // %bb.0: +; CHECK-NEXT: addvl x8, x0, #-5 +; CHECK-NEXT: ld1d { z0.q }, p0/z, [x8] +; CHECK-NEXT: ret + %gep = getelementptr inbounds , * %base, i64 -10 + %res = call @llvm.aarch64.sve.ld1udq.nxv2i64( %pred, ptr %gep) + + ret %res +} + +define @test_svld1udq_f64_ss( %pred, ptr %base, i64 %offset) { +; CHECK-LABEL: test_svld1udq_f64_ss: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1d { z0.q }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %gep = getelementptr double, ptr %base, i64 %offset + %res = call @llvm.aarch64.sve.ld1udq.nxv2f64( %pred, ptr %gep) + ret %res +} + +define @test_svld1udq_f64_si( %pred, * %base) { +; CHECK-LABEL: test_svld1udq_f64_si: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1d { z0.q }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: ld1d { z1.q }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: fadd z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %gep1 = getelementptr inbounds , * %base, i64 -8 + %res1 = call @llvm.aarch64.sve.ld1udq.nxv2f64( %pred, ptr %gep1) + + %gep2 = getelementptr inbounds , * %base, i64 7 + %res2 = call @llvm.aarch64.sve.ld1udq.nxv2f64( %pred, ptr %gep2) + + %res = fadd %res1, %res2 + ret %res +} + +declare @llvm.aarch64.sve.ld1uwq.nxv4i32(, ptr) +declare @llvm.aarch64.sve.ld1uwq.nxv4f32(, ptr) + +declare @llvm.aarch64.sve.ld1udq.nxv2i64(, ptr) +declare @llvm.aarch64.sve.ld1udq.nxv2f64(, ptr) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-loads.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-loads.ll new file mode 100644 index 0000000000000..a5d6bd4f930e9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-loads.ll @@ -0,0 +1,797 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +;;LD2Q + +define { , } @ld2q_si_i8_off16( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_i8_off16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + %base_ptr = bitcast * %base to i8 * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_si_i8_off14( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_i8_off14: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #14, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 14 + %base_ptr = bitcast * %base to i8 * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_ss_i8( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_i8( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_i16( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + %base_ptr = bitcast * %base to i16 * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( %pg, ptr %base_ptr); + ret { , } %res +} + +define 
{ , } @ld2q_ss_i16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_i16( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_i32( %pg, ptr %addr ) { +; CHECK-LABEL: ld2q_si_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , ptr %addr, i64 -16 + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( %pg, ptr %base); + ret { , } %res +} + +define { , } @ld2q_ss_i32( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_i32( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_i64( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + %base_ptr = bitcast * %base to i64 * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_ss_i64( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_i64( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_f16( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + %base_ptr = bitcast * %base to half * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_ss_f16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_f16( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_f32( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + 
%base_ptr = bitcast * %base to float * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_ss_f32( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_f32( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_f64( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + %base_ptr = bitcast * %base to double * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_ss_f64( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_f64( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( %pg, ptr %addr); + ret { , } %res +} + +define { , } @ld2q_si_bf16( %pg, *%addr ) { +; CHECK-LABEL: ld2q_si_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + %base_ptr = bitcast * %base to bfloat * + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( %pg, ptr %base_ptr); + ret { , } %res +} + +define { , } @ld2q_ss_bf16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld2q_ss_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( %pg, ptr %addr2); + ret { , } %res +} + +define { , } @ld2q_bf16( %pg, ptr %addr) { +; CHECK-LABEL: ld2q_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2q { z0.q, z1.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( %pg, ptr %addr); + ret { , } %res +} + +;; LD3Q +define { , , } @ld3q_si_i8_off24( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_i8_off24: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to i8 * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_si_i8_off21( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_i8_off21: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + %base_ptr = bitcast * %base to i8 * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_ss_i8( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr 
i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_i8( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_i16( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to i16 * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_ss_i16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_i16( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8i16( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_i32( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to i32 * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_ss_i32( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_i32( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4i32( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_i64( %pg, ptr %addr ) { +; CHECK-LABEL: ld3q_si_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %addr2 = getelementptr , ptr %addr, i64 -24 + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( %pg, ptr %addr2); + ret {, , } %res +} + +define { , , } @ld3q_ss_i64( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_i64( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2i64( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_f16( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to half * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8f16( %pg, ptr %base_ptr); + ret { , 
, } %res +} + +define { , , } @ld3q_ss_f16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8f16( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_f16( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8f16( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_f32( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to float * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_ss_f32( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_f32( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv4f32( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_f64( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to double * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_ss_f64( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_f64( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv2f64( %pg, ptr %addr); + ret { , , } %res +} + +define { , , } @ld3q_si_bf16( %pg, *%addr ) { +; CHECK-LABEL: ld3q_si_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + %base_ptr = bitcast * %base to bfloat * + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( %pg, ptr %base_ptr); + ret { , , } %res +} + +define { , , } @ld3q_ss_bf16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld3q_ss_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( %pg, ptr %addr2); + ret { , , } %res +} + +define { , , } @ld3q_bf16( %pg, ptr %addr) { +; CHECK-LABEL: ld3q_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3q { z0.q - z2.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , } @llvm.aarch64.sve.ld3q.sret.nxv8bf16( %pg, ptr %addr); + ret { , , } %res +} + +;; LD4Q +define { , , , } @ld4q_si_i8_off32( %pg, *%addr ) { +; 
CHECK-LABEL: ld4q_si_i8_off32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to i8 * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_i8_off28( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_i8_off28: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + %base_ptr = bitcast * %base to i8 * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_i8( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_i8( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( %pg, ptr %addr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_i16( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to i16 * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_i16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_i16( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( %pg, ptr %addr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_i32( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to i32 * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_i32( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_i32( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( %pg, ptr %addr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_i64( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to i64 * + %res = call { , , , } 
@llvm.aarch64.sve.ld4q.sret.nxv2i64( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_i64( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_i64( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( %pg, ptr %addr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_f16( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to half * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_f16( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_f16( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( %pg, ptr %addr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_f32( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to float * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_f32( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_f32( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( %pg, ptr %addr); + ret { , , , } %res +} + +define { , , , } @ld4q_si_f64( %pg, *%addr ) { +; CHECK-LABEL: ld4q_si_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + %base_ptr = bitcast * %base to double * + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( %pg, ptr %base_ptr); + ret { , , , } %res +} + +define { , , , } @ld4q_ss_f64( %pg, ptr %addr, i64 %a) { +; CHECK-LABEL: ld4q_ss_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %addr2 = getelementptr i128, ptr %addr, i64 %a + %res = call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( %pg, ptr %addr2); + ret { , , , } %res +} + +define { , , , } @ld4q_f64( %pg, ptr %addr) { +; CHECK-LABEL: ld4q_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4q { z0.q - z3.q }, p0/z, [x0] +; CHECK-NEXT: ret + %res = call { , , , } 
@llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> %pg, ptr %addr);
+  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4q_si_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> *%addr ) {
+; CHECK-LABEL: ld4q_si_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
+  %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -32
+  %base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
+  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base_ptr);
+  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
+}
+
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4q_ss_bf16(<vscale x 8 x i1> %pg, ptr %addr, i64 %a) {
+; CHECK-LABEL: ld4q_ss_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0, x1, lsl #4]
+; CHECK-NEXT:    ret
+  %addr2 = getelementptr i128, ptr %addr, i64 %a
+  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr2);
+  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
+}
+
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4q_bf16(<vscale x 8 x i1> %pg, ptr %addr) {
+; CHECK-LABEL: ld4q_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4q { z0.q - z3.q }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> %pg, ptr %addr);
+  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
+}
+
+
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount"), ptr)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount"), ptr)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount"), ptr)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount"), ptr)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount"), ptr)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount"), ptr)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount"), ptr)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8bf16(target("aarch64.svcount"), ptr)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1>, ptr)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1>, ptr)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1>, ptr)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1>, ptr)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1>, ptr)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1>, ptr)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1>, ptr)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1>, ptr)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1>, ptr)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1>, ptr)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1>, ptr)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1>, ptr)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1>, ptr)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1>, ptr)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1>, ptr)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1>, ptr)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1>, ptr)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1>, ptr)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1>, ptr)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1>, ptr)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1>, ptr)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
new file mode 100644
index 0000000000000..48ec0161cb8b8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
@@ -0,0 +1,910 @@
+; NOTE: Assertions
have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +; +; ST2Q +; +define void @st2q_ss_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv16i8(%v0, %v1 , + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_i16( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv8i16( %v0, + %v1, + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_i32( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv4i32( %v0, + %v1, + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_i64( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv2i64( %v0, + %v1, + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_f16( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv8f16( %v0, + %v1, + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_f32( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv4f32( %v0, + %v1, + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_f64( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st2q_ss_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv2f64( %v0, + %v1, + %pred, + ptr %1) + ret void +} + +define void @st2q_ss_bf16( %v0, %v1, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: 
st2q_ss_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st2q.nxv8bf16( %v0, + %v1, + %pred, + ptr %1) + ret void +} + + +define void @st2q_si_i8_off16( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2q_si_i8_off16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + call void @llvm.aarch64.sve.st2q.nxv16i8( %v0, + %v1, + %pred, + i8* %base) + ret void +} + +define void @st2q_si_i8_off14( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2q_si_i8_off14: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 14 + call void @llvm.aarch64.sve.st2q.nxv16i8( %v0, + %v1, + %pred, + i8* %base) + ret void +} + +define void @st2q_si_i16( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep = getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv8i16( %v0, + %v1, + %pred, + i8* %gep) + ret void +} + +define void @st2q_si_i32( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep = getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv4i32( %v0, + %v1, + %pred, + i32* %gep) + ret void +} + +define void @st2q_si_i64( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep = getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv2i64( %v0, + %v1, + %pred, + i64* %gep) + ret void +} + +define void @st2q_si_f16( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep = getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv8f16( %v0, + %v1, + %pred, + half* %gep) + ret void +} + +define void @st2q_si_f32( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep = getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv4f32( %v0, + 
%v1, + %pred, + float* %gep) + ret void +} + +define void @st2q_si_f64( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep= getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv2f64( %v0, + %v1, + %pred, + double* %gep) + ret void +} + +define void @st2q_si_bf16( %v0, %v1, %pred, ptr %base) { +; CHECK-LABEL: st2q_si_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %gep = getelementptr , ptr %base, i64 14 + call void @llvm.aarch64.sve.st2q.nxv8bf16( %v0, + %v1, + %pred, + bfloat* %gep) + ret void +} + + +; +; ST3Q +; +define void @st3q_ss_i8( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv16i8(%v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_i16( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv8i16( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_i32( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv4i32( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_i64( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv2i64( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_f16( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // 
kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv8f16( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_f32( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv4f32( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_f64( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv2f64( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_ss_bf16( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st3q_ss_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st3q.nxv8bf16( %v0, + %v1, + %v2, + %pred, + ptr %1) + ret void +} + +define void @st3q_si_i8_off24( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_i8_off24: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + call void @llvm.aarch64.sve.st3q.nxv16i8( %v0, + %v1, + %v2, + %pred, + i8* %base) + ret void +} + +define void @st3q_si_i8_off21( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_i8_off21: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv16i8( %v0, + %v1, + %v2, + %pred, + i8* %base) + ret void +} + +define void @st3q_si_i16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = 
getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv8i16( %v0, + %v1, + %v2, + %pred, + i8* %base) + ret void +} + +define void @st3q_si_i32( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv4i32( %v0, + %v1, + %v2, + %pred, + i32* %base) + ret void +} + +define void @st3q_si_i64( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv2i64( %v0, + %v1, + %v2, + %pred, + i64* %base) + ret void +} + +define void @st3q_si_f16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv8f16( %v0, + %v1, + %v2, + %pred, + half* %base) + ret void +} + +define void @st3q_si_f32( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv4f32( %v0, + %v1, + %v2, + %pred, + float* %base) + ret void +} + +define void @st3q_si_f64( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv2f64( %v0, + %v1, + %v2, + %pred, + double* %base) + ret void +} + +define void @st3q_si_bf16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3q_si_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3q.nxv8bf16( %v0, + %v1, + %v2, + %pred, + bfloat* %base) + ret void +} + +; +; ST4Q +; +define void @st4q_ss_i8( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) 
{ +; CHECK-LABEL: st4q_ss_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv16i8(%v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_i16( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv8i16( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_i32( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv4i32( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_i64( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv2i64( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_f16( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv8f16( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_f32( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // 
kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv4f32( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_f64( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv2f64( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_ss_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { +; CHECK-LABEL: st4q_ss_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] +; CHECK-NEXT: ret + %1 = getelementptr i128, ptr %addr, i64 %offset + call void @llvm.aarch64.sve.st4q.nxv8bf16( %v0, + %v1, + %v2, + %v3, + %pred, + ptr %1) + ret void +} + +define void @st4q_si_i8_off32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_i8_off32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + call void @llvm.aarch64.sve.st4q.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + i8* %base) + ret void +} + +define void @st4q_si_i8_off28( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_i8_off28: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + i8* %base) + ret void +} + +define void @st4q_si_i16( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 
killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv8i16( %v0, + %v1, + %v2, + %v3, + %pred, + i8* %base) + ret void +} + +define void @st4q_si_i32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base1 = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv4i32( %v0, + %v1, + %v2, + %v3, + %pred, + i32* %base1) + ret void +} + +define void @st4q_si_i64( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv2i64( %v0, + %v1, + %v2, + %v3, + %pred, + i64* %base) + ret void +} + +define void @st4q_si_f16( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv8f16( %v0, + %v1, + %v2, + %v3, + %pred, + half* %base) + ret void +} + +define void @st4q_si_f32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv4f32( %v0, + %v1, + %v2, + %v3, + %pred, + float* %base) + ret void +} + +define void @st4q_si_f64( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4q_si_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4q.nxv2f64( %v0, + %v1, + %v2, + 
<vscale x 2 x double> %v3,
+                                        <vscale x 2 x i1> %pred,
+                                        double* %base)
+  ret void
+}
+
+define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x bfloat>* %addr) {
+; CHECK-LABEL: st4q_si_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
+  %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 28
+  call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, bfloat* %base)
+  ret void
+}
+
+
+declare void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
+declare void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
+declare void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
+
+declare void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
+declare void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
+declare void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
+
+declare void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
+declare void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
+declare void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
+
+declare void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
+declare void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
+declare void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
+
+declare void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
+declare void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
+declare void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
+
+declare void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
+declare void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
+declare void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll
new file mode 100644
index 0000000000000..c62df1d8d254c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+;
+; ST1Q: vector base + unscaled offset
+; e.g. st1q { z0.q }, p0, [z1.d, x0]
+;
+
+define void @sst1_scatter_u64base_offset_i8(<vscale x 16 x i8> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_i32(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_f16(<vscale x 8 x half> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_f32(<vscale x 4 x float> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_f64(<vscale x 2 x double> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+define void @sst1_scatter_u64base_offset_bf16(<vscale x 8 x bfloat> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset) {
+; CHECK-LABEL: sst1_scatter_u64base_offset_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1q { z0.q }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %b, i64 %offset)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll
new file mode 100644
index 0000000000000..e93673c79c30a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-st1-single.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+
+; ST1W
+
+define void @test_svst1uwq_i32_ss(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1uwq_i32_ss:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.q }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %gep = getelementptr i32, ptr %base, i64 %offset
+  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  ret void
+}
+
+define void @test_svst1uwq_i32_si(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i32>* %base) {
+; CHECK-LABEL: test_svst1uwq_i32_si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #-8, mul vl]
+; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
+  %gep1 = getelementptr inbounds <vscale x 1 x i32>, <vscale x 1 x i32>* %base, i64 -8
+  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+
+  %gep2 = getelementptr inbounds <vscale x 1 x i32>, <vscale x 1 x i32>* %base, i64 7
+  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  ret void
+}
+
+define void @test_svst1uwq_i32_out_of_bound(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i32>* %base) {
+; CHECK-LABEL: test_svst1uwq_i32_out_of_bound:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addvl x8, x0, #2
+; CHECK-NEXT:    st1w { z0.q }, p0, [x8]
+; CHECK-NEXT:    ret
+  %gep = getelementptr inbounds <vscale x 1 x i32>, <vscale x 1 x i32>* %base, i64 8
+  call void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  ret void
+}
+
+define void @test_svst1uwq_f32_ss(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1uwq_f32_ss:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.q }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %gep = getelementptr float, ptr %base, i64 %offset
+  call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  ret void
+}
+
+define void @test_svst1uwq_f32_si(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x float>* %base) {
+; CHECK-LABEL: test_svst1uwq_f32_si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #-8, mul vl]
+; CHECK-NEXT:    st1w { z0.q }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
+  %gep1 = getelementptr inbounds <vscale x 1 x float>, <vscale x 1 x float>* %base, i64 -8
+  call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+
+  %gep2 = getelementptr inbounds <vscale x 1 x float>, <vscale x 1 x float>* %base, i64 7
+  call void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  ret void
+}
+
+; ST1D
+
+define void @test_svst1udq_i64_ss(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1udq_i64_ss:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.q }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %gep = getelementptr i64, ptr %base, i64 %offset
+  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  ret void
+}
+
+define void @test_svst1udq_i64_si(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i64>* %base) {
+; CHECK-LABEL: test_svst1udq_i64_si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #-8, mul vl]
+; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
+  %gep1 = getelementptr inbounds <vscale x 1 x i64>, <vscale x 1 x i64>* %base, i64 -8
+  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+
+  %gep2 = getelementptr inbounds <vscale x 1 x i64>, <vscale x 1 x i64>* %base, i64 7
+  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  ret void
+}
+
+define void @test_svst1udq_i64_out_of_bound(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x i64>* %base) {
+; CHECK-LABEL: test_svst1udq_i64_out_of_bound:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addvl x8, x0, #-5
+; CHECK-NEXT:    st1d { z0.q }, p0, [x8]
+; CHECK-NEXT:    ret
+  %gep = getelementptr inbounds <vscale x 1 x i64>, <vscale x 1 x i64>* %base, i64 -10
+  call void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  ret void
+}
+
+define void @test_svst1udq_f64_ss(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
+; CHECK-LABEL: test_svst1udq_f64_ss:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.q }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %gep = getelementptr double, ptr %base, i64 %offset
+  call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep)
+  ret void
+}
+
+define void @test_svst1udq_f64_si(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, <vscale x 1 x double>* %base) {
+; CHECK-LABEL: test_svst1udq_f64_si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #-8, mul vl]
+; CHECK-NEXT:    st1d { z0.q }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
+  %gep1 = getelementptr inbounds <vscale x 1 x double>, <vscale x 1 x double>* %base, i64 -8
+  call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep1)
+
+  %gep2 = getelementptr inbounds <vscale x 1 x double>, <vscale x 1 x double>* %base, i64 7
+  call void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double> %zt, <vscale x 1 x i1> %pred, ptr %gep2)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.st1uwq.nxv4i32(<vscale x 4 x i32>, <vscale x 1 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1uwq.nxv4f32(<vscale x 4 x float>, <vscale x 1 x i1>, ptr)
+
+declare void @llvm.aarch64.sve.st1udq.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1udq.nxv2f64(<vscale x 2 x double>, <vscale x 1 x i1>, ptr)
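
For readers tracing how the sret-style multi-vector intrinsics exercised by these tests compose in ordinary IR, the sketch below pairs ld2q with st2q to copy two quadword-interleaved vectors. This is an illustrative sketch only, not part of the patch: the function @copy_2q is an invented name, and the element-width predicate type <vscale x 4 x i1> is an assumption carried over from the signatures used in the tests above; the TableGen and IntrinsicsAArch64 definitions remain the authoritative source for the exact signatures.

; Illustrative sketch (hypothetical @copy_2q; predicate width assumed from the tests above).
define void @copy_2q(<vscale x 4 x i1> %pg, ptr %src, ptr %dst) {
  ; A single ld2q de-interleaves two consecutive quadword vectors into a two-element struct.
  %pair = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> %pg, ptr %src)
  %v0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %pair, 0
  %v1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %pair, 1
  ; st2q re-interleaves the pair and stores it contiguously at the destination.
  call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pg, ptr %dst)
  ret void
}

declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)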