diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 6da30e08e7521..6cc249837d3f3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2215,6 +2215,15 @@ let TargetGuard = "sve2p1" in { def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">; // EXTQ def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>; + // DUPQ + def SVDUP_LANEQ_B : SInst<"svdup_laneq[_{d}]", "ddi", "cUc", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_15>]>; + def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUsh", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>; + def SVDUP_LANEQ_S : SInst<"svdup_laneq[_{d}]", "ddi", "iUif", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; + def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; + + let TargetGuard = "bf16" in { + def SVDUP_LANEQ_BF16 : SInst<"svdup_laneq[_{d}]", "ddi", "b", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>; + } // PMOV // Move to Pred multiclass PMOV_TO_PRED flags=[], ImmCheckType immCh > { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c new file mode 100644 index 0000000000000..587a67aa6b7ca --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\ +// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1, A2) A1##A2 +#endif + +// CHECK-LABEL: define dso_local @test_svdup_laneq_s8 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv16i8( [[ZN]], i32 0) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z19test_svdup_laneq_s8u10__SVInt8_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv16i8( [[ZN]], i32 0) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svdup_laneq_s8(svint8_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_u8 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv16i8( [[ZN]], i32 15) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z19test_svdup_laneq_u8u11__SVUint8_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv16i8( [[ZN]], i32 15) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svdup_laneq_u8(svuint8_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_s16 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8i16( [[ZN]], i32 1) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_s16u11__SVInt16_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8i16( [[ZN]], i32 1) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint16_t test_svdup_laneq_s16(svint16_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_u16 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8i16( [[ZN]], i32 7) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_u16u12__SVUint16_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8i16( [[ZN]], i32 7) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint16_t test_svdup_laneq_u16(svuint16_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_s32 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv4i32( [[ZN]], i32 2) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_s32u11__SVInt32_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv4i32( [[ZN]], i32 2) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint32_t test_svdup_laneq_s32(svint32_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_u32 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv4i32( [[ZN]], i32 3) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_u32u12__SVUint32_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv4i32( [[ZN]], i32 3) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint32_t test_svdup_laneq_u32(svuint32_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_s64 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv2i64( [[ZN]], i32 0) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_s64u11__SVInt64_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv2i64( [[ZN]], i32 0) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint64_t test_svdup_laneq_s64(svint64_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_u64 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv2i64( [[ZN]], i32 1) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_u64u12__SVUint64_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv2i64( [[ZN]], i32 1) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint64_t test_svdup_laneq_u64(svuint64_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_f16 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8f16( [[ZN]], i32 4) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_f16u13__SVFloat16_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8f16( [[ZN]], i32 4) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_f32 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv4f32( [[ZN]], i32 1) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_f32u13__SVFloat32_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv4f32( [[ZN]], i32 1) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_f64 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv2f64( [[ZN]], i32 1) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z20test_svdup_laneq_f64u13__SVFloat64_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv2f64( [[ZN]], i32 1) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1); +} + +// CHECK-LABEL: define dso_local @test_svdup_laneq_bf16 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8bf16( [[ZN]], i32 3) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z21test_svdup_laneq_bf16u14__SVBfloat16_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dup.laneq.nxv8bf16( [[ZN]], i32 3) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) { + return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3); +} diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp index d857608d44155..a6ec5150f0aab 100644 --- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp @@ -188,3 +188,32 @@ void test_svget_svset_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4, svbo svget2_b(tuple2, idx); // expected-error {{argument to 'svget2_b' must be a constant integer}} svget4_b(tuple4, idx); // expected-error {{argument to 'svget4_b' must be a constant integer}} } + +__attribute__((target("+sve2p1"))) +void test_svdup_laneq(){ + svuint8_t zn_u8; + svuint16_t zn_u16; + svuint32_t zn_u32; + svuint64_t zn_u64; + svint8_t zn_s8; + svint16_t zn_s16; + svint32_t zn_s32; + svint64_t zn_s64; + svfloat16_t zn_f16; + svfloat32_t zn_f32; + svfloat64_t zn_f64; + svbfloat16_t zn_bf16; + + svdup_laneq_u8(zn_u8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + svdup_laneq_u16(zn_u16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svdup_laneq_u32(zn_u32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svdup_laneq_u64(zn_u64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svdup_laneq_s8(zn_s8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + svdup_laneq_s16(zn_s16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svdup_laneq_s32(zn_s32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svdup_laneq_s64(zn_s64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svdup_laneq_f16(zn_f16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} +} \ No newline at end of file diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 5a9a7c4b43a1f..bcaa37de74b63 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1360,6 +1360,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg>]>; + + class SVE2_1VectorArgIndexed_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_CDOT_LANE_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -1913,6 +1919,7 @@ def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic; def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic; def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic; +def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic; def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic; def int_aarch64_sve_sel : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 7c98f934a1317..e0a010af41553 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4101,7 +4101,7 @@ defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmq defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>; defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>; -defm DUPQ_ZZI : sve2p1_dupq<"dupq">; +defm DUPQ_ZZI : sve2p1_dupq<"dupq", int_aarch64_sve_dup_laneq>; defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>; defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 8baf3a6d3d818..c19e02bb03d1f 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -9893,23 +9893,33 @@ class sve2p1_dupq ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp let hasSideEffects = 0; } -multiclass sve2p1_dupq { - def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b> { +multiclass sve2p1_dupq { + def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b_timm> { bits<4> index; let Inst{20-17} = index; } - def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b> { + def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b_timm> { bits<3> index; let Inst{20-18} = index; } - def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b> { + def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b_timm> { bits<2> index; let Inst{20-19} = index; } - def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b> { + def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b_timm> { bits<1> index; let Inst{20} = index; } + + def : SVE_2_Op_Imm_Pat(NAME # _B)>; + def : SVE_2_Op_Imm_Pat(NAME # _H)>; + def : SVE_2_Op_Imm_Pat(NAME # _S)>; + def : SVE_2_Op_Imm_Pat(NAME # _D)>; + + def : SVE_2_Op_Imm_Pat(NAME # _H)>; + def : SVE_2_Op_Imm_Pat(NAME # _S)>; + def : SVE_2_Op_Imm_Pat(NAME # _D)>; + def : SVE_2_Op_Imm_Pat(NAME # _H)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll new file mode 100644 index 0000000000000..f1a423b02ac2a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +define @test_dupq_i8 ( %zn) { +; CHECK-LABEL: test_dupq_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.b, z0.b[15] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv16i8( %zn, i32 15) + ret %res +} + +define @test_dupq_i16 ( %zn) { +; CHECK-LABEL: test_dupq_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.h, z0.h[7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv8i16( %zn, i32 7) + ret %res +} + +define @test_dupq__i32 ( %zn) { +; CHECK-LABEL: test_dupq__i32: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.s, z0.s[3] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv4i32( %zn, i32 3) + ret %res +} + +define @test_dupq_i64 ( %zn) { +; CHECK-LABEL: test_dupq_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.d, z0.d[1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv2i64( %zn, i32 1) + ret %res +} + +define @test_dupq_f16( %zn) { +; CHECK-LABEL: test_dupq_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.h, z0.h[4] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv8f16( %zn, i32 4) + ret %res +} + +define @test_dupq_f32( %zn) { +; CHECK-LABEL: test_dupq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.s, z0.s[2] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv4f32( %zn, i32 2) + ret %res +} + +define @test_dupq_f64( %zn) { +; CHECK-LABEL: test_dupq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.d, z0.d[0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv2f64( %zn, i32 0) + ret %res +} + +define @test_dupq_bf16( %zn) { +; CHECK-LABEL: test_dupq_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupq z0.h, z0.h[1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.dup.laneq.nxv8bf16( %zn, i32 1) + ret %res +} + +declare @llvm.aarch64.sve.dup.laneq.nxv16i8(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv8i16(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv4i32(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv2i64(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv8f16(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv4f32(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv2f64(, i32) +declare @llvm.aarch64.sve.dup.laneq.nxv8bf16(, i32)