[Clang][LLVM][SVE2.1] Created intrinsics for DUPQ instr. (#83260)
This patch adds Clang and LLVM support for the following intrinsic and maps it to the DUPQ instruction:
```
   // Variants are also available for:
   // _s8, _u16, _s16, _u32, _s32, _u64, _s64
   // _bf16, _f16, _f32, _f64
   svuint8_t svdup_laneq[_u8](svuint8_t zn, uint64_t imm_idx);
```
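As a usage sketch (not part of the patch): DUPQ broadcasts the element at the given index within each 128-bit segment of the source vector, so the call below duplicates lane 3 of every quadword. The function name and build flags (e.g. `-march=armv9-a+sve2p1`) are illustrative assumptions; a compiler with SVE2.1 support is required.

```
#include <arm_sve.h>

// Illustrative: broadcast element 3 of each 128-bit segment of zn
// across that segment. The index must be a constant in [0, 3] for
// 32-bit elements (four elements per quadword).
svuint32_t broadcast_lane3(svuint32_t zn) {
  return svdup_laneq_u32(zn, 3);
}
```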
Lukacma committed Mar 8, 2024
1 parent e0d4906 commit 2b4d818
Showing 7 changed files with 357 additions and 6 deletions.
9 changes: 9 additions & 0 deletions clang/include/clang/Basic/arm_sve.td
@@ -2215,6 +2215,15 @@ let TargetGuard = "sve2p1" in {
def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
// EXTQ
def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>;
// DUPQ
def SVDUP_LANEQ_B : SInst<"svdup_laneq[_{d}]", "ddi", "cUc", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_15>]>;
def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUsh", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>;
def SVDUP_LANEQ_S : SInst<"svdup_laneq[_{d}]", "ddi", "iUif", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;

let TargetGuard = "bf16" in {
def SVDUP_LANEQ_BF16 : SInst<"svdup_laneq[_{d}]", "ddi", "b", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>;
}
// PMOV
// Move to Pred
multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
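The four ImmCheck ranges above follow from the number of elements in a 128-bit quadword: 16 bytes, 8 halfwords (shared by f16 and bf16), 4 words, and 2 doublewords. A sketch of the boundary index each variant accepts (function names are illustrative; the index values match the codegen tests below):

```
#include <arm_sve.h>

// Largest accepted index per element width: (128 / element_bits) - 1.
svuint8_t  last_b(svuint8_t zn)  { return svdup_laneq_u8(zn, 15); } // ImmCheck0_15
svuint16_t last_h(svuint16_t zn) { return svdup_laneq_u16(zn, 7); } // ImmCheck0_7
svuint32_t last_s(svuint32_t zn) { return svdup_laneq_u32(zn, 3); } // ImmCheck0_3
svuint64_t last_d(svuint64_t zn) { return svdup_laneq_u64(zn, 1); } // ImmCheck0_1
```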
213 changes: 213 additions & 0 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c
@@ -0,0 +1,213 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s


#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
#else
#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_s8u10__SVInt8_t
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svdup_laneq_s8(svint8_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
}

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_u8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_u8u11__SVUint8_t
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
}

// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_s16
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_s16u11__SVInt16_t
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svdup_laneq_s16(svint16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_u16
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_u16u12__SVUint16_t
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
}

// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_s32
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_s32u11__SVInt32_t
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svdup_laneq_s32(svint32_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
}

// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_u32
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_u32u12__SVUint32_t
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
}

// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_s64
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_s64u11__SVInt64_t
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svdup_laneq_s64(svint64_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
}

// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_u64
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_u64u12__SVUint64_t
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svdup_laneq_f16
// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svdup_laneq_f16u13__SVFloat16_t
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
}

// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdup_laneq_f32
// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svdup_laneq_f32u13__SVFloat32_t
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svdup_laneq_f64
// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svdup_laneq_f64u13__SVFloat64_t
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svdup_laneq_bf16
// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svdup_laneq_bf16u14__SVBfloat16_t
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
}
29 changes: 29 additions & 0 deletions clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -188,3 +188,32 @@ void test_svget_svset_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4, svbo
svget2_b(tuple2, idx); // expected-error {{argument to 'svget2_b' must be a constant integer}}
svget4_b(tuple4, idx); // expected-error {{argument to 'svget4_b' must be a constant integer}}
}

__attribute__((target("+sve2p1")))
void test_svdup_laneq(){
svuint8_t zn_u8;
svuint16_t zn_u16;
svuint32_t zn_u32;
svuint64_t zn_u64;
svint8_t zn_s8;
svint16_t zn_s16;
svint32_t zn_s32;
svint64_t zn_s64;
svfloat16_t zn_f16;
svfloat32_t zn_f32;
svfloat64_t zn_f64;
svbfloat16_t zn_bf16;

svdup_laneq_u8(zn_u8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
svdup_laneq_u16(zn_u16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svdup_laneq_u32(zn_u32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svdup_laneq_u64(zn_u64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
svdup_laneq_s8(zn_s8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
svdup_laneq_s16(zn_s16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svdup_laneq_s32(zn_s32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svdup_laneq_s64(zn_s64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
svdup_laneq_f16(zn_f16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
}
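Because the index is an ImmCheck operand, it must be an integer constant expression as well as in range. A sketch of the contrast (illustrative, not from the patch):

```
#include <arm_sve.h>
#include <stdint.h>

svuint8_t take_const(svuint8_t zn) {
  return svdup_laneq_u8(zn, 0);   // constant in [0, 15]: accepted
}

svuint8_t take_var(svuint8_t zn, uint64_t idx) {
  return svdup_laneq_u8(zn, idx); // not a constant expression: rejected at compile time
}
```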
7 changes: 7 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1360,6 +1360,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
LLVMSubdivide2VectorType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;

class SVE2_1VectorArgIndexed_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;

class AdvSIMD_SVE_CDOT_LANE_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1913,6 +1919,7 @@ def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic;
def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic;
def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic;
def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_sel : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic;
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4101,7 +4101,7 @@ defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmq
defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;

defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
defm DUPQ_ZZI : sve2p1_dupq<"dupq", int_aarch64_sve_dup_laneq>;
defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;

defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
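With `int_aarch64_sve_dup_laneq` now passed to `sve2p1_dupq`, a call to the intrinsic should select to a single DUPQ instruction. A hedged sketch of the expected lowering; the assembly syntax in the comment is an assumption, not taken from the patch:

```
#include <arm_sve.h>

// Expected to lower to one instruction, roughly:
//   dupq z0.s, z0.s[2]   // assumed DUPQ syntax
svint32_t lane2_per_quadword(svint32_t zn) {
  return svdup_laneq_s32(zn, 2);
}
```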
20 changes: 15 additions & 5 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9893,23 +9893,33 @@ class sve2p1_dupq<bits<5> ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp
let hasSideEffects = 0;
}

multiclass sve2p1_dupq<string mnemonic> {
def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b> {
multiclass sve2p1_dupq<string mnemonic, SDPatternOperator Op> {
def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b_timm> {
bits<4> index;
let Inst{20-17} = index;
}
def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b> {
def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b_timm> {
bits<3> index;
let Inst{20-18} = index;
}
def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b> {
def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b_timm> {
bits<2> index;
let Inst{20-19} = index;
}
def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b> {
def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b_timm> {
bits<1> index;
let Inst{20} = index;
}

def : SVE_2_Op_Imm_Pat<nxv16i8, Op, nxv16i8, i32, VectorIndexB32b_timm, !cast<Instruction>(NAME # _B)>;
def : SVE_2_Op_Imm_Pat<nxv8i16, Op, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Imm_Pat<nxv4i32, Op, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Imm_Pat<nxv2i64, Op, nxv2i64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;

def : SVE_2_Op_Imm_Pat<nxv8f16, Op, nxv8f16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Imm_Pat<nxv4f32, Op, nxv4f32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Imm_Pat<nxv2f64, Op, nxv2f64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
def : SVE_2_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
}
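Note that the added `SVE_2_Op_Imm_Pat` entries route `nxv8i16`, `nxv8f16`, and `nxv8bf16` through the same `_H` instruction, which carries a 3-bit index field. An illustrative sketch under that assumption (function names are hypothetical):

```
#include <arm_sve.h>

// All three calls should select the _H form of DUPQ_ZZI, per the
// patterns above; the index 5 fits the 3-bit field ([0, 7]).
svint16_t    dup_h_i16(svint16_t zn)     { return svdup_laneq_s16(zn, 5); }
svfloat16_t  dup_h_f16(svfloat16_t zn)   { return svdup_laneq_f16(zn, 5); }
svbfloat16_t dup_h_bf16(svbfloat16_t zn) { return svdup_laneq_bf16(zn, 5); }
```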

