-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and LLVM intrinisc #71930
[SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and LLVM intrinisc #71930
Conversation
This patch implements the builtins in Clang and the LLVM-IR intrinsic for the following: EXTQ // Variants are also available for: // _s8, _s16, _u16, _s32, _u32, _s64, _u64 // _bf16, _f16, _f32, _f64 svuint8_t svextq_lane[_u8](svuint8_t zdn, TBLQ and TBXQ // Variants are also available for: // _u8, _u16, _s16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 svint8_t svtblq[_s8](svint8_t zn, svuint8_t zm); svint8_t svtbxq[_s8](svint8_t zn, svuint8_t zm); UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2 // Variants are also available for: // _s8, _u16, _s16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 svuint8_t svuzpq1[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svuzpq2[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svzipq1[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svzipq2[_u8](svuint8_t zn, svuint8_t zm); PMOV // Variants are available for: // _s8, _u16, _s16, _s32, _u32, _s64, _u64 svbool_t svpmov_lane[_u8](svuint8_t zn, uint64_t imm); svbool_t svpmov[_u8](svuint8_t zn); // The immediate is zero svuint8_t svpmov_u8_z(svbool_t pn); // The immediate is zero // Variants are available for: // _s16, _s32, _u32, _s64, _u64 svuint16_t svpmov_lane[_u16]_m(svuint16_t zd, svbool_t pn, uint64_t imm); According to the PR#257[1] [1]ARM-software/acle#257 Co-author by: Hassnaa Hamdi <hassnaa.hamdi@arm.com>
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-ir Author: None (CarolineConcatto) ChangesThis patch implements the builtins in Clang EXTQ TBLQ and TBXQ UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2 PMOV // Variants are available for: According to the PR#257[1] Co-author by: Hassnaa Hamdi <hassnaa.hamdi@arm.com> Patch is 190.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71930.diff 26 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..c377a0b89c1d591 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1992,3 +1992,36 @@ let TargetGuard = "sme2" in {
def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
}
+
+let TargetGuard = "sve2p1" in {
+ // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
+ def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq1", [], []>;
+ def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq2", [], []>;
+ def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq1", [], []>;
+ def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq2", [], []>;
+ // TBLQ, TBXQ
+ def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
+ def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
+ // EXTQ
+ def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>;
+ // PMOV
+ // Move to Pred
+ multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+ def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+ def _ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
+ }
+ defm SVPMOV_B_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
+ defm SVPMOV_H_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
+ defm SVPMOV_S_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
+ defm SVPMOV_D_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+
+ // Move to Vector
+ multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+ def _M : SInst<name # "_lane[_{d}]", "ddPk", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
+ def _Z : SInst<name # "_{d}_z", "dP", types, MergeNone, intrinsic # "_zeroing", flags, []>;
+ }
+ def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP", "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
+ defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_1>;
+ defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>;
+ defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 22a2a3c5434d657..21dac067ab66e61 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -249,6 +249,9 @@ def ImmCheck0_0 : ImmCheckType<16>; // 0..0
def ImmCheck0_15 : ImmCheckType<17>; // 0..15
def ImmCheck0_255 : ImmCheckType<18>; // 0..255
def ImmCheck2_4_Mul2 : ImmCheckType<19>; // 2, 4
+def ImmCheck1_1 : ImmCheckType<20>; // 1..1
+def ImmCheck1_3 : ImmCheckType<21>; // 1..3
+def ImmCheck1_7 : ImmCheckType<22>; // 1..7
class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
int Arg = arg;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae588db02bbe722..9dfff132cd88db3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3052,6 +3052,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck1_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+ HasError = true;
+ break;
case SVETypeFlags::ImmCheckExtract:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
new file mode 100644
index 000000000000000..c49f8c838ace373
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -0,0 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u8,,)(zn, zm, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s8,,)(zn, zm, 4);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u16,,)(zn, zm, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s16,,)(zn, zm, 5);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u32,,)(zn, zm, 2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s32,,)(zn, zm, 6);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u64,,)(zn, zm, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s64,,)(zn, zm, 7);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_lane_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f16,,)(zn, zm, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_lane_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f32,,)(zn, zm, 9);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_lane_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f64,,)(zn, zm, 10);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_lane_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svextq_lane_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _bf16,,)(zn, zm, 11);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
new file mode 100644
index 000000000000000..84f058ad8c16d35
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
@@ -0,0 +1,304 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u8
+// C...
[truncated]
|
@llvm/pr-subscribers-clang Author: None (CarolineConcatto) ChangesThis patch implements the builtins in Clang EXTQ TBLQ and TBXQ UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2 PMOV // Variants are available for: According to the PR#257[1] Co-author by: Hassnaa Hamdi <hassnaa.hamdi@arm.com> Patch is 190.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71930.diff 26 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..c377a0b89c1d591 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1992,3 +1992,36 @@ let TargetGuard = "sme2" in {
def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
}
+
+let TargetGuard = "sve2p1" in {
+ // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
+ def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq1", [], []>;
+ def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq2", [], []>;
+ def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq1", [], []>;
+ def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq2", [], []>;
+ // TBLQ, TBXQ
+ def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
+ def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
+ // EXTQ
+ def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>;
+ // PMOV
+ // Move to Pred
+ multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+ def _LANE : SInst<name # "_lane[_{d}]", "Pdk", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
+ def _ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
+ }
+ defm SVPMOV_B_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
+ defm SVPMOV_H_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
+ defm SVPMOV_S_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
+ defm SVPMOV_D_TO_PRED_LANE : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+
+ // Move to Vector
+ multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
+ def _M : SInst<name # "_lane[_{d}]", "ddPk", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
+ def _Z : SInst<name # "_{d}_z", "dP", types, MergeNone, intrinsic # "_zeroing", flags, []>;
+ }
+ def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP", "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
+ defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_1>;
+ defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>;
+ defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 22a2a3c5434d657..21dac067ab66e61 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -249,6 +249,9 @@ def ImmCheck0_0 : ImmCheckType<16>; // 0..0
def ImmCheck0_15 : ImmCheckType<17>; // 0..15
def ImmCheck0_255 : ImmCheckType<18>; // 0..255
def ImmCheck2_4_Mul2 : ImmCheckType<19>; // 2, 4
+def ImmCheck1_1 : ImmCheckType<20>; // 1..1
+def ImmCheck1_3 : ImmCheckType<21>; // 1..3
+def ImmCheck1_7 : ImmCheckType<22>; // 1..7
class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
int Arg = arg;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae588db02bbe722..9dfff132cd88db3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3052,6 +3052,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck1_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+ HasError = true;
+ break;
case SVETypeFlags::ImmCheckExtract:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
new file mode 100644
index 000000000000000..c49f8c838ace373
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -0,0 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tu11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u8,,)(zn, zm, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tu10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s8,,)(zn, zm, 4);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tu12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u16,,)(zn, zm, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tu11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s16,,)(zn, zm, 5);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tu12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u32,,)(zn, zm, 2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tu11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s32,,)(zn, zm, 6);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tu12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _u64,,)(zn, zm, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tu11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _s64,,)(zn, zm, 7);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_lane_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tu13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f16,,)(zn, zm, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_lane_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tu13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f32,,)(zn, zm, 9);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_lane_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tu13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _f64,,)(zn, zm, 10);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_lane_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBFloat16_tu14__SVBFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svextq_lane_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+ return SVE_ACLE_FUNC(svextq_lane, _bf16,,)(zn, zm, 11);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
new file mode 100644
index 000000000000000..84f058ad8c16d35
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
@@ -0,0 +1,304 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u8
+// C...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In addition to the code comments:
In the (tentative) commit message the prototype svuint8_t svextq_lane[_u8](svuint8_t zdn,
is cut short.
The prototype of svint8_t svtbxq[_s8](svint8_t zn, svuint8_t zm);
is missing an parameter.
The attribution tag should be Co-authored-by:
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq1.c
Outdated
Show resolved
Hide resolved
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uzpq2.c
Outdated
Show resolved
Hide resolved
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq1.c
Outdated
Show resolved
Hide resolved
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_zipq2.c
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, cheers!
This patch implements the builtins in Clang
and the LLVM-IR intrinsic for the following:
EXTQ
// Variants are also available for:
// _s8, _s16, _u16, _s32, _u32, _s64, _u64
// _bf16, _f16, _f32, _f64
svuint8_t svextq_lane[_u8](svuint8_t zdn, svuint8_t zm, uint64_t imm);
TBLQ and TBXQ
// Variants are also available for:
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svint8_t svtblq[_s8](svint8_t zn, svuint8_t zm);
svint8_t svtbxq[_s8](svint8_t zn, svuint8_t zm);
UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svuint8_t svuzpq1[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svuzpq2[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svzipq1[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svzipq2[_u8](svuint8_t zn, svuint8_t zm);
PMOV
// Variants are available for:
// _s8, _u16, _s16, _s32, _u32, _s64, _u64
svbool_t svpmov_lane[_u8](svuint8_t zn, uint64_t imm); svbool_t svpmov[_u8](svuint8_t zn); // The immediate is zero svuint8_t svpmov_u8_z(svbool_t pn); // The immediate is zero
// Variants are available for:
// _s16, _s32, _u32, _s64, _u64
svuint16_t svpmov_lane[_u16]_m(svuint16_t zd, svbool_t pn, uint64_t imm);
According to the PR#257[1]
[1]ARM-software/acle#257
Co-authored-by: Hassnaa Hamdi hassnaa.hamdi@arm.com