-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AArch64][Clang] Update predication of SVE2-AES/PMULL Pair Intrinsics and add Test Coverage #153825
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…and add test coverage This patch updates the predication guards for SVE2 AES and PMULL Pair intrinsics to support execution in streaming mode. - Updated tablegen (arm_sve.td) - Updated existing llvm/clang codegen tests to support streaming - Updated existing Sema tests' diagnostics to reflect updated guard
@llvm/pr-subscribers-backend-aarch64 Author: Amina Chabane (Amichaxx) ChangesThis patch updates the predication guards for SVE2 AES and PMULL Pair intrinsics to support execution in streaming mode.
Patch is 26.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153825.diff 9 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 7513a3e79bdf7..0114999f277c4 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1828,17 +1828,19 @@ let SVETargetGuard = "sve2,lut", SMETargetGuard = "sme2,lut" in {
////////////////////////////////////////////////////////////////////////////////
// SVE2 - Optional
-let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = InvalidMode in {
-def SVAESD : SInst<"svaesd[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aesd", [IsOverloadNone]>;
-def SVAESIMC : SInst<"svaesimc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesimc", [IsOverloadNone]>;
-def SVAESE : SInst<"svaese[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aese", [IsOverloadNone]>;
-def SVAESMC : SInst<"svaesmc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesmc", [IsOverloadNone]>;
+let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = "ssve-aes" in {
+def SVAESD : SInst<"svaesd[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aesd", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESIMC : SInst<"svaesimc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesimc", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESE : SInst<"svaese[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aese", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESMC : SInst<"svaesmc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesmc", [IsOverloadNone, VerifyRuntimeMode]>;
+}
-def SVPMULLB_PAIR_U64 : SInst<"svpmullb_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullb_pair">;
-def SVPMULLB_PAIR_N_U64 : SInst<"svpmullb_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullb_pair">;
+let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = "ssve-aes|sve-aes,sme-fa64" in {
+def SVPMULLB_PAIR_U64 : SInst<"svpmullb_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>;
+def SVPMULLB_PAIR_N_U64 : SInst<"svpmullb_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>;
-def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullt_pair">;
-def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair">;
+def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
+def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
}
let SVETargetGuard = "sve-sha3", SMETargetGuard = "sve-sha3,sme2p1" in {
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c
index 0839b32fecb78..17f4a54460297 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c
@@ -4,6 +4,11 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
#include <arm_sve.h>
@@ -15,6 +20,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
// CHECK-LABEL: @test_svaesd_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaesd_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svaesd_u8(svuint8_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesd,_u8,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c
index 08ca748c96fe7..d17754cea2e28 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c
@@ -4,6 +4,11 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
#include <arm_sve.h>
@@ -15,6 +20,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#if defined(__ARM_FEATURE_SME)
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
// CHECK-LABEL: @test_svaese_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaese_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svaese_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
{
return SVE_ACLE_FUNC(svaese,_u8,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c
index 78d3debad4b34..3e12f7a376f01 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c
@@ -1,8 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
@@ -15,6 +21,13 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#if defined(__ARM_FEATURE_SME)
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
// CHECK-LABEL: @test_svaesimc_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8> [[OP:%.*]])
@@ -25,7 +38,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaesimc_u8(svuint8_t op)
+svuint8_t test_svaesimc_u8(svuint8_t op) MODE_ATTR
{
return SVE_ACLE_FUNC(svaesimc,_u8,,)(op);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c
index 48d130174788a..249eaba246220 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c
@@ -4,6 +4,11 @@
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
#include <arm_sve.h>
@@ -15,6 +20,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#if defined(__ARM_FEATURE_SME)
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
// CHECK-LABEL: @test_svaesmc_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8> [[OP:%.*]])
@@ -25,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaesmc_u8(svuint8_t op)
+svuint8_t test_svaesmc_u8(svuint8_t op) MODE_ATTR
{
return SVE_ACLE_FUNC(svaesmc,_u8,,)(op);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
index 09583f98393a3..13e62739cffe3 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
@@ -6,6 +6,16 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
#include <arm_sve.h>
#ifdef SVE_OVERLOADED_FORMS
@@ -15,6 +25,19 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+
+//
+//
+// SME-CHECK-LABEL: @_Z22test_svpmullb_pair_u64u12__SVUint64_tS_(
+// SME-CHECK-NEXT: entry:
+// SME-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// SME-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
// CHECK-LABEL: @test_svpmullb_pair_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
@@ -25,11 +48,20 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svpmullb_pair_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svpmullb_pair_u64(svuint64_t op1, svuint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svpmullb_pair,_u64,,)(op1, op2);
}
+
+//
+//
+// SME-CHECK-LABEL: @_Z24test_svpmullb_pair_n_u64u12__SVUint64_tm(
+// SME-CHECK-NEXT: entry:
+// SME-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
+// SME-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// SME-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// SME-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
// CHECK-LABEL: @test_svpmullb_pair_n_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
@@ -44,7 +76,7 @@ svuint64_t test_svpmullb_pair_u64(svuint64_t op1, svuint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svpmullb_pair_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svpmullb_pair_n_u64(svuint64_t op1, uint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svpmullb_pair,_n_u64,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
index a4ffc3165ec8b..f07ba2f585c00 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
@@ -6,6 +6,16 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
#include <arm_sve.h>
#ifdef SVE_OVE...
[truncated]
|
@llvm/pr-subscribers-clang Author: Amina Chabane (Amichaxx) ChangesThis patch updates the predication guards for SVE2 AES and PMULL Pair intrinsics to support execution in streaming mode.
Patch is 26.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153825.diff 9 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 7513a3e79bdf7..0114999f277c4 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1828,17 +1828,19 @@ let SVETargetGuard = "sve2,lut", SMETargetGuard = "sme2,lut" in {
////////////////////////////////////////////////////////////////////////////////
// SVE2 - Optional
-let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = InvalidMode in {
-def SVAESD : SInst<"svaesd[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aesd", [IsOverloadNone]>;
-def SVAESIMC : SInst<"svaesimc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesimc", [IsOverloadNone]>;
-def SVAESE : SInst<"svaese[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aese", [IsOverloadNone]>;
-def SVAESMC : SInst<"svaesmc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesmc", [IsOverloadNone]>;
+let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = "ssve-aes" in {
+def SVAESD : SInst<"svaesd[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aesd", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESIMC : SInst<"svaesimc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesimc", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESE : SInst<"svaese[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aese", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESMC : SInst<"svaesmc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesmc", [IsOverloadNone, VerifyRuntimeMode]>;
+}
-def SVPMULLB_PAIR_U64 : SInst<"svpmullb_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullb_pair">;
-def SVPMULLB_PAIR_N_U64 : SInst<"svpmullb_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullb_pair">;
+let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = "ssve-aes|sve-aes,sme-fa64" in {
+def SVPMULLB_PAIR_U64 : SInst<"svpmullb_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>;
+def SVPMULLB_PAIR_N_U64 : SInst<"svpmullb_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>;
-def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullt_pair">;
-def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair">;
+def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
+def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
}
let SVETargetGuard = "sve-sha3", SMETargetGuard = "sve-sha3,sme2p1" in {
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c
index 0839b32fecb78..17f4a54460297 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesd.c
@@ -4,6 +4,11 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
#include <arm_sve.h>
@@ -15,6 +20,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
// CHECK-LABEL: @test_svaesd_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaesd_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svaesd_u8(svuint8_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesd,_u8,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c
index 08ca748c96fe7..d17754cea2e28 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aese.c
@@ -4,6 +4,11 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
#include <arm_sve.h>
@@ -15,6 +20,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#if defined(__ARM_FEATURE_SME)
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
// CHECK-LABEL: @test_svaese_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaese_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svaese_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
{
return SVE_ACLE_FUNC(svaese,_u8,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c
index 78d3debad4b34..3e12f7a376f01 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesimc.c
@@ -1,8 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
@@ -15,6 +21,13 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#if defined(__ARM_FEATURE_SME)
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
// CHECK-LABEL: @test_svaesimc_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8> [[OP:%.*]])
@@ -25,7 +38,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaesimc_u8(svuint8_t op)
+svuint8_t test_svaesimc_u8(svuint8_t op) MODE_ATTR
{
return SVE_ACLE_FUNC(svaesimc,_u8,,)(op);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c
index 48d130174788a..249eaba246220 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_aesmc.c
@@ -4,6 +4,11 @@
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
// REQUIRES: aarch64-registered-target
#include <arm_sve.h>
@@ -15,6 +20,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#if defined(__ARM_FEATURE_SME)
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
// CHECK-LABEL: @test_svaesmc_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8> [[OP:%.*]])
@@ -25,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svaesmc_u8(svuint8_t op)
+svuint8_t test_svaesmc_u8(svuint8_t op) MODE_ATTR
{
return SVE_ACLE_FUNC(svaesmc,_u8,,)(op);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
index 09583f98393a3..13e62739cffe3 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
@@ -6,6 +6,16 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
#include <arm_sve.h>
#ifdef SVE_OVERLOADED_FORMS
@@ -15,6 +25,19 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+
+//
+//
+// SME-CHECK-LABEL: @_Z22test_svpmullb_pair_u64u12__SVUint64_tS_(
+// SME-CHECK-NEXT: entry:
+// SME-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// SME-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
// CHECK-LABEL: @test_svpmullb_pair_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
@@ -25,11 +48,20 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svpmullb_pair_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svpmullb_pair_u64(svuint64_t op1, svuint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svpmullb_pair,_u64,,)(op1, op2);
}
+
+//
+//
+// SME-CHECK-LABEL: @_Z24test_svpmullb_pair_n_u64u12__SVUint64_tm(
+// SME-CHECK-NEXT: entry:
+// SME-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
+// SME-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// SME-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// SME-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
// CHECK-LABEL: @test_svpmullb_pair_n_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
@@ -44,7 +76,7 @@ svuint64_t test_svpmullb_pair_u64(svuint64_t op1, svuint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svpmullb_pair_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svpmullb_pair_n_u64(svuint64_t op1, uint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svpmullb_pair,_n_u64,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
index a4ffc3165ec8b..f07ba2f585c00 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
@@ -6,6 +6,16 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme-fa64 -target-feature +sve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
#include <arm_sve.h>
#ifdef SVE_OVE...
[truncated]
|
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
Outdated
Show resolved
Hide resolved
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullt_128.c
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
Outdated
Show resolved
Hide resolved
38b79a0
to
cb09687
Compare
…ics/acle_sve2_pmullt_128.c and sve2-intrinsics-polynomial-arithmetic-128.ll - Removed redundant guard in arm_sve.td - Reverted non-streaming runlines in acle_sve2_aesimc.c
dfc05f8
to
f2e2335
Compare
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_pmullb_128.c
Outdated
Show resolved
Hide resolved
clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
Outdated
Show resolved
Hide resolved
…ics/acle_sve2_pmullt_128.c and sve2-intrinsics-polynomial-arithmetic-128.ll - Updated guard in arm_sve.td - Reverted original run lines in acle_sve2_aesimc.c
f2e2335
to
b496794
Compare
Thank you for reviewing @Lukacma. |
I take it this can be merged now ? |
I believe so. I don't have merge access, though. |
This patch updates the predication guards for SVE2 AES and PMULL Pair intrinsics to support execution in streaming mode.