Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME2] Add __arm_streaming_compatible attribute to CLAMP bui… #76712

Closed
wants to merge 1 commit into from

Conversation

dtemirbulatov
Copy link
Contributor

…ltins.

Patch-by: Caroline Concatto caroline.concatto@arm.com

…ltins.

Patch-by: Caroline Concatto <caroline.concatto@arm.com>
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Jan 2, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 2, 2024

@llvm/pr-subscribers-clang

Author: Dinar Temirbulatov (dtemirbulatov)

Changes

…ltins.

Patch-by: Caroline Concatto <caroline.concatto@arm.com>


Full diff: https://github.com/llvm/llvm-project/pull/76712.diff

4 Files Affected:

  • (modified) clang/include/clang/Basic/arm_sve.td (+2-2)
  • (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c (+10-4)
  • (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c (+12-5)
  • (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c (+12-5)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 91f62c4c76339d..9b88be633aa55c 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2052,8 +2052,8 @@ def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  MergeNone
 }
 
 let TargetGuard = "sve2p1|sme" in {
-def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [], []>;
-def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
+def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [IsStreamingCompatible], []>;
+def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [IsStreamingCompatible], []>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">;
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index 5d8c5b7b8a18c6..fd397292d1f2b6 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -10,7 +10,7 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve \
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -DTEST_SME2 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
@@ -22,6 +22,12 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#ifndef TEST_SME2
+#define ATTR
+#else
+#define ATTR __arm_streaming_compatible
+#endif
+
 // CHECK-LABEL: @test_svclamp_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fclamp.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
@@ -32,7 +38,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fclamp.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) {
+svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _f16, , )(op1, op2, op3);
 }
 
@@ -46,7 +52,7 @@ svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fclamp.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) {
+svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _f32, , )(op1, op2, op3);
 }
 
@@ -60,7 +66,7 @@ svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fclamp.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) {
+svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _f64, , )(op1, op2, op3);
 }
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
index 8c63a7455c79f4..54c4c1c1679d7a 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
@@ -10,6 +10,8 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
@@ -20,6 +22,12 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#ifndef TEST_SME2
+#define ATTR
+#else
+#define ATTR __arm_streaming_compatible
+#endif
+
 // CHECK-LABEL: @test_svclamp_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
@@ -30,7 +38,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) {
+svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _s8, , )(op1, op2, op3);
 }
 
@@ -44,7 +52,7 @@ svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) {
+svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _s16, , )(op1, op2, op3);
 }
 
@@ -58,7 +66,7 @@ svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) {
+svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _s32, , )(op1, op2, op3);
 }
 
@@ -72,7 +80,6 @@ svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) {
+svint64_t test_svclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _s64, , )(op1, op2, op3);
 }
-
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
index b8789862488777..ecbb08679f2c53 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
@@ -10,6 +10,8 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -DTEST_SME2 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
@@ -20,6 +22,12 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#ifndef TEST_SME2
+#define ATTR
+#else
+#define ATTR __arm_streaming_compatible
+#endif
+
 // CHECK-LABEL: @test_svclamp_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
@@ -30,7 +38,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) {
+svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _u8, , )(op1, op2, op3);
 }
 
@@ -44,7 +52,7 @@ svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) {
+svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _u16, , )(op1, op2, op3);
 }
 
@@ -58,7 +66,7 @@ svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) {
+svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _u32, , )(op1, op2, op3);
 }
 
@@ -72,7 +80,6 @@ svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) {
+svuint64_t test_svclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _u64, , )(op1, op2, op3);
 }
-

Copy link
Collaborator

@sdesmalen-arm sdesmalen-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps I'm missing something obvious here, but it looks like this patch is implementing the opposite of #75958 ?

@dtemirbulatov
Copy link
Contributor Author

I missed that implemtation in #75958, Closing then.

@dtemirbulatov dtemirbulatov deleted the sme2-sme2p1-clamp branch January 2, 2024 15:51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants