-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Clang][LLVM][AArch64] Add support for FCVTXNT, FCVTLT, {B}FCVTNT int… #170356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…rinsics
This patch adds support in Clang for these assembly instructions
FCVTXNT, FCVTLT, {B}FCVTNT
By implementing these prototypes:
// Variant is available for _f64_f32
svfloat32_t svcvtlt_f32[_f16]_z (svbool_t pg, svfloat16_t op);
// Variants are available for:
// _f32_f64, _bf16_f32
svfloat16_t svcvtnt_f16[_f32]_z (svfloat16_t even, svbool_t pg, svfloat32_t op);
svfloat32_t svcvtxnt_f32[_f64]_z (svfloat32_t even, svbool_t pg, svfloat64_t op);
according to the ACLE[1]
[1] ARM-software/acle#412
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-aarch64 Author: None (CarolineConcatto) Changes…rinsics This patch adds support in Clang for these assembly instructions FCVTXNT, FCVTLT, {B}FCVTNT // Variant is available for _f64_f32 // Variants are available for: svfloat32_t svcvtxnt_f32[_f64]_z (svfloat32_t even, svbool_t pg, svfloat64_t op); according to the ACLE[1] Full diff: https://github.com/llvm/llvm-project/pull/170356.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..37438ff7ed8c3 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -968,6 +968,18 @@ def SVCVTXNT_F32_F64 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aar
// SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp
}
+let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in {
+
+def SVCVTNT_Z_F16_F32 : SInst<"svcvtnt_f16[_f32]_z", "hhPd", "f", MergeNone, "aarch64_sve_fcvtnt_z_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_Z_F32_F64 : SInst<"svcvtnt_f32[_f64]_z", "hhPd", "d", MergeNone, "aarch64_sve_fcvtnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_Z_BF16_F32 : SInst<"svcvtnt_bf16[_f32]_z", "$$Pd", "f", MergeNone, "aarch64_sve_fcvtnt_z_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTXNT_Z_F32_F64 : SInst<"svcvtxnt_f32[_f64]_z", "MMPd", "d", MergeNone, "aarch64_sve_fcvtxnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "aarch64_sve_fcvtlt_f32f16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+}
////////////////////////////////////////////////////////////////////////////////
// Permutations and selection
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c b/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c
new file mode 100644
index 0000000000000..7a77fb9a86a7e
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c
@@ -0,0 +1,138 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p2 -target-feature +sve2p2 \
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -target-feature +sve2p2 \
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
+// CHECK-LABEL: @test_svcvtnt_f16_f32_z(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtnt_f16_f32_zu13__SVFloat16_tu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svcvtnt_f16_f32_z(svfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+ return SVE_ACLE_FUNC(svcvtnt_f16,_f32,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtnt_bf16_f32_z(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_zu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svcvtnt_bf16_f32_z(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+ return SVE_ACLE_FUNC(svcvtnt_bf16,_f32,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtnt_f32_f64_z(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
+{
+ return SVE_ACLE_FUNC(svcvtnt_f32,_f64,_z,)(inactive, pg, op);
+}
+
+
+
+// CHECK-LABEL: @test_svcvtxnt_f32_f64_z(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtxnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtxnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
+{
+ return SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtlt_f32_f16_z(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f32_f16_zu10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtlt_f32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
+{
+ return SVE_ACLE_FUNC(svcvtlt_f32,_f16,_z,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtlt_f64_f32_z(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f64_f32_zu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svcvtlt_f64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+ return SVE_ACLE_FUNC(svcvtlt_f64,_f32,_z,)(pg, op);
+}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 77fdb8295faa8..f748dbc160a24 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2211,6 +2211,7 @@ def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1
def int_aarch64_sve_fcvt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_z_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
@@ -2228,10 +2229,13 @@ def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1
def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_z_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e99b3f8ff07e0..bb42a6cc5679a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4552,8 +4552,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
- defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
- def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
+ defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt", "int_aarch64_sve_fcvtnt_z">;
+ defm FCVTXNT_ZPzZ : sve_float_convert_top<"fcvtxnt", int_aarch64_sve_fcvtxnt_z_f32f64>;
// Placing even
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
@@ -4561,8 +4561,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
- def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
- defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
+ defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>;
+ defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
// Floating-point convert to integer, zeroing predicate
defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", "int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c63ae8660cad2..25b0bebed0fec 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2907,9 +2907,12 @@ multiclass sve2_fp_convert_up_long_z<string asm, string op> {
defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
-multiclass sve2_fp_convert_down_narrow_z<string asm> {
+multiclass sve2_fp_convert_down_narrow_z<string asm, string op> {
def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+
+ def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
//===----------------------------------------------------------------------===//
@@ -9551,10 +9554,16 @@ multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperato
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
-multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
- def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
+multiclass sve_bfloat_convert_top<string asm, SDPatternOperator ir_op, bit op = true, bit destructive = op> {
+ def NAME : sve2_fp_convert_precision<0b1010, op, asm, ZPR16, ZPR32, destructive>;
- def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv8bf16, ir_op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+}
+
+multiclass sve_float_convert_top<string asm, SDPatternOperator ir_op> {
+ def _StoD : sve2_fp_convert_precision<0b0010, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+
+ def : SVE_3_Op_Pat<nxv4f32, ir_op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll
new file mode 100644
index 0000000000000..d55d72bcf7e2a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming < %s | FileCheck %s
+
+
+;FCVTNT, BFCVTNT
+define <vscale x 8 x half> @fcvtnt_f16_f32_z(<vscale x 8 x half> %even, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_f16_f32_z:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnt z0.h, p0/z, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> %even,
+ <vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x bfloat> @fcvtnt_bf16_f32_z(<vscale x 8 x bfloat> %even, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_bf16_f32_z:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bfcvtnt z0.h, p0/z, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> %even,
+ <vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 4 x float> @fcvtnt_f32_f64_z(<vscale x 4 x float> %even, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtnt_f32_f64_z:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnt z0.s, p0/z, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> %even,
+ <vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+;FCVTXNT
+
+
+define <vscale x 4 x float> @fcvtxnt_f32_f64_z(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtxnt_f32_f64_z:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtxnt z0.s, p0/z, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> %a,
+ <vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull request overview
This pull request adds support for SVE2.2 floating-point conversion intrinsics (FCVTXNT, FCVTLT, FCVTNT, and BFCVTNT) in Clang and LLVM for AArch64. The implementation provides new assembly instructions for narrow/top conversions and long/top conversions with zeroing predicates.
Key changes:
- Added new intrinsic definitions for
svcvtnt_*_z,svcvtxnt_*_z, andsvcvtlt_*_zfunctions - Implemented instruction patterns and multiclass definitions in TableGen files
- Added comprehensive test coverage for all new intrinsics
Reviewed changes
Copilot reviewed 6 out of 6 changed files in this pull request and generated 2 comments.
Show a summary per file
| File | Description |
|---|---|
| llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll | Tests LLVM IR to assembly conversion for the new FP conversion instructions |
| llvm/lib/Target/AArch64/SVEInstrFormats.td | Defines instruction format patterns and multiclasses for the conversion operations |
| llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | Instantiates the instruction definitions using the new multiclasses |
| llvm/include/llvm/IR/IntrinsicsAArch64.td | Declares the new LLVM intrinsics with zeroing predicate variants |
| clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c | Tests Clang codegen for the new intrinsics with multiple configurations |
| clang/include/clang/Basic/arm_sve.td | Defines the Clang builtin function signatures and mappings |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| multiclass sve2_fp_convert_down_narrow_z<string asm, string op> { | ||
| def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>; | ||
| def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>; | ||
|
|
Copilot
AI
Dec 2, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The blank line 2913 between the instruction definitions and the pattern definitions is unnecessary and reduces code cohesion. Consider removing it for better readability.
| // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate | ||
| def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>; | ||
| defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>; | ||
| defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>; |
Copilot
AI
Dec 2, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The suffix '_StoH' is inconsistent with the pattern used for FCVTNT_ZPzZ and FCVTXNT_ZPzZ which have no suffix. Since sve_bfloat_convert_top only defines one variant (NAME), the suffix appears unnecessary. Consider removing it to maintain consistency with other similar definitions.
| defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>; | |
| defm BFCVTNT_ZPzZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>; |
| // CPP-CHECK-LABEL: @_Z22test_svcvtlt_f64_f32_zu10__SVBool_tu13__SVFloat32_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be calling @llvm.aarch64.sve.fcvtlt.z.f64f32 instead?
…rinsics
This patch adds support in Clang for these assembly instructions FCVTXNT, FCVTLT, {B}FCVTNT
By implementing these prototypes:
// Variant is available for _f64_f32
svfloat32_t svcvtlt_f32[_f16]_z (svbool_t pg, svfloat16_t op);
// Variants are available for:
// _f32_f64, _bf16_f32
svfloat16_t svcvtnt_f16[_f32]_z (svfloat16_t even, svbool_t pg, svfloat32_t op);
svfloat32_t svcvtxnt_f32[_f64]_z (svfloat32_t even, svbool_t pg, svfloat64_t op);
according to the ACLE[1]
[1] ARM-software/acle#412