diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..37438ff7ed8c3 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -968,6 +968,18 @@ def SVCVTXNT_F32_F64 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aar
 //  SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp
 }
 
+let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in {
+
+def SVCVTNT_Z_F16_F32  : SInst<"svcvtnt_f16[_f32]_z",  "hhPd", "f", MergeNone, "aarch64_sve_fcvtnt_z_f16f32",  [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_Z_F32_F64  : SInst<"svcvtnt_f32[_f64]_z",  "hhPd", "d", MergeNone, "aarch64_sve_fcvtnt_z_f32f64",  [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_Z_BF16_F32 : SInst<"svcvtnt_bf16[_f32]_z", "$$Pd", "f", MergeNone, "aarch64_sve_fcvtnt_z_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTXNT_Z_F32_F64 : SInst<"svcvtxnt_f32[_f64]_z", "MMPd", "d", MergeNone, "aarch64_sve_fcvtxnt_z_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTLT_Z_F32_F16  : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "aarch64_sve_fcvtlt_f32f16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTLT_Z_F64_F32  : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Permutations and selection
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c b/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c
new file mode 100644
index 0000000000000..7a77fb9a86a7e
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intrinsics/acle_sve2_cvtnt.c
@@ -0,0 +1,138 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p2 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -target-feature +sve2p2 \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
+// CHECK-LABEL: @test_svcvtnt_f16_f32_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtnt_f16_f32_zu13__SVFloat16_tu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svcvtnt_f16_f32_z(svfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtnt_f16,_f32,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtnt_bf16_f32_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_zu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svcvtnt_bf16_f32_z(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtnt_bf16,_f32,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtnt_f32_f64_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtnt_f32,_f64,_z,)(inactive, pg, op);
+}
+
+
+
+// CHECK-LABEL: @test_svcvtxnt_f32_f64_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svcvtxnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtxnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_z,)(inactive, pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtlt_f32_f16_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f32_f16_zu10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svcvtlt_f32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtlt_f32,_f16,_z,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcvtlt_f64_f32_z(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svcvtlt_f64_f32_zu10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svcvtlt_f64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svcvtlt_f64,_f32,_z,)(pg, op);
+}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 77fdb8295faa8..f748dbc160a24 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2211,6 +2211,7 @@ def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
 
 def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_z_bf16f32  : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
 
 def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
 def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
 
@@ -2228,10 +2229,13 @@ def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
 def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
 
 def int_aarch64_sve_fcvtnt_f16f32   : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_z_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
 def int_aarch64_sve_fcvtnt_f32f64   : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
 
-def int_aarch64_sve_fcvtx_f32f64   : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtx_f32f64     : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64   : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_z_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
 
 def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
 def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e99b3f8ff07e0..bb42a6cc5679a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4552,8 +4552,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
 
   // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
-  defm FCVTNT_ZPzZ  : sve2_fp_convert_down_narrow_z<"fcvtnt">;
-  def FCVTXNT_ZPzZ  : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
+  defm FCVTNT_ZPzZ  : sve2_fp_convert_down_narrow_z<"fcvtnt", "int_aarch64_sve_fcvtnt_z">;
+  defm FCVTXNT_ZPzZ : sve_float_convert_top<"fcvtxnt", int_aarch64_sve_fcvtxnt_z_f32f64>;
 
   // Placing even
   defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
@@ -4561,8 +4561,8 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
   defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
 
   // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
-  def BFCVTNT_ZPzZ       : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
-  defm BFCVT_ZPzZ_StoH   : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
+  defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>;
+  defm BFCVT_ZPzZ_StoH   : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
 
   // Floating-point convert to integer, zeroing predicate
   defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", "int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c63ae8660cad2..25b0bebed0fec 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2907,9 +2907,12 @@ multiclass sve2_fp_convert_up_long_z<string asm, string op> {
   defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
 }
 
-multiclass sve2_fp_convert_down_narrow_z<string asm> {
+multiclass sve2_fp_convert_down_narrow_z<string asm, string op> {
   def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
   def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+
+  def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+  def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -9551,10 +9554,16 @@ multiclass sve_bfloat_convert
   def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
 }
 
-multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
-  def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
+multiclass sve_bfloat_convert_top<string asm, SDPatternOperator intrinsic, bit op = 0b1, bit destructive = false> {
+  def NAME : sve2_fp_convert_precision<0b1010, op, asm, ZPR16, ZPR32, destructive>;
 
-  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<nxv8bf16, intrinsic, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+}
+
+multiclass sve_float_convert_top<string asm, SDPatternOperator op> {
+  def _StoD : sve2_fp_convert_precision<0b0010, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+
+  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _StoD)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll
new file mode 100644
index 0000000000000..d55d72bcf7e2a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics-fp-converts.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming < %s | FileCheck %s
+
+
+;FCVTNT, BFCVTNT
+define <vscale x 8 x half> @fcvtnt_f16_f32_z(<vscale x 8 x half> %even, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_f16_f32_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnt z0.h, p0/z, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> %even,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x bfloat> @fcvtnt_bf16_f32_z(<vscale x 8 x bfloat> %even, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_bf16_f32_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfcvtnt z0.h, p0/z, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> %even,
+                                                                       <vscale x 4 x i1> %pg,
+                                                                       <vscale x 4 x float> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 4 x float> @fcvtnt_f32_f64_z(<vscale x 4 x float> %even, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtnt_f32_f64_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnt z0.s, p0/z, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> %even,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+;FCVTXNT
+
+
+define <vscale x 4 x float> @fcvtxnt_f32_f64_z(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtxnt_f32_f64_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtxnt z0.s, p0/z, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> %a,
+                                                                      <vscale x 2 x i1> %pg,
+                                                                      <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
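
Usage illustration (not part of the patch): a minimal sketch of how the new zeroing builtins are expected to be called from C with a compiler that carries this change and targets SVE2p2 or streaming SME2p2. The wrapper function and variable names below are invented for the example; the argument order mirrors the test functions above, and the comments are a rough reading of the FCVTNT/FCVTLT zeroing semantics, not authoritative ACLE documentation.

    #include <arm_sve.h>

    // Narrow each double to float and place the results in the odd-numbered
    // lanes; result lanes left inactive by 'pg' are zeroed rather than merged
    // from a source operand (the existing non-_z forms merge instead).
    svfloat32_t narrow_to_odd_lanes(svfloat32_t even, svbool_t pg, svfloat64_t wide) {
      return svcvtnt_f32_f64_z(even, pg, wide);
    }

    // Widen the odd-numbered (top) float lanes to double; inactive result
    // lanes are zeroed, so only a predicate and the source are needed.
    svfloat64_t widen_top_lanes(svbool_t pg, svfloat32_t narrow) {
      return svcvtlt_f64_f32_z(pg, narrow);
    }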