From 23653d3d12f342c4c45897866f92865bd6aafc5d Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Tue, 14 Oct 2025 12:40:51 +0100 Subject: [PATCH 1/4] [AArch64][llvm] Add support for new vcvt* intrinsics Add support for these new vcvt* intrinsics: ``` int64_t vcvts_s64_f32(float32_t); uint64_t vcvts_u64_f32(float32_t); int32_t vcvtd_s32_f64(float64_t); uint32_t vcvtd_u32_f64(float64_t); int64_t vcvtns_s64_f32(float32_t); uint64_t vcvtns_u64_f32(float32_t); int32_t vcvtnd_s32_f64(float64_t); uint32_t vcvtnd_u32_f64(float64_t); int64_t vcvtms_s64_f32(float32_t); uint64_t vcvtms_u64_f32(float32_t); int32_t vcvtmd_s32_f64(float64_t); uint32_t vcvtmd_u32_f64(float64_t); int64_t vcvtps_s64_f32(float32_t); uint64_t vcvtps_u64_f32(float32_t); int32_t vcvtpd_s32_f64(float64_t); uint32_t vcvtpd_u32_f64(float64_t); int64_t vcvtas_s64_f32(float32_t); uint64_t vcvtas_u64_f32(float32_t); int32_t vcvtad_s32_f64(float64_t); uint32_t vcvtad_u32_f64(float64_t); ``` --- clang/include/clang/Basic/arm_neon.td | 65 +++-- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 ++ .../CodeGen/AArch64/neon-fcvt-intrinsics.c | 225 +++++++++++++++++- 3 files changed, 288 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index ef196103035e8..315c60692dcaf 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1466,26 +1466,51 @@ def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Converts def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">; -def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; -def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; -def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; -def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">; -def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">; -def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", 
"Sf">; -def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">; -def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">; -def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">; -def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">; -def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">; -def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">; -def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">; -def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">; -def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">; -def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">; -def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">; -def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">; -def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">; -def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">; + +def SCALAR_FCVTN_F32toSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; +def SCALAR_FCVTN_F32toUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; +def SCALAR_FCVTN_F64toSS : SInst<"vcvtn_s32", "(1S)1", "Sd">; +def SCALAR_FCVTN_F64toUS : SInst<"vcvtn_u32", "(1U)1", "Sd">; +def SCALAR_FCVTN_F32toSD : SInst<"vcvtn_s64", "(1S)1", "Sf">; +def SCALAR_FCVTN_F32toUD : SInst<"vcvtn_u64", "(1U)1", "Sf">; +def SCALAR_FCVTN_F64toSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; +def SCALAR_FCVTN_F64toUD : SInst<"vcvtn_u64", "(1U)1", "Sd">; + +def SCALAR_FCVTM_F32toSS : SInst<"vcvtm_s32", "(1S)1", "Sf">; +def SCALAR_FCVTM_F32toUS : SInst<"vcvtm_u32", "(1U)1", "Sf">; +def SCALAR_FCVTM_F64toSS : SInst<"vcvtm_s32", "(1S)1", "Sd">; +def SCALAR_FCVTM_F64toUS : SInst<"vcvtm_u32", "(1U)1", "Sd">; +def SCALAR_FCVTM_F32toSD : SInst<"vcvtm_s64", "(1S)1", "Sf">; +def SCALAR_FCVTM_F32toUD : SInst<"vcvtm_u64", "(1U)1", "Sf">; +def SCALAR_FCVTM_F64toSD : SInst<"vcvtm_s64", "(1S)1", "Sd">; +def SCALAR_FCVTM_F64toUD : SInst<"vcvtm_u64", "(1U)1", "Sd">; + +def SCALAR_FCVTA_F32toSS : SInst<"vcvta_s32", "(1S)1", "Sf">; +def SCALAR_FCVTA_F32toUS : SInst<"vcvta_u32", "(1U)1", "Sf">; +def SCALAR_FCVTA_F64toSS : SInst<"vcvta_s32", "(1S)1", 
"Sd">; +def SCALAR_FCVTA_F64toUS : SInst<"vcvta_u32", "(1U)1", "Sd">; +def SCALAR_FCVTA_F32toSD : SInst<"vcvta_s64", "(1S)1", "Sf">; +def SCALAR_FCVTA_F32toUD : SInst<"vcvta_u64", "(1U)1", "Sf">; +def SCALAR_FCVTA_F64toSD : SInst<"vcvta_s64", "(1S)1", "Sd">; +def SCALAR_FCVTA_F64toUD : SInst<"vcvta_u64", "(1U)1", "Sd">; + +def SCALAR_FCVTP_F32toSS : SInst<"vcvtp_s32", "(1S)1", "Sf">; +def SCALAR_FCVTP_F32toUS : SInst<"vcvtp_u32", "(1U)1", "Sf">; +def SCALAR_FCVTP_F64toSS : SInst<"vcvtp_s32", "(1S)1", "Sd">; +def SCALAR_FCVTP_F64toUS : SInst<"vcvtp_u32", "(1U)1", "Sd">; +def SCALAR_FCVTP_F32toSD : SInst<"vcvtp_s64", "(1S)1", "Sf">; +def SCALAR_FCVTP_F32toUD : SInst<"vcvtp_u64", "(1U)1", "Sf">; +def SCALAR_FCVTP_F64toSD : SInst<"vcvtp_s64", "(1S)1", "Sd">; +def SCALAR_FCVTP_F64toUD : SInst<"vcvtp_u64", "(1U)1", "Sd">; + +def SCALAR_FCVTZ_F32toSS : SInst<"vcvt_s32", "(1S)1", "Sf">; +def SCALAR_FCVTZ_F32toUS : SInst<"vcvt_u32", "(1U)1", "Sf">; +def SCALAR_FCVTZ_F64toSS : SInst<"vcvt_s32", "(1S)1", "Sd">; +def SCALAR_FCVTZ_F64toUS : SInst<"vcvt_u32", "(1U)1", "Sd">; +def SCALAR_FCVTZ_F32toSD : SInst<"vcvt_s64", "(1S)1", "Sf">; +def SCALAR_FCVTZ_F32toUD : SInst<"vcvt_u64", "(1U)1", "Sf">; +def SCALAR_FCVTZ_F64toSD : SInst<"vcvt_s64", "(1S)1", "Sd">; +def SCALAR_FCVTZ_F64toUD : SInst<"vcvt_u64", "(1U)1", "Sd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Estimate diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 2429a430433d7..15aa6d966db56 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -1209,35 +1209,55 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + 
NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP0(vcvth_bf16_f32), + NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_u32_f64, 
aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), diff --git a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c index 670b65070289d..034f6be9ae2e9 100644 --- 
a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c @@ -26,16 +26,38 @@ int32_t test_vcvtas_s32_f32(float32_t a) { return (int32_t)vcvtas_s32_f32(a); } -// CHECK-LABEL: define {{[^@]+}}@test_test_vcvtad_s64_f64 +// CHECK-LABEL: define {{[^@]+}}@test_vcvtad_s64_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VCVTAD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double [[A]]) // CHECK-NEXT: ret i64 [[VCVTAD_S64_F64_I]] // -int64_t test_test_vcvtad_s64_f64(float64_t a) { +int64_t test_vcvtad_s64_f64(float64_t a) { return (int64_t)vcvtad_s64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtas_s64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTAS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTAS_S64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +int64_t test_vcvtas_s64_f32(float32_t a) { + return (int64_t)vcvtas_s64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtad_s32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTAD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTAD_S32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int32_t test_vcvtad_s32_f64(float64_t a) { + return (int32_t)vcvtad_s32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtas_u32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -56,6 +78,28 @@ uint64_t test_vcvtad_u64_f64(float64_t a) { return (uint64_t)vcvtad_u64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtas_u64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTAS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float [[A]]) +// CHECK-NEXT: 
[[CONV:%.*]] = zext i32 [[VCVTAS_U64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +uint64_t test_vcvtas_u64_f32(float32_t a) { + return (uint64_t)vcvtas_u64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtad_u32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTAD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTAD_U32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +uint32_t test_vcvtad_u32_f64(float64_t a) { + return (uint32_t)vcvtad_u32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtms_s32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -76,6 +120,28 @@ int64_t test_vcvtmd_s64_f64(float64_t a) { return (int64_t)vcvtmd_s64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtms_s64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTMS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTMS_S64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +int64_t test_vcvtms_s64_f32(float32_t a) { + return (int64_t)vcvtms_s64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtmd_s32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTMD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTMD_S32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int32_t test_vcvtmd_s32_f64(float64_t a) { + return (int32_t)vcvtmd_s32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtms_u32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -96,6 +162,28 @@ uint64_t test_vcvtmd_u64_f64(float64_t a) { return (uint64_t)vcvtmd_u64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtms_u64_f32 +// CHECK-SAME: (float noundef 
[[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTMS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTMS_U64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +uint64_t test_vcvtms_u64_f32(float32_t a) { + return (uint64_t)vcvtms_u64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtmd_u32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTMD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTMD_U32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +uint32_t test_vcvtmd_u32_f64(float64_t a) { + return (uint32_t)vcvtmd_u32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtns_s32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -116,6 +204,28 @@ int64_t test_vcvtnd_s64_f64(float64_t a) { return (int64_t)vcvtnd_s64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtns_s64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTNS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTNS_S64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +int64_t test_vcvtns_s64_f32(float32_t a) { + return (int64_t)vcvtns_s64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtnd_s32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTND_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTND_S32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int32_t test_vcvtnd_s32_f64(float64_t a) { + return (int32_t)vcvtnd_s32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtns_u32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -136,6 +246,28 @@ 
uint64_t test_vcvtnd_u64_f64(float64_t a) { return (uint64_t)vcvtnd_u64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtns_u64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTNS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTNS_U64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +uint64_t test_vcvtns_u64_f32(float32_t a) { + return (uint64_t)vcvtns_u64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtnd_u32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTND_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTND_U32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +uint32_t test_vcvtnd_u32_f64(float64_t a) { + return (uint32_t)vcvtnd_u32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtps_s32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -156,6 +288,28 @@ int64_t test_vcvtpd_s64_f64(float64_t a) { return (int64_t)vcvtpd_s64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtps_s64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTPS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTPS_S64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +int64_t test_vcvtps_s64_f32(float32_t a) { + return (int64_t)vcvtps_s64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtpd_s32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTPD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTPD_S32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int32_t test_vcvtpd_s32_f64(float64_t a) { + return 
(int32_t)vcvtpd_s32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvtps_u32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -176,6 +330,28 @@ uint64_t test_vcvtpd_u64_f64(float64_t a) { return (uint64_t)vcvtpd_u64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvtps_u64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTPS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTPS_U64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +uint64_t test_vcvtps_u64_f32(float32_t a) { + return (uint64_t)vcvtps_u64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtpd_u32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTPD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTPD_U32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +uint32_t test_vcvtpd_u32_f64(float64_t a) { + return (uint32_t)vcvtpd_u32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvts_s32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -196,6 +372,28 @@ int64_t test_vcvtd_s64_f64(float64_t a) { return (int64_t)vcvtd_s64_f64(a); } +// CHECK-LABEL: define {{[^@]+}}@test_vcvts_s64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTS_S64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +int64_t test_vcvts_s64_f32(float32_t a) { + return (int64_t)vcvts_s64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtd_s32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double [[A]]) +// 
CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTD_S32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int32_t test_vcvtd_s32_f64(float64_t a) { + return (int32_t)vcvtd_s32_f64(a); +} + // CHECK-LABEL: define {{[^@]+}}@test_vcvts_u32_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: @@ -215,3 +413,26 @@ uint32_t test_vcvts_u32_f32(float32_t a) { uint64_t test_vcvtd_u64_f64(float64_t a) { return (uint64_t)vcvtd_u64_f64(a); } + +// CHECK-LABEL: define {{[^@]+}}@test_vcvts_u64_f32 +// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTS_U64_F32_I]] to i64 +// CHECK-NEXT: ret i64 [[CONV]] +// +uint64_t test_vcvts_u64_f32(float32_t a) { + return (uint64_t)vcvts_u64_f32(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vcvtd_u32_f64 +// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VCVTD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double [[A]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTD_U32_F64_I]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +uint32_t test_vcvtd_u32_f64(float64_t a) { + return (uint32_t)vcvtd_u32_f64(a); +} + From d2f2b17e391a01a56a45bae9febc72f959a2e82d Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 15 Oct 2025 16:53:42 +0100 Subject: [PATCH 2/4] fixup! 
[AArch64][llvm] Add support for new vcvt* intrinsics Turn clang-format off for NEONMAP*() macros --- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 15aa6d966db56..4ebcbca2cc56e 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -590,6 +590,7 @@ struct ARMVectorIntrinsicInfo { Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ TypeModifier } +// clang-format off static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0), NEONMAP0(splat_lane_v), @@ -1442,6 +1443,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), }; +// clang-format on // Some intrinsics are equivalent for codegen. static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { From 8983c905861f6a12ffdc47452f85bcc95b62d42b Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 15 Oct 2025 20:35:55 +0100 Subject: [PATCH 3/4] fixup! [AArch64][llvm] Add support for new vcvt* intrinsics Double or halve element width in arm_neon.td, to ensure no conversion is done when calling intrinsic.
--- clang/include/clang/Basic/arm_neon.td | 40 +++---- .../CodeGen/AArch64/neon-fcvt-intrinsics.c | 100 +++++++----------- 2 files changed, 60 insertions(+), 80 deletions(-) diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 315c60692dcaf..65cf5ee4af6cd 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1469,46 +1469,46 @@ def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">; def SCALAR_FCVTN_F32toSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; def SCALAR_FCVTN_F32toUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; -def SCALAR_FCVTN_F64toSS : SInst<"vcvtn_s32", "(1S)1", "Sd">; -def SCALAR_FCVTN_F64toUS : SInst<"vcvtn_u32", "(1U)1", "Sd">; -def SCALAR_FCVTN_F32toSD : SInst<"vcvtn_s64", "(1S)1", "Sf">; -def SCALAR_FCVTN_F32toUD : SInst<"vcvtn_u64", "(1U)1", "Sf">; +def SCALAR_FCVTN_F64toSS : SInst<"vcvtn_s32", "(1S<)1", "Sd">; +def SCALAR_FCVTN_F64toUS : SInst<"vcvtn_u32", "(1U<)1", "Sd">; +def SCALAR_FCVTN_F32toSD : SInst<"vcvtn_s64", "(1S>)1", "Sf">; +def SCALAR_FCVTN_F32toUD : SInst<"vcvtn_u64", "(1U>)1", "Sf">; def SCALAR_FCVTN_F64toSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; def SCALAR_FCVTN_F64toUD : SInst<"vcvtn_u64", "(1U)1", "Sd">; def SCALAR_FCVTM_F32toSS : SInst<"vcvtm_s32", "(1S)1", "Sf">; def SCALAR_FCVTM_F32toUS : SInst<"vcvtm_u32", "(1U)1", "Sf">; -def SCALAR_FCVTM_F64toSS : SInst<"vcvtm_s32", "(1S)1", "Sd">; -def SCALAR_FCVTM_F64toUS : SInst<"vcvtm_u32", "(1U)1", "Sd">; -def SCALAR_FCVTM_F32toSD : SInst<"vcvtm_s64", "(1S)1", "Sf">; -def SCALAR_FCVTM_F32toUD : SInst<"vcvtm_u64", "(1U)1", "Sf">; +def SCALAR_FCVTM_F64toSS : SInst<"vcvtm_s32", "(1S<)1", "Sd">; +def SCALAR_FCVTM_F64toUS : SInst<"vcvtm_u32", "(1U<)1", "Sd">; +def SCALAR_FCVTM_F32toSD : SInst<"vcvtm_s64", "(1S>)1", "Sf">; +def SCALAR_FCVTM_F32toUD : SInst<"vcvtm_u64", "(1U>)1", "Sf">; def SCALAR_FCVTM_F64toSD : SInst<"vcvtm_s64", "(1S)1", "Sd">; def SCALAR_FCVTM_F64toUD : SInst<"vcvtm_u64", "(1U)1", "Sd">; def 
SCALAR_FCVTA_F32toSS : SInst<"vcvta_s32", "(1S)1", "Sf">; def SCALAR_FCVTA_F32toUS : SInst<"vcvta_u32", "(1U)1", "Sf">; -def SCALAR_FCVTA_F64toSS : SInst<"vcvta_s32", "(1S)1", "Sd">; -def SCALAR_FCVTA_F64toUS : SInst<"vcvta_u32", "(1U)1", "Sd">; -def SCALAR_FCVTA_F32toSD : SInst<"vcvta_s64", "(1S)1", "Sf">; -def SCALAR_FCVTA_F32toUD : SInst<"vcvta_u64", "(1U)1", "Sf">; +def SCALAR_FCVTA_F64toSS : SInst<"vcvta_s32", "(1S<)1", "Sd">; +def SCALAR_FCVTA_F64toUS : SInst<"vcvta_u32", "(1U<)1", "Sd">; +def SCALAR_FCVTA_F32toSD : SInst<"vcvta_s64", "(1S>)1", "Sf">; +def SCALAR_FCVTA_F32toUD : SInst<"vcvta_u64", "(1U>)1", "Sf">; def SCALAR_FCVTA_F64toSD : SInst<"vcvta_s64", "(1S)1", "Sd">; def SCALAR_FCVTA_F64toUD : SInst<"vcvta_u64", "(1U)1", "Sd">; def SCALAR_FCVTP_F32toSS : SInst<"vcvtp_s32", "(1S)1", "Sf">; def SCALAR_FCVTP_F32toUS : SInst<"vcvtp_u32", "(1U)1", "Sf">; -def SCALAR_FCVTP_F64toSS : SInst<"vcvtp_s32", "(1S)1", "Sd">; -def SCALAR_FCVTP_F64toUS : SInst<"vcvtp_u32", "(1U)1", "Sd">; -def SCALAR_FCVTP_F32toSD : SInst<"vcvtp_s64", "(1S)1", "Sf">; -def SCALAR_FCVTP_F32toUD : SInst<"vcvtp_u64", "(1U)1", "Sf">; +def SCALAR_FCVTP_F64toSS : SInst<"vcvtp_s32", "(1S<)1", "Sd">; +def SCALAR_FCVTP_F64toUS : SInst<"vcvtp_u32", "(1U<)1", "Sd">; +def SCALAR_FCVTP_F32toSD : SInst<"vcvtp_s64", "(1S>)1", "Sf">; +def SCALAR_FCVTP_F32toUD : SInst<"vcvtp_u64", "(1U>)1", "Sf">; def SCALAR_FCVTP_F64toSD : SInst<"vcvtp_s64", "(1S)1", "Sd">; def SCALAR_FCVTP_F64toUD : SInst<"vcvtp_u64", "(1U)1", "Sd">; def SCALAR_FCVTZ_F32toSS : SInst<"vcvt_s32", "(1S)1", "Sf">; def SCALAR_FCVTZ_F32toUS : SInst<"vcvt_u32", "(1U)1", "Sf">; -def SCALAR_FCVTZ_F64toSS : SInst<"vcvt_s32", "(1S)1", "Sd">; -def SCALAR_FCVTZ_F64toUS : SInst<"vcvt_u32", "(1U)1", "Sd">; -def SCALAR_FCVTZ_F32toSD : SInst<"vcvt_s64", "(1S)1", "Sf">; -def SCALAR_FCVTZ_F32toUD : SInst<"vcvt_u64", "(1U)1", "Sf">; +def SCALAR_FCVTZ_F64toSS : SInst<"vcvt_s32", "(1S<)1", "Sd">; +def SCALAR_FCVTZ_F64toUS : SInst<"vcvt_u32", "(1U<)1", 
"Sd">; +def SCALAR_FCVTZ_F32toSD : SInst<"vcvt_s64", "(1S>)1", "Sf">; +def SCALAR_FCVTZ_F32toUD : SInst<"vcvt_u64", "(1U>)1", "Sf">; def SCALAR_FCVTZ_F64toSD : SInst<"vcvt_s64", "(1S)1", "Sd">; def SCALAR_FCVTZ_F64toUD : SInst<"vcvt_u64", "(1U)1", "Sd">; diff --git a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c index 034f6be9ae2e9..929df94aa60ef 100644 --- a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c @@ -39,9 +39,8 @@ int64_t test_vcvtad_s64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtas_s64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTAS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTAS_S64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTAS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTAS_S64_F32_I]] // int64_t test_vcvtas_s64_f32(float32_t a) { return (int64_t)vcvtas_s64_f32(a); @@ -50,9 +49,8 @@ int64_t test_vcvtas_s64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtad_s32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTAD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTAD_S32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTAD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTAD_S32_F64_I]] // int32_t test_vcvtad_s32_f64(float64_t a) { return (int32_t)vcvtad_s32_f64(a); @@ -81,9 +79,8 @@ uint64_t test_vcvtad_u64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtas_u64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTAS_U64_F32_I:%.*]] = call 
i32 @llvm.aarch64.neon.fcvtau.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTAS_U64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTAS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTAS_U64_F32_I]] // uint64_t test_vcvtas_u64_f32(float32_t a) { return (uint64_t)vcvtas_u64_f32(a); @@ -92,9 +89,8 @@ uint64_t test_vcvtas_u64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtad_u32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTAD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTAD_U32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTAD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTAD_U32_F64_I]] // uint32_t test_vcvtad_u32_f64(float64_t a) { return (uint32_t)vcvtad_u32_f64(a); @@ -123,9 +119,8 @@ int64_t test_vcvtmd_s64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtms_s64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTMS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTMS_S64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTMS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTMS_S64_F32_I]] // int64_t test_vcvtms_s64_f32(float32_t a) { return (int64_t)vcvtms_s64_f32(a); @@ -134,9 +129,8 @@ int64_t test_vcvtms_s64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtmd_s32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTMD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTMD_S32_F64_I]] to i32 -// CHECK-NEXT: ret 
i32 [[CONV]] +// CHECK-NEXT: [[VCVTMD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTMD_S32_F64_I]] // int32_t test_vcvtmd_s32_f64(float64_t a) { return (int32_t)vcvtmd_s32_f64(a); @@ -165,9 +159,8 @@ uint64_t test_vcvtmd_u64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtms_u64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTMS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTMS_U64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTMS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTMS_U64_F32_I]] // uint64_t test_vcvtms_u64_f32(float32_t a) { return (uint64_t)vcvtms_u64_f32(a); @@ -176,9 +169,8 @@ uint64_t test_vcvtms_u64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtmd_u32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTMD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTMD_U32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTMD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTMD_U32_F64_I]] // uint32_t test_vcvtmd_u32_f64(float64_t a) { return (uint32_t)vcvtmd_u32_f64(a); @@ -207,9 +199,8 @@ int64_t test_vcvtnd_s64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtns_s64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTNS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTNS_S64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTNS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 
[[VCVTNS_S64_F32_I]] // int64_t test_vcvtns_s64_f32(float32_t a) { return (int64_t)vcvtns_s64_f32(a); @@ -218,9 +209,8 @@ int64_t test_vcvtns_s64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtnd_s32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTND_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTND_S32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTND_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTND_S32_F64_I]] // int32_t test_vcvtnd_s32_f64(float64_t a) { return (int32_t)vcvtnd_s32_f64(a); @@ -249,9 +239,8 @@ uint64_t test_vcvtnd_u64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtns_u64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTNS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTNS_U64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTNS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTNS_U64_F32_I]] // uint64_t test_vcvtns_u64_f32(float32_t a) { return (uint64_t)vcvtns_u64_f32(a); @@ -260,9 +249,8 @@ uint64_t test_vcvtns_u64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtnd_u32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTND_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTND_U32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTND_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTND_U32_F64_I]] // uint32_t test_vcvtnd_u32_f64(float64_t a) { return (uint32_t)vcvtnd_u32_f64(a); @@ -291,9 +279,8 @@ int64_t 
test_vcvtpd_s64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtps_s64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTPS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTPS_S64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTPS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTPS_S64_F32_I]] // int64_t test_vcvtps_s64_f32(float32_t a) { return (int64_t)vcvtps_s64_f32(a); @@ -302,9 +289,8 @@ int64_t test_vcvtps_s64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtpd_s32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTPD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTPD_S32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTPD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTPD_S32_F64_I]] // int32_t test_vcvtpd_s32_f64(float64_t a) { return (int32_t)vcvtpd_s32_f64(a); @@ -333,9 +319,8 @@ uint64_t test_vcvtpd_u64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtps_u64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTPS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = zext i32 [[VCVTPS_U64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTPS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTPS_U64_F32_I]] // uint64_t test_vcvtps_u64_f32(float32_t a) { return (uint64_t)vcvtps_u64_f32(a); @@ -344,9 +329,8 @@ uint64_t test_vcvtps_u64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtpd_u32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] 
{ // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTPD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTPD_U32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTPD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTPD_U32_F64_I]] // uint32_t test_vcvtpd_u32_f64(float64_t a) { return (uint32_t)vcvtpd_u32_f64(a); @@ -375,9 +359,8 @@ int64_t test_vcvtd_s64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvts_s64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTS_S64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[VCVTS_S64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTS_S64_F32_I]] // int64_t test_vcvts_s64_f32(float32_t a) { return (int64_t)vcvts_s64_f32(a); @@ -386,9 +369,8 @@ int64_t test_vcvts_s64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtd_s32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTD_S32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTD_S32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTD_S32_F64_I]] // int32_t test_vcvtd_s32_f64(float64_t a) { return (int32_t)vcvtd_s32_f64(a); @@ -417,9 +399,8 @@ uint64_t test_vcvtd_u64_f64(float64_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvts_u64_f32 // CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTS_U64_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = 
zext i32 [[VCVTS_U64_F32_I]] to i64 -// CHECK-NEXT: ret i64 [[CONV]] +// CHECK-NEXT: [[VCVTS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float [[A]]) +// CHECK-NEXT: ret i64 [[VCVTS_U64_F32_I]] // uint64_t test_vcvts_u64_f32(float32_t a) { return (uint64_t)vcvts_u64_f32(a); @@ -428,9 +409,8 @@ uint64_t test_vcvts_u64_f32(float32_t a) { // CHECK-LABEL: define {{[^@]+}}@test_vcvtd_u32_f64 // CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VCVTD_U32_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double [[A]]) -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VCVTD_U32_F64_I]] to i32 -// CHECK-NEXT: ret i32 [[CONV]] +// CHECK-NEXT: [[VCVTD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double [[A]]) +// CHECK-NEXT: ret i32 [[VCVTD_U32_F64_I]] // uint32_t test_vcvtd_u32_f64(float64_t a) { return (uint32_t)vcvtd_u32_f64(a); From 43650930bf13ea2934b07eacb8fcca08b5cd6ba9 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 15 Oct 2025 23:38:59 +0100 Subject: [PATCH 4/4] fixup! [AArch64][llvm] Add support for new vcvt* intrinsics Add a mention of new intrinsics to clang/docs/ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 65b086caf3652..e8cf24aa64f20 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -509,6 +509,8 @@ X86 Support Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ +- Added new scalar ``vcvt*`` intrinsics for the following AArch64 instructions: + FCVTZ[US], FCVTN[US], FCVTM[US], FCVTP[US], FCVTA[US] Android Support ^^^^^^^^^^^^^^^