From 26573705858fd4dc0f204b2864db7c51a9043034 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 2 Sep 2025 06:18:28 +0100 Subject: [PATCH] [AArch64] Guard fptosi+sitofp patterns with one use checks. Otherwise we can end up with more instructions, needing to emit both `fcvtzu w0, s0` and `fcvtzu s0, s0`. --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 16 ++++++++++------ llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll | 18 ++++++------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ce40e202f30f5..62b26b5239365 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6706,20 +6706,24 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), // Some float -> int -> float conversion patterns for which we want to keep the // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. 
+let HasOneUse = 1 in { +def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>; +def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>; +} let Predicates = [HasNEONandIsSME2p2StreamingSafe] in { -def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), +def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; -def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), +def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))), (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>; -def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), +def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))), (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>; -def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), +def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { -def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), +def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; -def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), +def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))), (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>; } diff --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll index d4caf64294f45..1207de746894b 100644 --- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll @@ -122,9 +122,8 @@ entry: define i64 @testu_f64_multiuse(double %x) { ; CHECK-LABEL: testu_f64_multiuse: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu d1, d0 ; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: ucvtf d1, d1 +; CHECK-NEXT: ucvtf d1, x8 ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: csel x0, x8, xzr, eq ; CHECK-NEXT: ret @@ -139,9 +138,8 @@ entry: 
define i32 @testu_f32_multiuse(float %x) { ; CHECK-LABEL: testu_f32_multiuse: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu s1, s0 ; CHECK-NEXT: fcvtzu w8, s0 -; CHECK-NEXT: ucvtf s1, s1 +; CHECK-NEXT: ucvtf s1, w8 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: csel w0, w8, wzr, eq ; CHECK-NEXT: ret @@ -156,9 +154,8 @@ entry: define i32 @testu_f16_multiuse(half %x) { ; CHECK-LABEL: testu_f16_multiuse: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu h1, h0 ; CHECK-NEXT: fcvtzu w8, h0 -; CHECK-NEXT: ucvtf h1, h1 +; CHECK-NEXT: ucvtf h1, w8 ; CHECK-NEXT: fcmp h0, h1 ; CHECK-NEXT: csel w0, w8, wzr, eq ; CHECK-NEXT: ret @@ -173,9 +170,8 @@ entry: define i64 @tests_f64_multiuse(double %x) { ; CHECK-LABEL: tests_f64_multiuse: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs d1, d0 ; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: scvtf d1, d1 +; CHECK-NEXT: scvtf d1, x8 ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: csel x0, x8, xzr, eq ; CHECK-NEXT: ret @@ -190,9 +186,8 @@ entry: define i32 @tests_f32_multiuse(float %x) { ; CHECK-LABEL: tests_f32_multiuse: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs s1, s0 ; CHECK-NEXT: fcvtzs w8, s0 -; CHECK-NEXT: scvtf s1, s1 +; CHECK-NEXT: scvtf s1, w8 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: csel w0, w8, wzr, eq ; CHECK-NEXT: ret @@ -207,9 +202,8 @@ entry: define i32 @tests_f16_multiuse(half %x) { ; CHECK-LABEL: tests_f16_multiuse: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs h1, h0 ; CHECK-NEXT: fcvtzs w8, h0 -; CHECK-NEXT: scvtf h1, h1 +; CHECK-NEXT: scvtf h1, w8 ; CHECK-NEXT: fcmp h0, h1 ; CHECK-NEXT: csel w0, w8, wzr, eq ; CHECK-NEXT: ret