
Conversation

@Lukacma (Contributor) commented Dec 18, 2025

This patch adds patterns for lowering FCVT intrinsics followed by a scalar_to_vector node into SIMD FCVT instructions. This prevents the extra register moves that would otherwise be generated when the GPR variant of the instruction is used.
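For illustration, here is one of the tests added by this patch: without the new patterns the conversion goes through a GPR and needs an fmov back into the vector register, while with them a single SIMD-form fcvtzs is selected.

define <1 x i64> @fcvtzs_scalar_to_vector_h(half %a) {
  ; without the pattern: fcvtzs x8, h0 ; fmov d0, x8
  ; with the pattern:    fcvtzs d0, h0
  %val = fptosi half %a to i64
  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
  ret <1 x i64> %vec
}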

@llvmbot (Member) commented Dec 18, 2025

@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes

This patch adds patterns for lowering FCVT intrinsics followed by a scalar_to_vector node into SIMD FCVT instructions. This prevents the extra register moves that would otherwise be generated when the GPR variant of the instruction is used.


Patch is 77.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172837.diff

9 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+16-1)
  • (modified) llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll (+170)
  • (added) llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s (+1515)
  • (modified) llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll (+333-1)
  • (modified) llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll (+1-2)
  • (modified) llvm/test/CodeGen/AArch64/arm64-neon-copy.ll (+17-40)
  • (modified) llvm/test/CodeGen/AArch64/arm64-vcvt.ll (+8-20)
  • (modified) llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll (+2-4)
  • (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll (+2-4)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c22929f379dfc..447fd9ef66343 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6563,12 +6563,19 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
             (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+
+  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f16 FPR16:$Rn))))),
+            (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f32 FPR32:$Rn))))),
+            (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
   }
   def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
             (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
   def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
             (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
-            
+
+  def : Pat<(v1i64 (scalar_to_vector (i64 (OpN (f64 FPR64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
 }
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
@@ -6611,12 +6618,20 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
             (!cast<Instruction>(INST # DSr) $Rn)>;
   def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # SDr) $Rn)>;
+
+  def : Pat<(v1i64 (scalar_to_vector (i64 (round f16:$Rn)))), 
+            (!cast<Instruction>(INST # DHr) $Rn)>;
+  def : Pat<(v1i64 (scalar_to_vector (i64 (round f32:$Rn)))), 
+            (!cast<Instruction>(INST # DSr) $Rn)>;
   }
   def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))), 
             (!cast<Instruction>(INST # v1i32) $Rn)>;
   def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), 
             (!cast<Instruction>(INST # v1i64) $Rn)>;
 
+  def : Pat<(v1i64 (scalar_to_vector (i64 (round f64:$Rn)))), 
+            (!cast<Instruction>(INST # v1i64) $Rn)>;
+
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
             (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index a729772f2897a..ebaca00d2cdb9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -15,6 +15,10 @@
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_h_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzs_scalar_to_vector_s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_h_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcvtzu_scalar_to_vector_s_strict
 
 ;
 ; FPTOI
@@ -1941,3 +1945,169 @@ define double @fcvtzu_dd_simd(double %a) {
   %bc = bitcast i64 %i to double
   ret double %bc
 }
+
+;
+; FPTOI scalar_to_vector
+;
+
+define <1 x i64> @fcvtzs_scalar_to_vector_h(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = fptosi half %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_s(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = fptosi float %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_d(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    ret
+  %val = fptosi double %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_h(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %val = fptoui half %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_s(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %val = fptoui float %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_d(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu d0, d0
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ret
+  %val = fptoui double %a to i64
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+;
+; FPTOI scalar_to_vector strictfp
+;
+
+define <1 x i64> @fcvtzs_scalar_to_vector_h_strict(half %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_h_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzs_scalar_to_vector_s_strict(float %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzs x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzs_scalar_to_vector_s_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_h_strict(half %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, h0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_h_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, h0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
+
+define <1 x i64> @fcvtzu_scalar_to_vector_s_strict(float %x) {
+; CHECK-NOFPRCVT-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK-NOFPRCVT:       // %bb.0:
+; CHECK-NOFPRCVT-NEXT:    fcvtzu x8, s0
+; CHECK-NOFPRCVT-NEXT:    fmov d0, x8
+; CHECK-NOFPRCVT-NEXT:    ret
+;
+; CHECK-LABEL: fcvtzu_scalar_to_vector_s_strict:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu d0, s0
+; CHECK-NEXT:    ret
+  %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict")
+  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
+  ret <1 x i64> %vec
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
new file mode 100644
index 0000000000000..0850b306e8c79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s
@@ -0,0 +1,1515 @@
+	.file	"arm64-cvt-simd-fptoi.ll"
+	.text
+	.globl	test_fptosi_f16_i32_simd        // -- Begin function test_fptosi_f16_i32_simd
+	.p2align	2
+	.type	test_fptosi_f16_i32_simd,@function
+test_fptosi_f16_i32_simd:               // @test_fptosi_f16_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, h0
+	ret
+.Lfunc_end0:
+	.size	test_fptosi_f16_i32_simd, .Lfunc_end0-test_fptosi_f16_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f16_i64_simd        // -- Begin function test_fptosi_f16_i64_simd
+	.p2align	2
+	.type	test_fptosi_f16_i64_simd,@function
+test_fptosi_f16_i64_simd:               // @test_fptosi_f16_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end1:
+	.size	test_fptosi_f16_i64_simd, .Lfunc_end1-test_fptosi_f16_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f64_i32_simd        // -- Begin function test_fptosi_f64_i32_simd
+	.p2align	2
+	.type	test_fptosi_f64_i32_simd,@function
+test_fptosi_f64_i32_simd:               // @test_fptosi_f64_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end2:
+	.size	test_fptosi_f64_i32_simd, .Lfunc_end2-test_fptosi_f64_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f32_i64_simd        // -- Begin function test_fptosi_f32_i64_simd
+	.p2align	2
+	.type	test_fptosi_f32_i64_simd,@function
+test_fptosi_f32_i64_simd:               // @test_fptosi_f32_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end3:
+	.size	test_fptosi_f32_i64_simd, .Lfunc_end3-test_fptosi_f32_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f64_i64_simd        // -- Begin function test_fptosi_f64_i64_simd
+	.p2align	2
+	.type	test_fptosi_f64_i64_simd,@function
+test_fptosi_f64_i64_simd:               // @test_fptosi_f64_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end4:
+	.size	test_fptosi_f64_i64_simd, .Lfunc_end4-test_fptosi_f64_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptosi_f32_i32_simd        // -- Begin function test_fptosi_f32_i32_simd
+	.p2align	2
+	.type	test_fptosi_f32_i32_simd,@function
+test_fptosi_f32_i32_simd:               // @test_fptosi_f32_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end5:
+	.size	test_fptosi_f32_i32_simd, .Lfunc_end5-test_fptosi_f32_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f16_i32_simd        // -- Begin function test_fptoui_f16_i32_simd
+	.p2align	2
+	.type	test_fptoui_f16_i32_simd,@function
+test_fptoui_f16_i32_simd:               // @test_fptoui_f16_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, h0
+	ret
+.Lfunc_end6:
+	.size	test_fptoui_f16_i32_simd, .Lfunc_end6-test_fptoui_f16_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f16_i64_simd        // -- Begin function test_fptoui_f16_i64_simd
+	.p2align	2
+	.type	test_fptoui_f16_i64_simd,@function
+test_fptoui_f16_i64_simd:               // @test_fptoui_f16_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end7:
+	.size	test_fptoui_f16_i64_simd, .Lfunc_end7-test_fptoui_f16_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f64_i32_simd        // -- Begin function test_fptoui_f64_i32_simd
+	.p2align	2
+	.type	test_fptoui_f64_i32_simd,@function
+test_fptoui_f64_i32_simd:               // @test_fptoui_f64_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end8:
+	.size	test_fptoui_f64_i32_simd, .Lfunc_end8-test_fptoui_f64_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f32_i64_simd        // -- Begin function test_fptoui_f32_i64_simd
+	.p2align	2
+	.type	test_fptoui_f32_i64_simd,@function
+test_fptoui_f32_i64_simd:               // @test_fptoui_f32_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end9:
+	.size	test_fptoui_f32_i64_simd, .Lfunc_end9-test_fptoui_f32_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f64_i64_simd        // -- Begin function test_fptoui_f64_i64_simd
+	.p2align	2
+	.type	test_fptoui_f64_i64_simd,@function
+test_fptoui_f64_i64_simd:               // @test_fptoui_f64_i64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, d0
+	ret
+.Lfunc_end10:
+	.size	test_fptoui_f64_i64_simd, .Lfunc_end10-test_fptoui_f64_i64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_fptoui_f32_i32_simd        // -- Begin function test_fptoui_f32_i32_simd
+	.p2align	2
+	.type	test_fptoui_f32_i32_simd,@function
+test_fptoui_f32_i32_simd:               // @test_fptoui_f32_i32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, s0
+	ret
+.Lfunc_end11:
+	.size	test_fptoui_f32_i32_simd, .Lfunc_end11-test_fptoui_f32_i32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i32_f16_simd             // -- Begin function fptosi_i32_f16_simd
+	.p2align	2
+	.type	fptosi_i32_f16_simd,@function
+fptosi_i32_f16_simd:                    // @fptosi_i32_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, h0
+	ret
+.Lfunc_end12:
+	.size	fptosi_i32_f16_simd, .Lfunc_end12-fptosi_i32_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i64_f16_simd             // -- Begin function fptosi_i64_f16_simd
+	.p2align	2
+	.type	fptosi_i64_f16_simd,@function
+fptosi_i64_f16_simd:                    // @fptosi_i64_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, h0
+	ret
+.Lfunc_end13:
+	.size	fptosi_i64_f16_simd, .Lfunc_end13-fptosi_i64_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i64_f32_simd             // -- Begin function fptosi_i64_f32_simd
+	.p2align	2
+	.type	fptosi_i64_f32_simd,@function
+fptosi_i64_f32_simd:                    // @fptosi_i64_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, s0
+	ret
+.Lfunc_end14:
+	.size	fptosi_i64_f32_simd, .Lfunc_end14-fptosi_i64_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i32_f64_simd             // -- Begin function fptosi_i32_f64_simd
+	.p2align	2
+	.type	fptosi_i32_f64_simd,@function
+fptosi_i32_f64_simd:                    // @fptosi_i32_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, d0
+	ret
+.Lfunc_end15:
+	.size	fptosi_i32_f64_simd, .Lfunc_end15-fptosi_i32_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i64_f64_simd             // -- Begin function fptosi_i64_f64_simd
+	.p2align	2
+	.type	fptosi_i64_f64_simd,@function
+fptosi_i64_f64_simd:                    // @fptosi_i64_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	d0, d0
+	ret
+.Lfunc_end16:
+	.size	fptosi_i64_f64_simd, .Lfunc_end16-fptosi_i64_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptosi_i32_f32_simd             // -- Begin function fptosi_i32_f32_simd
+	.p2align	2
+	.type	fptosi_i32_f32_simd,@function
+fptosi_i32_f32_simd:                    // @fptosi_i32_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzs	s0, s0
+	ret
+.Lfunc_end17:
+	.size	fptosi_i32_f32_simd, .Lfunc_end17-fptosi_i32_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i32_f16_simd             // -- Begin function fptoui_i32_f16_simd
+	.p2align	2
+	.type	fptoui_i32_f16_simd,@function
+fptoui_i32_f16_simd:                    // @fptoui_i32_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, h0
+	ret
+.Lfunc_end18:
+	.size	fptoui_i32_f16_simd, .Lfunc_end18-fptoui_i32_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i64_f16_simd             // -- Begin function fptoui_i64_f16_simd
+	.p2align	2
+	.type	fptoui_i64_f16_simd,@function
+fptoui_i64_f16_simd:                    // @fptoui_i64_f16_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, h0
+	ret
+.Lfunc_end19:
+	.size	fptoui_i64_f16_simd, .Lfunc_end19-fptoui_i64_f16_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i64_f32_simd             // -- Begin function fptoui_i64_f32_simd
+	.p2align	2
+	.type	fptoui_i64_f32_simd,@function
+fptoui_i64_f32_simd:                    // @fptoui_i64_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, s0
+	ret
+.Lfunc_end20:
+	.size	fptoui_i64_f32_simd, .Lfunc_end20-fptoui_i64_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i32_f64_simd             // -- Begin function fptoui_i32_f64_simd
+	.p2align	2
+	.type	fptoui_i32_f64_simd,@function
+fptoui_i32_f64_simd:                    // @fptoui_i32_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, d0
+	ret
+.Lfunc_end21:
+	.size	fptoui_i32_f64_simd, .Lfunc_end21-fptoui_i32_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i64_f64_simd             // -- Begin function fptoui_i64_f64_simd
+	.p2align	2
+	.type	fptoui_i64_f64_simd,@function
+fptoui_i64_f64_simd:                    // @fptoui_i64_f64_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	d0, d0
+	ret
+.Lfunc_end22:
+	.size	fptoui_i64_f64_simd, .Lfunc_end22-fptoui_i64_f64_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fptoui_i32_f32_simd             // -- Begin function fptoui_i32_f32_simd
+	.p2align	2
+	.type	fptoui_i32_f32_simd,@function
+fptoui_i32_f32_simd:                    // @fptoui_i32_f32_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtzu	s0, s0
+	ret
+.Lfunc_end23:
+	.size	fptoui_i32_f32_simd, .Lfunc_end23-fptoui_i32_f32_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_ds_round_simd            // -- Begin function fcvtas_ds_round_simd
+	.p2align	2
+	.type	fcvtas_ds_round_simd,@function
+fcvtas_ds_round_simd:                   // @fcvtas_ds_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, s0
+	ret
+.Lfunc_end24:
+	.size	fcvtas_ds_round_simd, .Lfunc_end24-fcvtas_ds_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_sd_round_simd            // -- Begin function fcvtas_sd_round_simd
+	.p2align	2
+	.type	fcvtas_sd_round_simd,@function
+fcvtas_sd_round_simd:                   // @fcvtas_sd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, d0
+	ret
+.Lfunc_end25:
+	.size	fcvtas_sd_round_simd, .Lfunc_end25-fcvtas_sd_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_ss_round_simd            // -- Begin function fcvtas_ss_round_simd
+	.p2align	2
+	.type	fcvtas_ss_round_simd,@function
+fcvtas_ss_round_simd:                   // @fcvtas_ss_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	s0, s0
+	ret
+.Lfunc_end26:
+	.size	fcvtas_ss_round_simd, .Lfunc_end26-fcvtas_ss_round_simd
+	.cfi_endproc
+                                        // -- End function
+	.globl	fcvtas_dd_round_simd            // -- Begin function fcvtas_dd_round_simd
+	.p2align	2
+	.type	fcvtas_dd_round_simd,@function
+fcvtas_dd_round_simd:                   // @fcvtas_dd_round_simd
+	.cfi_startproc
+// %bb.0:
+	fcvtas	d0, d0
+	ret
+.Lfunc_end27:
+	.size	fcvtas_dd_round_sim...
[truncated]

Copilot AI (Contributor) left a comment

Pull request overview

This pull request optimizes floating-point-to-integer conversions on AArch64 whose results are placed in SIMD registers. It eliminates unnecessary moves between general-purpose and floating-point registers when a floating-point value is converted to an integer and then moved into a vector register: the patch adds patterns that lower FCVT intrinsics followed by scalar_to_vector nodes directly into SIMD FCVT instructions (a short sketch follows the list below).

  • Adds patterns to emit direct SIMD FCVT instructions instead of GPR conversions followed by register moves
  • Updates test expectations to reflect the optimized instruction sequences (removes extra fmov instructions)
  • Covers both standard and strict floating-point conversion intrinsics
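As a minimal sketch of the intrinsic form these patterns also cover (the function name here is illustrative; the intrinsic and expected output mirror a test quoted in the review comments below):

define <1 x i64> @fcvtas_scalar_to_vector_s(float %a) {
  ; the new v1i64 scalar_to_vector pattern selects "fcvtas d0, s0"
  %val = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %a)
  %vec = insertelement <1 x i64> poison, i64 %val, i32 0
  ret <1 x i64> %vec
}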

Reviewed changes

Copilot reviewed 9 out of 9 changed files in this pull request and generated 18 comments.

Summary per file:

  • llvm/lib/Target/AArch64/AArch64InstrInfo.td: Adds instruction selection patterns for scalar_to_vector following FCVT operations
  • llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll: Updates test expectations to verify SIMD FCVT instructions are used
  • llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll: Updates test expectations for constrained FP conversions
  • llvm/test/CodeGen/AArch64/arm64-vcvt.ll: Removes FIXME comments and updates tests for improved code generation
  • llvm/test/CodeGen/AArch64/arm64-neon-copy.ll: Updates test expectations for bitcast and FCVT sequences
  • llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll: Updates test expectations for fixed-point conversion optimizations
  • llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll: Adds comprehensive test coverage for new scalar_to_vector patterns with various FCVT intrinsics
  • llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.s: Adds assembly test file for generated SIMD conversion instructions
  • llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll: Adds LLVM IR test coverage for scalar_to_vector patterns with fptosi/fptoui and strict variants


@github-actions bot commented Dec 18, 2025

🪟 Windows x64 Test Results

  • 128783 tests passed
  • 2828 tests skipped

✅ The build succeeded and all tests passed.

@github-actions bot commented Dec 18, 2025

🐧 Linux x64 Test Results

  • 187631 tests passed
  • 4972 tests skipped

✅ The build succeeded and all tests passed.

}

;
; Intriniscs (scalar_to_vector)
Contributor commented:

Suggested change
; Intriniscs (scalar_to_vector)
; Intrinsics (scalar_to_vector)

ret <2 x i64> %tmp3
}

; FIXME: Generate "fcvtzs d0, d0"?
Contributor commented:

Can these FIXMEs be removed now?

}



Contributor commented:

nit: extra lines between tests

; CHECK: // %bb.0:
; CHECK-NEXT: fcvtms d0, h0
; CHECK-NEXT: ret
%vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
Contributor commented:

Suggested change
%vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
%vcvtmh_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)

; CHECK: // %bb.0:
; CHECK-NEXT: fcvtas d0, s0
; CHECK-NEXT: ret
%i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
Contributor commented:

Is it worth updating the tests with %i to something like %vcvtah_s64_f32 to match the others?
