Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64] Alter latency of FCSEL under Cortex-A510 #80178

Merged
merged 1 commit into from
Feb 1, 2024

Conversation

davemgreen
Copy link
Collaborator

As per the Cortex-A510 software optimization guide, the latency of a fcsel should be 3 not 4. It would previously get the latency from WriteF.

As per the Cortex-A510 software optimization guide, the latency of a fcsel
should be 3 not 4. It would previously get the latency from WriteF.
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 31, 2024

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

As per the Cortex-A510 software optimization guide, the latency of a fcsel should be 3 not 4. It would previously get the latency from WriteF.


Patch is 25.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80178.diff

6 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64SchedA510.td (+2)
  • (modified) llvm/test/CodeGen/AArch64/select_fmf.ll (+8-8)
  • (modified) llvm/test/CodeGen/AArch64/tbl-loops.ll (+4-4)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll (+70-70)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll (+70-70)
  • (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s (+2-2)
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 1b66d6bb8fbd4..5e36b6f4d34a2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -394,6 +394,8 @@ def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
 def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
 def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
 
+def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>;
+
 // 4.15. Advanced SIMD integer instructions
 // ASIMD absolute diff
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 5479e5f3b88d2..92d8676ca04be 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -9,11 +9,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnm s4, s1, s2
 ; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fmaxnm s1, s0, s3
+; CHECK-NEXT:    fmaxnm s2, s0, s3
+; CHECK-NEXT:    fmov s1, #0.50000000
 ; CHECK-NEXT:    fccmp s4, s0, #4, lt
-; CHECK-NEXT:    fmov s4, #0.50000000
-; CHECK-NEXT:    fcsel s2, s1, s0, gt
-; CHECK-NEXT:    fadd s1, s0, s4
+; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fcsel s2, s2, s0, gt
 ; CHECK-NEXT:    fadd s4, s1, s2
 ; CHECK-NEXT:    fcmp s4, s1
 ; CHECK-NEXT:    b.le .LBB0_2
@@ -67,11 +67,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnm s4, s1, s2
 ; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fmaxnm s1, s0, s3
+; CHECK-NEXT:    fmaxnm s2, s0, s3
+; CHECK-NEXT:    fmov s1, #0.50000000
 ; CHECK-NEXT:    fccmp s4, s0, #0, ge
-; CHECK-NEXT:    fmov s4, #0.50000000
-; CHECK-NEXT:    fcsel s2, s0, s1, gt
-; CHECK-NEXT:    fadd s1, s0, s4
+; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fcsel s2, s0, s2, gt
 ; CHECK-NEXT:    fadd s4, s1, s2
 ; CHECK-NEXT:    fcmp s4, s1
 ; CHECK-NEXT:    b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 365fe03ab0b08..4f8a4f7aede3e 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -562,25 +562,25 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmp s3, s1
 ; CHECK-NEXT:    fcsel s4, s1, s3, gt
 ; CHECK-NEXT:    fcmp s3, #0.0
-; CHECK-NEXT:    ldp s3, s5, [x8, #8]
 ; CHECK-NEXT:    fcvtzs w11, s2
+; CHECK-NEXT:    ldp s3, s5, [x8, #8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    fcsel s4, s0, s4, mi
 ; CHECK-NEXT:    fcmp s3, s1
 ; CHECK-NEXT:    strb w11, [x9]
+; CHECK-NEXT:    fcvtzs w12, s4
 ; CHECK-NEXT:    fcsel s6, s1, s3, gt
 ; CHECK-NEXT:    fcmp s3, #0.0
-; CHECK-NEXT:    fcvtzs w12, s4
 ; CHECK-NEXT:    fcsel s3, s0, s6, mi
 ; CHECK-NEXT:    fcmp s5, s1
 ; CHECK-NEXT:    strb w12, [x9, #1]
 ; CHECK-NEXT:    fcsel s6, s1, s5, gt
 ; CHECK-NEXT:    fcmp s5, #0.0
 ; CHECK-NEXT:    fcvtzs w13, s3
-; CHECK-NEXT:    fcsel s5, s0, s6, mi
+; CHECK-NEXT:    fcsel s2, s0, s6, mi
 ; CHECK-NEXT:    subs w10, w10, #1
 ; CHECK-NEXT:    strb w13, [x9, #2]
-; CHECK-NEXT:    fcvtzs w14, s5
+; CHECK-NEXT:    fcvtzs w14, s2
 ; CHECK-NEXT:    strb w14, [x9, #3]
 ; CHECK-NEXT:    add x9, x9, #4
 ; CHECK-NEXT:    b.ne .LBB3_6
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4f1e3fdc34fcd..16b34cce93293 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[3]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[4]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[5]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[6]
 ; CHECK-NOFP-SD-NEXT:    mov h1, v1.h[7]
 ; CHECK-NOFP-SD-NEXT:    mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    fcmp s0, s1
+; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s0, s0, s1, gt
 ; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
-; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    ldr h2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NOFP-NEXT:    mov w8, #-8388608 // =0xff800000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, gt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    mov w8, #57344 // =0xe000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-NEXT:    movk w8, #51071, lsl #16
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, gt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index a2bfc3c438da3..497109dfeaf09 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[3]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[4]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[5]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[6]
 ; CHECK-NOFP-SD-NEXT:    mov h1, v1.h[7]
 ; CHECK-NOFP-SD-NEXT:    mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    fcmp s0, s1
+; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s0, s0, s1, lt
 ; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
-; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, lt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, lt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    ldr h2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NOFP-NEXT:    mov w8, #2139095040 // =0x7f800000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+...
[truncated]

@harviniriawan
Copy link
Contributor

Thank you Dave

Copy link
Collaborator

@sjoerdmeijer sjoerdmeijer left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I also see 3 in the SWOG.

@davemgreen davemgreen merged commit 5d41788 into llvm:main Feb 1, 2024
4 of 5 checks passed
@davemgreen davemgreen deleted the gh-a510-fcsel3 branch February 1, 2024 13:42
smithp35 pushed a commit to smithp35/llvm-project that referenced this pull request Feb 1, 2024
As per the Cortex-A510 software optimization guide, the latency of a
fcsel should be 3 not 4. It would previously get the latency from
WriteF.
carlosgalvezp pushed a commit to carlosgalvezp/llvm-project that referenced this pull request Feb 1, 2024
As per the Cortex-A510 software optimization guide, the latency of a
fcsel should be 3 not 4. It would previously get the latency from
WriteF.
agozillon pushed a commit to agozillon/llvm-project that referenced this pull request Feb 5, 2024
As per the Cortex-A510 software optimization guide, the latency of a
fcsel should be 3 not 4. It would previously get the latency from
WriteF.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants