Skip to content

Commit

Permalink
[SVE] Restrict cmp+and->pred_cmp isel to instances where the and is the sole user of the compare.
Browse files Browse the repository at this point in the history

Without the single use restriction we may replace the and with a
more costly duplicated compare.

Differential Revision: https://reviews.llvm.org/D145755
  • Loading branch information
paulwalker-arm committed Mar 13, 2023
1 parent 58825d2 commit 3870857
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 18 deletions.
16 changes: 10 additions & 6 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -47,6 +47,10 @@ def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
]>;

// SVE merge-zeroing compare node (AArch64ISD::SETCC_MERGE_ZERO).
def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>;
// Variant of AArch64setcc_z that only matches when the compare node has a
// single user. Used by the cmp+and -> predicated-compare patterns below so
// that folding the compare into the and cannot duplicate a compare that has
// other users (the duplicated compare would be more costly than keeping the
// and; see D145755).
def AArch64setcc_z_oneuse : PatFrag<(ops node:$pg, node:$op1, node:$op2, node:$cc),
(AArch64setcc_z node:$pg, node:$op1, node:$op2, node:$cc), [{
  return N->hasOneUse();
}]>;

def SVEPatternOperand : AsmOperandClass {
let Name = "SVEPattern";
Expand Down Expand Up @@ -5028,9 +5032,9 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
(cmp $Op1, $Op2, $Op3)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
(cmp $Op1, $Op3, $Op2)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
(cmp $Pg, $Op2, $Op3)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
(cmp $Pg, $Op3, $Op2)>;
}

Expand All @@ -5040,9 +5044,9 @@ multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt
(cmp $Op1, $Op2)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
(cmp $Op1, $Op2)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
(cmp $Pg, $Op1)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
(cmp $Pg, $Op1)>;
}

Expand Down Expand Up @@ -5126,13 +5130,13 @@ multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
commuted_cc)),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
(AArch64setcc_z (predvt (AArch64ptrue 31)),
(AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt ZPR:$Zs1),
(intvt (splat_vector (immtype:$imm))),
cc))),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
(AArch64setcc_z (predvt (AArch64ptrue 31)),
(AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt (splat_vector (immtype:$imm))),
(intvt ZPR:$Zs1),
commuted_cc))),
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fcmp.ll
Expand Up @@ -493,8 +493,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x
; CHECK-LABEL: and_of_multiuse_fcmp_ogt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, %y
%and = and <vscale x 4 x i1> %pg, %cmp
Expand All @@ -507,8 +507,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x
; CHECK-LABEL: and_of_multiuse_fcmp_ogt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
%and = and <vscale x 4 x i1> %pg, %cmp
Expand All @@ -521,8 +521,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x
; CHECK-LABEL: and_of_multiuse_fcmp_olt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, %y
%and = and <vscale x 4 x i1> %pg, %cmp
Expand All @@ -535,8 +535,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
; CHECK-LABEL: and_of_multiuse_fcmp_olt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
%and = and <vscale x 4 x i1> %pg, %cmp
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
Expand Up @@ -23,12 +23,11 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
; CHECK-NEXT: sxtw z5.d, p0/m, z6.d
; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d
; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
; CHECK-NEXT: ld1w { z5.d }, p1/z, [x1]
; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z5.s
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: cmpne p2.d, p2/z, z4.d, #0
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT: add z2.d, p1/m, z2.d, z1.d
; CHECK-NEXT: uaddv d0, p0, z2.d
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
Expand Up @@ -1223,8 +1223,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle(<vscale x 4 x i1> %a, <vscale x 4 x i
; CHECK-LABEL: and_of_multiuse_icmp_sle:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: cmpge p1.s, p1/z, z1.s, z0.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = icmp sle <vscale x 4 x i32> %b, %c
%and = and <vscale x 4 x i1> %a, %cmp
Expand All @@ -1237,8 +1237,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle_imm(<vscale x 4 x i1> %a, <vscale x 4
; CHECK-LABEL: and_of_multiuse_icmp_sle_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmple p0.s, p0/z, z0.s, #1
; CHECK-NEXT: cmple p1.s, p1/z, z0.s, #1
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%imm = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp sle <vscale x 4 x i32> %b, %imm
Expand All @@ -1252,8 +1252,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt(<vscale x 4 x i1> %a, <vscale x 4 x i
; CHECK-LABEL: and_of_multiuse_icmp_ugt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 4 x i32> %b, %c
%and = and <vscale x 4 x i1> %a, %cmp
Expand All @@ -1266,8 +1266,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt_imm(<vscale x 4 x i1> %a, <vscale x 4
; CHECK-LABEL: and_of_multiuse_icmp_ugt_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, #1
; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, #1
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%imm = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 4 x i32> %b, %imm
Expand Down

0 comments on commit 3870857

Please sign in to comment.