Skip to content

Commit

Permalink
[SVE] Restrict cmp+and->pred_cmp isel to instances where the and is the sole user of the compare.
Browse files Browse the repository at this point in the history

Without the single use restriction we may replace the and with a
more costly duplicated compare.

Differential Revision: https://reviews.llvm.org/D145755
  • Loading branch information
paulwalker-arm committed Mar 13, 2023
1 parent 58825d2 commit 3870857
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 18 deletions.
16 changes: 10 additions & 6 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -47,6 +47,10 @@ def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
]>;

// SVE merge-zeroing compare node (AArch64ISD::SETCC_MERGE_ZERO).
def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>;
// Variant of AArch64setcc_z that only matches when the compare node has a
// single user. Used by the cmp+and -> predicated-compare patterns below so
// that folding the compare into the and cannot duplicate a compare that has
// other users (the duplicated compare would be more costly than keeping the
// and; see D145755).
def AArch64setcc_z_oneuse : PatFrag<(ops node:$pg, node:$op1, node:$op2, node:$cc),
(AArch64setcc_z node:$pg, node:$op1, node:$op2, node:$cc), [{
  return N->hasOneUse();
}]>;

def SVEPatternOperand : AsmOperandClass {
let Name = "SVEPattern";
Expand Down Expand Up @@ -5028,9 +5032,9 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
(cmp $Op1, $Op2, $Op3)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
(cmp $Op1, $Op3, $Op2)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
(cmp $Pg, $Op2, $Op3)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
(cmp $Pg, $Op3, $Op2)>;
}

Expand All @@ -5040,9 +5044,9 @@ multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt
(cmp $Op1, $Op2)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
(cmp $Op1, $Op2)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
(cmp $Pg, $Op1)>;
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
(cmp $Pg, $Op1)>;
}

Expand Down Expand Up @@ -5126,13 +5130,13 @@ multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
commuted_cc)),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
(AArch64setcc_z (predvt (AArch64ptrue 31)),
(AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt ZPR:$Zs1),
(intvt (splat_vector (immtype:$imm))),
cc))),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
(AArch64setcc_z (predvt (AArch64ptrue 31)),
(AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt (splat_vector (immtype:$imm))),
(intvt ZPR:$Zs1),
commuted_cc))),
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fcmp.ll
Expand Up @@ -493,8 +493,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x
; CHECK-LABEL: and_of_multiuse_fcmp_ogt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, %y
%and = and <vscale x 4 x i1> %pg, %cmp
Expand All @@ -507,8 +507,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x
; CHECK-LABEL: and_of_multiuse_fcmp_ogt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
%and = and <vscale x 4 x i1> %pg, %cmp
Expand All @@ -521,8 +521,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x
; CHECK-LABEL: and_of_multiuse_fcmp_olt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, %y
%and = and <vscale x 4 x i1> %pg, %cmp
Expand All @@ -535,8 +535,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
; CHECK-LABEL: and_of_multiuse_fcmp_olt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
%and = and <vscale x 4 x i1> %pg, %cmp
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
Expand Up @@ -23,12 +23,11 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
; CHECK-NEXT: sxtw z5.d, p0/m, z6.d
; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d
; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
; CHECK-NEXT: ld1w { z5.d }, p1/z, [x1]
; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z5.s
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: cmpne p2.d, p2/z, z4.d, #0
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT: add z2.d, p1/m, z2.d, z1.d
; CHECK-NEXT: uaddv d0, p0, z2.d
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
Expand Up @@ -1223,8 +1223,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle(<vscale x 4 x i1> %a, <vscale x 4 x i
; CHECK-LABEL: and_of_multiuse_icmp_sle:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: cmpge p1.s, p1/z, z1.s, z0.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = icmp sle <vscale x 4 x i32> %b, %c
%and = and <vscale x 4 x i1> %a, %cmp
Expand All @@ -1237,8 +1237,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle_imm(<vscale x 4 x i1> %a, <vscale x 4
; CHECK-LABEL: and_of_multiuse_icmp_sle_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmple p0.s, p0/z, z0.s, #1
; CHECK-NEXT: cmple p1.s, p1/z, z0.s, #1
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%imm = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp sle <vscale x 4 x i32> %b, %imm
Expand All @@ -1252,8 +1252,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt(<vscale x 4 x i1> %a, <vscale x 4 x i
; CHECK-LABEL: and_of_multiuse_icmp_ugt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 4 x i32> %b, %c
%and = and <vscale x 4 x i1> %a, %cmp
Expand All @@ -1266,8 +1266,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt_imm(<vscale x 4 x i1> %a, <vscale x 4
; CHECK-LABEL: and_of_multiuse_icmp_ugt_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, #1
; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, #1
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%imm = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 4 x i32> %b, %imm
Expand Down

0 comments on commit 3870857

Please sign in to comment.