Skip to content

Commit

Permalink
[AArch64] Increase AddedComplexity of BIC
Browse files Browse the repository at this point in the history
This diff adjusts AddedComplexity of BIC to bump its position
in the list of patterns to make LLVM pick it instead of MVN + AND.
MVN + AND requires 2 cycles, so does e.g. MOV + BIC, but the latter
outperforms the former if the instructions producing the operands of
BIC can be issued in parallel.

One may consider the following example:

ldur x15, [x0, #2] # 4 cycles
mvn x10, x15 # 1 cycle (depends on ldur)
and x9, x10, #0x8080808080808080

vs.

ldur x15, [x0, #2] # 4 cycles
mov x9, #0x8080808080808080 # 1 cycle (can be executed in parallel with ldur)
bic x9, x9, x15. # 1 cycle

Test plan: ninja check-all

Differential revision: https://reviews.llvm.org/D133345
  • Loading branch information
alexander-shaposhnikov committed Sep 6, 2022
1 parent 31f434e commit 6a2442e
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 21 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Expand Up @@ -2994,8 +2994,8 @@ class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>

// Split from LogicalImm as not all instructions have both.
multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
SDPatternOperator OpNode> {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
SDPatternOperator OpNode, int AddedComplexityVal = 0> {
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = AddedComplexityVal in {
def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -2097,7 +2097,7 @@ defm BICS : LogicalRegS<0b11, 1, "bics",
BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND : LogicalReg<0b00, 0, "and", and>;
defm BIC : LogicalReg<0b00, 1, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
defm EON : LogicalReg<0b10, 1, "eon",
BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/addsub.ll
Expand Up @@ -373,8 +373,8 @@ declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
; CHECK-LABEL: uadd_add:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn w8, w0
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: mov w8, #255
; CHECK-NEXT: bic w8, w8, w0
; CHECK-NEXT: add w8, w8, w1, uxtb
; CHECK-NEXT: lsr w0, w8, #8
; CHECK-NEXT: add w8, w8, #1
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
Expand Up @@ -5,10 +5,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.8b, v0.8b, #0
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <8 x i8> %a, zeroinitializer
%cast = bitcast <8 x i1> %cmp1 to i8
Expand All @@ -20,10 +20,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <16 x i8> %a, zeroinitializer
%cast = bitcast <16 x i1> %cmp1 to i16
Expand All @@ -35,12 +35,12 @@ define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <32 x i8> %a, zeroinitializer
%cast = bitcast <32 x i1> %cmp1 to i32
Expand All @@ -52,16 +52,16 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v2.16b, v2.16b, #0
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: cmeq v3.16b, v3.16b, #0
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <64 x i8> %a, zeroinitializer
%cast = bitcast <64 x i1> %cmp1 to i64
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/select_const.ll
Expand Up @@ -9,8 +9,8 @@
define i32 @select_0_or_1(i1 %cond) {
; CHECK-LABEL: select_0_or_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn w8, w0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: bic w0, w8, w0
; CHECK-NEXT: ret
%sel = select i1 %cond, i32 0, i32 1
ret i32 %sel
Expand All @@ -28,8 +28,8 @@ define i32 @select_0_or_1_zeroext(i1 zeroext %cond) {
define i32 @select_0_or_1_signext(i1 signext %cond) {
; CHECK-LABEL: select_0_or_1_signext:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn w8, w0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: bic w0, w8, w0
; CHECK-NEXT: ret
%sel = select i1 %cond, i32 0, i32 1
ret i32 %sel
Expand Down

0 comments on commit 6a2442e

Please sign in to comment.