Skip to content

Commit

Permalink
[AArch64] Combine concat(binop, binop) into binop(concat, concat) (#8…
Browse files Browse the repository at this point in the history
…9911)

This generalizes the existing combine for concat(radd, radd) to any
binops. For much the same reason as the existing code, pushing the
concat up through the tree is hopefully quicker than (or the same as)
the existing two half-vector operations, and can help combine away the
concat.
  • Loading branch information
davemgreen committed Apr 25, 2024
1 parent 03b1a0c commit 76ea5fe
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 44 deletions.
12 changes: 5 additions & 7 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18663,14 +18663,12 @@ static SDValue performConcatVectorsCombine(SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();

// Optimise concat_vectors of two [us]avgceils or [us]avgfloors with a 128-bit
// destination size, combine into an avg of two contacts of the source
// vectors. eg: concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c),
// concat(b, d))
// Optimise concat_vectors of two identical binops with a 128-bit destination
// size, combine into a binop of two concats of the source vectors. eg:
// concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
(N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS) &&
N0->hasOneUse() && N1->hasOneUse()) {
DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
N1->hasOneUse()) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
Expand Down
81 changes: 50 additions & 31 deletions llvm/test/CodeGen/AArch64/concatbinop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_add:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.4h, v2.4h, v3.4h
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%x = add <4 x i16> %a, %b
%y = add <4 x i16> %c, %d
Expand All @@ -33,13 +37,9 @@ define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: concat_addtunc2:
; CHECK: // %bb.0:
; CHECK-NEXT: xtn v1.4h, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: xtn v2.4h, v2.4s
; CHECK-NEXT: xtn v3.4h, v3.4s
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: add v1.4h, v2.4h, v3.4h
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v3.8h
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%at = trunc <4 x i32> %a to <4 x i16>
%bt = trunc <4 x i32> %b to <4 x i16>
Expand All @@ -54,9 +54,13 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.4h, v2.4h, v3.4h
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%x = sub <4 x i16> %a, %b
%y = sub <4 x i16> %c, %d
Expand All @@ -67,9 +71,13 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_mul:
; CHECK: // %bb.0:
; CHECK-NEXT: mul v2.4h, v2.4h, v3.4h
; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%x = mul <4 x i16> %a, %b
%y = mul <4 x i16> %c, %d
Expand All @@ -80,9 +88,13 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_xor:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%x = xor <4 x i16> %a, %b
%y = xor <4 x i16> %c, %d
Expand All @@ -93,9 +105,13 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_fadd:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%x = fadd <4 x half> %a, %b
%y = fadd <4 x half> %c, %d
Expand All @@ -106,9 +122,13 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_fmul:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%x = fmul <4 x half> %a, %b
%y = fmul <4 x half> %c, %d
Expand All @@ -119,9 +139,13 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_min:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v1.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
%y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d)
Expand All @@ -146,21 +170,16 @@ define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture n
; CHECK-LABEL: signOf_neon:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: movi v0.8b, #1
; CHECK-NEXT: movi v0.16b, #1
; CHECK-NEXT: ldp q3, q4, [x1]
; CHECK-NEXT: cmhi v5.8h, v1.8h, v3.8h
; CHECK-NEXT: cmhi v6.8h, v2.8h, v4.8h
; CHECK-NEXT: cmhi v1.8h, v3.8h, v1.8h
; CHECK-NEXT: cmhi v2.8h, v4.8h, v2.8h
; CHECK-NEXT: xtn v3.8b, v5.8h
; CHECK-NEXT: xtn v4.8b, v6.8h
; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: xtn v2.8b, v2.8h
; CHECK-NEXT: and v3.8b, v3.8b, v0.8b
; CHECK-NEXT: and v4.8b, v4.8b, v0.8b
; CHECK-NEXT: orr v0.8b, v3.8b, v1.8b
; CHECK-NEXT: orr v1.8b, v4.8b, v2.8b
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: uzp1 v3.16b, v5.16b, v6.16b
; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%0 = load <8 x i16>, ptr %a, align 2
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/CodeGen/AArch64/vecreduce-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2825,10 +2825,11 @@ entry:
define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-SD-LABEL: add_pair_v2i16_v2i64_zext:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: movi v2.2d, #0x00ffff0000ffff
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT: uaddlv d0, v0.4s
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
Expand Down Expand Up @@ -3578,10 +3579,11 @@ entry:
define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
; CHECK-SD-LABEL: add_pair_v2i8_v2i64_zext:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: movi v2.2d, #0x0000ff000000ff
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT: uaddlv d0, v0.4s
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
Expand Down

0 comments on commit 76ea5fe

Please sign in to comment.