Skip to content

Commit

Permalink
[AArch64] Handle any extend whilst lowering addw/addl/subw/subl
Browse files Browse the repository at this point in the history
This adds an extra TableGen PatFrags, zanyext, which matches either an
anyext or a zext, and uses it in the AArch64 backend so that the
uaddw/uaddl/usubw/usubl patterns also match any-extended operands.

Differential Revision: https://reviews.llvm.org/D93833
  • Loading branch information
davemgreen committed Jan 6, 2021
1 parent 4839378 commit a9b6440
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 72 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Expand Up @@ -920,6 +920,10 @@ def not : PatFrag<(ops node:$in), (xor node:$in, -1)>;
def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;

// zanyext - Matches either a zero extend (zext) or an any extend (anyext) of
// $op, so a single pattern can accept both forms of extension.
def zanyext : PatFrags<(ops node:$op),
[(zext node:$op),
(anyext node:$op)]>;

// null_frag - The null pattern operator is used in multiclass instantiations
// which accept an SDPatternOperator for use in matching patterns for internal
// definitions. When expanding a pattern, if the null fragment is referenced
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -4765,18 +4765,18 @@ defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
AArch64uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
Expand Down
84 changes: 24 additions & 60 deletions llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
Expand Up @@ -103,9 +103,7 @@ entry:
define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -119,9 +117,7 @@ entry:
define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -136,9 +132,7 @@ entry:
define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -237,9 +231,7 @@ entry:
define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -255,9 +247,7 @@ entry:
define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -274,9 +264,7 @@ entry:
define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: uaddl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -359,8 +347,7 @@ entry:
define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -373,8 +360,7 @@ entry:
define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -388,8 +374,7 @@ entry:
define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -475,8 +460,7 @@ entry:
define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -490,8 +474,7 @@ entry:
define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -506,8 +489,7 @@ entry:
define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -594,9 +576,7 @@ entry:
define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -610,9 +590,7 @@ entry:
define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -627,9 +605,7 @@ entry:
define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -728,9 +704,7 @@ entry:
define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: usubl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -746,9 +720,7 @@ entry:
define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: usubl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -765,9 +737,7 @@ entry:
define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: usubl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -850,8 +820,7 @@ entry:
define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -864,8 +833,7 @@ entry:
define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -879,8 +847,7 @@ entry:
define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down Expand Up @@ -966,8 +933,7 @@ entry:
define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_a8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT: bic v0.8h, #255, lsl #8
; CHECK-NEXT: ret
entry:
Expand All @@ -981,8 +947,7 @@ entry:
define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_a16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand All @@ -997,8 +962,7 @@ entry:
define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_a32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/lowerMUL-newload.ll
Expand Up @@ -21,10 +21,10 @@ entry:
define <4 x i32> @mlai16_and(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
; CHECK-LABEL: mlai16_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: umlal v2.4s, v1.4h, v0.4h
; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h
; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%v0 = sext <4 x i16> %vec0 to <4 x i32>
Expand Down Expand Up @@ -157,10 +157,10 @@ entry:
define <2 x i64> @mlai32_and(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2) {
; CHECK-LABEL: mlai32_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-NEXT: umlal v2.2d, v1.2s, v0.2s
; CHECK-NEXT: movi v0.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%v0 = sext <2 x i32> %vec0 to <2 x i64>
Expand Down

0 comments on commit a9b6440

Please sign in to comment.