Skip to content

Commit

Permalink
[AArch64] Remove copy instruction between uaddlv and dup
Browse files Browse the repository at this point in the history
If there are copy instructions between uaddlv and dup that transfer the value
from a GPR to an FPR, try to remove them by using duplane.

Differential Revision: https://reviews.llvm.org/D159267
  • Loading branch information
jaykang10 committed Sep 5, 2023
1 parent d17e6cc commit 67fc0d3
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 4 deletions.
15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2428,6 +2428,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::UADDLV)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
Expand Down Expand Up @@ -5323,6 +5324,20 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
Op.getOperand(1), Op.getOperand(2));
}
// Lowering for the NEON uaddlv intrinsic. When the result is i32 and the
// source vector is v8i8 or v16i8, the intrinsic is rewritten as an
// AArch64ISD::UADDLV node that produces a vector, followed by an extract of
// lane 0, so the scalar result is expressed as a vector-lane read.
case Intrinsic::aarch64_neon_uaddlv: {
EVT OpVT = Op.getOperand(1).getValueType();
EVT ResVT = Op.getValueType();
if (ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8)) {
// Use v4i32 rather than v2i32 as the node's result type in order to avoid
// an insert_subvector.
SDValue UADDLV =
DAG.getNode(AArch64ISD::UADDLV, dl, MVT::v4i32, Op.getOperand(1));
// The scalar i32 result is lane 0 of the UADDLV vector.
SDValue EXTRACT_VEC_ELT =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, UADDLV,
DAG.getConstant(0, dl, MVT::i64));
return EXTRACT_VEC_ELT;
}
// Any other result/operand type combination is not rewritten here; an empty
// SDValue is returned.
return SDValue();
}
}
}

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,9 @@ enum NodeType : unsigned {
SADDV,
UADDV,

// Unsigned sum Long across Vector
UADDLV,

// Add Pairwise of two vectors
ADDP,
// Add Long Pairwise
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,7 @@ def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;

def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abdu node:$lhs, node:$rhs),
Expand Down Expand Up @@ -6461,6 +6462,12 @@ def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op)))
(v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
ssub))>;

// Select the v4i32-typed AArch64uaddlv node to the UADDLV instruction for
// v8i8 (64-bit) and v16i8 (128-bit) sources. The instruction result is wrapped
// into the vector result with SUBREG_TO_REG at the hsub subregister index.
def : Pat<(v4i32 (AArch64uaddlv (v8i8 V64:$Rn))),
(v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$Rn), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v16i8 V128:$Rn))),
(v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$Rn), hsub))>;

// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ define void @insert_vec_v2i32_uaddlv_from_v16i8(ptr %0) {
; CHECK-NEXT: movi.2d v1, #0000000000000000
; CHECK-NEXT: uaddlv.16b h0, v0
; CHECK-NEXT: mov.s v1[0], v0[0]
; CHECK-NEXT: ucvtf.2s v1, v1
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: ucvtf.2s v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret

entry:
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/dp1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,7 @@ define void @ctpop_i32() {
; CHECK-SDAG-NEXT: fmov d0, x9
; CHECK-SDAG-NEXT: cnt v0.8b, v0.8b
; CHECK-SDAG-NEXT: uaddlv h0, v0.8b
; CHECK-SDAG-NEXT: fmov w9, s0
; CHECK-SDAG-NEXT: str w9, [x8]
; CHECK-SDAG-NEXT: str s0, [x8]
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: ctpop_i32:
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/AArch64/neon-addlv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,21 @@ entry:
%0 = and i32 %vaddlv.i, 65535
ret i32 %0
}

; The uaddlv result is truncated to i16 and splatted across <8 x i16> before
; feeding rshrn. The expected code has the dup read lane 0 of the uaddlv
; result directly, with no copy instruction between uaddlv and dup.
define dso_local <8 x i8> @bar(<8 x i8> noundef %a) local_unnamed_addr #0 {
; CHECK-LABEL: bar:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uaddlv h0, v0.8b
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: rshrn v0.8b, v0.8h, #3
; CHECK-NEXT: ret
entry:
%vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %vaddlv.i to i16
%vecinit.i = insertelement <8 x i16> undef, i16 %0, i64 0
%vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> poison, <8 x i32> zeroinitializer
%vrshrn_n2 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %vecinit7.i, i32 3)
ret <8 x i8> %vrshrn_n2
}

declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)

0 comments on commit 67fc0d3

Please sign in to comment.