diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1e209839f45e7..2bb8e43243066 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2428,6 +2428,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FCMLTz)
     MAKE_CASE(AArch64ISD::SADDV)
     MAKE_CASE(AArch64ISD::UADDV)
+    MAKE_CASE(AArch64ISD::UADDLV)
     MAKE_CASE(AArch64ISD::SDOT)
     MAKE_CASE(AArch64ISD::UDOT)
     MAKE_CASE(AArch64ISD::SMINV)
@@ -5323,6 +5324,20 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
                        Op.getOperand(1), Op.getOperand(2));
   }
+  case Intrinsic::aarch64_neon_uaddlv: {
+    EVT OpVT = Op.getOperand(1).getValueType();
+    EVT ResVT = Op.getValueType();
+    if (ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8)) {
+      // Use v4i32 rather than v2i32 to avoid an insert_subvector.
+      SDValue UADDLV =
+          DAG.getNode(AArch64ISD::UADDLV, dl, MVT::v4i32, Op.getOperand(1));
+      SDValue EXTRACT_VEC_ELT =
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, UADDLV,
+                      DAG.getConstant(0, dl, MVT::i64));
+      return EXTRACT_VEC_ELT;
+    }
+    return SDValue();
+  }
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6e92b2fcab075..67c344318e0d3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -242,6 +242,9 @@ enum NodeType : unsigned {
   SADDV,
   UADDV,
 
+  // Unsigned sum Long across Vector
+  UADDLV,
+
   // Add Pairwise of two vectors
   ADDP,
   // Add Long Pairwise
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 5bdb1d9ffc6d9..4a1f46f2576ae 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -752,6 +752,7 @@ def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
 def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
 def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
 def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
 
 def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
                            [(abdu node:$lhs, node:$rhs),
@@ -6461,6 +6462,12 @@ def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op)))
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;
 
+def : Pat<(v4i32 (AArch64uaddlv (v8i8 V64:$Rn))),
+          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$Rn), hsub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v16i8 V128:$Rn))),
+          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$Rn), hsub))>;
+
 // Patterns for across-vector intrinsics, that have a node equivalent, that
 // returns a vector (with only the low lane defined) instead of a scalar.
 // In effect, opNode is the same as (scalar_to_vector (IntNode)).
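For reference, a minimal IR sketch of the shape this lowering improves. It is illustrative only and not part of the patch: the function name uaddlv_dup8h is made up, and the llc invocation is one plausible way to run it. Because the intrinsic is now lowered to a v4i32-typed AArch64ISD::UADDLV plus an extract of lane 0, the sum stays in a SIMD register, so a following lane-splat should read v0.h[0] directly instead of round-tripping through a GPR.

; Illustrative reproducer; run with e.g.: llc -mtriple=aarch64 -o - reproducer.ll
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)

define <8 x i16> @uaddlv_dup8h(<8 x i8> %a) {
  ; The i32 result is the low lane of the v4i32 UADDLV node, so the
  ; splat below should select to "dup v0.8h, v0.h[0]" with no fmov.
  %sum = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
  %t = trunc i32 %sum to i16
  %v = insertelement <8 x i16> poison, i16 %t, i64 0
  %s = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %s
}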
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 665e8f90f850b..bf420700eb575 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -95,8 +95,8 @@ define void @insert_vec_v2i32_uaddlv_from_v16i8(ptr %0) {
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.16b h0, v0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
-; CHECK-NEXT:    ucvtf.2s v1, v1
-; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ucvtf.2s v0, v1
+; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
 entry:
 
diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll
index 27b105381aa07..bb5b19e51995a 100644
--- a/llvm/test/CodeGen/AArch64/dp1.ll
+++ b/llvm/test/CodeGen/AArch64/dp1.ll
@@ -205,8 +205,7 @@ define void @ctpop_i32() {
 ; CHECK-SDAG-NEXT:    fmov d0, x9
 ; CHECK-SDAG-NEXT:    cnt v0.8b, v0.8b
 ; CHECK-SDAG-NEXT:    uaddlv h0, v0.8b
-; CHECK-SDAG-NEXT:    fmov w9, s0
-; CHECK-SDAG-NEXT:    str w9, [x8]
+; CHECK-SDAG-NEXT:    str s0, [x8]
 ; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: ctpop_i32:
diff --git a/llvm/test/CodeGen/AArch64/neon-addlv.ll b/llvm/test/CodeGen/AArch64/neon-addlv.ll
index aaa9f9139b090..0f5a19c7a0f3b 100644
--- a/llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ b/llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -177,3 +177,21 @@ entry:
   %0 = and i32 %vaddlv.i, 65535
   ret i32 %0
 }
+
+define dso_local <8 x i8> @bar(<8 x i8> noundef %a) local_unnamed_addr #0 {
+; CHECK-LABEL: bar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #3
+; CHECK-NEXT:    ret
+entry:
+  %vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+  %0 = trunc i32 %vaddlv.i to i16
+  %vecinit.i = insertelement <8 x i16> undef, i16 %0, i64 0
+  %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> poison, <8 x i32> zeroinitializer
+  %vrshrn_n2 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %vecinit7.i, i32 3)
+  ret <8 x i8> %vrshrn_n2
+}
+
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
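The updated tests cover the splatted v8i8 form (bar above), the ctpop path in dp1.ll, and the vector-insert path for v16i8 in aarch64-neon-vector-insert-uaddlv.ll. A hypothetical companion check for the plain scalar v16i8 form, if one wanted to exercise the second new pattern directly, might look like the sketch below; the function name and the comments are assumptions, not part of the patch.

declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)

define i32 @uaddlv_v16i8(<16 x i8> %a) {
  ; Expected to lower via the new v4i32 AArch64ISD::UADDLV node, selected by
  ; the UADDLVv16i8v pattern (SUBREG_TO_REG into hsub), then extract lane 0.
  %sum = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
  ret i32 %sum
}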