diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 09b31616e0882..4b399fe9f06a1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27403,6 +27403,15 @@ static SDValue performMULLCombine(SDNode *N, static SDValue performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { + SDLoc DL(N); + + // If a DUP(Op0) already exists, reuse it for the scalar_to_vector. + if (DCI.isAfterLegalizeDAG()) { + if (SDNode *LN = DCI.DAG.getNodeIfExists(AArch64ISD::DUP, N->getVTList(), + N->getOperand(0))) + return SDValue(LN, 0); + } + // Let's do below transform. // // t34: v4i32 = AArch64ISD::UADDLV t2 @@ -27439,7 +27448,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); // Let's generate new sequence with AArch64ISD::NVCAST. - SDLoc DL(N); SDValue EXTRACT_SUBVEC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, UADDLV, DAG.getConstant(0, DL, MVT::i64)); diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index cdde11042462b..afa4c96b8ce89 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -1371,11 +1371,10 @@ define noundef <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %sc ; CHECK-SD-NEXT: lsr x9, x0, #16 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 ; CHECK-SD-NEXT: dup v4.8h, w0 -; CHECK-SD-NEXT: dup v1.8h, w9 -; CHECK-SD-NEXT: fmov s3, w9 -; CHECK-SD-NEXT: sqneg v2.8h, v1.8h -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-SD-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI14_0] +; CHECK-SD-NEXT: dup v2.8h, w9 +; CHECK-SD-NEXT: sqneg v1.8h, v2.8h +; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-SD-NEXT: rev32 v2.8h, v0.8h ; CHECK-SD-NEXT: sqdmull v3.4s, v0.4h, v4.4h ; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v4.8h