diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 569ad8b337db4..a151f3de170a5 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -659,10 +659,10 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, // Find most common element to initialize vector with. This is to avoid // unnecessary vinsert/valign for cases where the same value is present // many times. Creates a histogram of the vector's elements to find the - // most common element n. + // most common element. assert(4*Words.size() == Subtarget.getVectorLength()); - int VecHist[32]; - int n = 0; + SmallVector VecHist(32); + int MaxAt = 0; for (unsigned i = 0; i != NumWords; ++i) { VecHist[i] = 0; if (Words[i].isUndef()) @@ -671,60 +671,29 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, if (Words[i] == Words[j]) VecHist[i]++; - if (VecHist[i] > VecHist[n]) - n = i; + if (VecHist[i] > VecHist[MaxAt]) + MaxAt = i; } - SDValue HalfV = getZero(dl, VecTy, DAG); - if (VecHist[n] > 1) { - SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]); - HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy, - {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)}); - } - SDValue HalfV0 = HalfV; - SDValue HalfV1 = HalfV; - - // Construct two halves in parallel, then or them together. Rn and Rm count - // number of rotations needed before the next element. One last rotation is - // performed post-loop to position the last element. - int Rn = 0, Rm = 0; - SDValue Sn, Sm; - SDValue N = HalfV0; - SDValue M = HalfV1; - for (unsigned i = 0; i != NumWords/2; ++i) { - + // If each value is different, don't do splat, just insert them one by one. + bool NoSplat = VecHist[MaxAt] <= 1; + SDValue RotV = NoSplat + ? DAG.getUNDEF(VecTy) + : DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[MaxAt]); + int Rn = 0; + for (unsigned i = 0; i != NumWords; ++i) { // Rotate by element count since last insertion. - if (Words[i] != Words[n] || VecHist[n] <= 1) { - Sn = DAG.getConstant(Rn, dl, MVT::i32); - HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); - N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, - {HalfV0, Words[i]}); + if (NoSplat || Words[i] != Words[MaxAt]) { + RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, + {RotV, DAG.getConstant(Rn, dl, MVT::i32)}); + RotV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, Words[i]}); Rn = 0; } - if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { - Sm = DAG.getConstant(Rm, dl, MVT::i32); - HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); - M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, - {HalfV1, Words[i+NumWords/2]}); - Rm = 0; - } Rn += 4; - Rm += 4; } // Perform last rotation. - Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32); - Sm = DAG.getConstant(Rm, dl, MVT::i32); - HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); - HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); - - SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0); - SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1); - - SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1}); - - SDValue OutV = - DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV); - return OutV; + return DAG.getNode(HexagonISD::VROR, dl, VecTy, + {RotV, DAG.getConstant(Rn, dl, MVT::i32)}); } SDValue diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll index e6b8445f51217..159001c113011 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll @@ -6,35 +6,31 @@ define <32 x i32> @fred(i32 %a0) #0 { ; CHECK: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r3:2 = combine(#20,#9) -; CHECK-NEXT: v0 = vxor(v0,v0) -; CHECK-NEXT: r1 = #24 -; CHECK-NEXT: r4 = #12 +; CHECK-NEXT: r3:2 = combine(#76,#7) +; CHECK-NEXT: r1 = #12 +; CHECK-NEXT: r4 = #9 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vror(v0,r1) +; CHECK-NEXT: v0 = vror(v0,r1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.w = vinsert(r2) -; CHECK-NEXT: r4 = #7 -; CHECK-NEXT: r2 = #116 -; CHECK-NEXT: v0 = vror(v0,r4) +; CHECK-NEXT: v0.w = vinsert(r2) +; CHECK-NEXT: r2 = #20 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.w = vinsert(r4) +; CHECK-NEXT: v0 = vror(v0,r3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vror(v1,r3) +; CHECK-NEXT: v0.w = vinsert(r4) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.w = vinsert(r0) ; CHECK-NEXT: v0 = vror(v0,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vror(v1,r3) +; CHECK-NEXT: v0.w = vinsert(r0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vor(v0,v1) +; CHECK-NEXT: v0 = vror(v0,r2) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %v0 = insertelement <32 x i32> undef, i32 undef, i32 0