Patch summary (free text ahead of the first "diff --git" header; it is not
part of either hunk).

SLPVectorizer.cpp, areTwoInsertFromSameBuildVector():
  Inside the do-loop, the combined check "IE2 == VU || IE1 == V" used to
  return true unconditionally.  It is split into two checks: when IE2 == VU
  the function now returns VU->hasOneUse(), and when IE1 == V it returns
  V->hasOneUse().  The function therefore answers "true" at these two exits
  only if the insertelement that was reached has a single use.
  NOTE(review): presumably this stops two inserts that merely share a
  multi-use base insertelement from being treated as members of one
  buildvector sequence; confirm against the commit message.

buildvector-same-lane-insert.ll, new function @test1:
  %.sroa.0.0.vec.insert.i5.i10 (insert into lane 0) is the vector operand of
  two different insertelement instructions, each writing lane 1 and each
  stored to memory, so the lane-0 insert has two uses.  The CHECK lines
  expect the scalar loads and the fsub to become <2 x float> operations,
  while all three insertelement instructions remain in the output, fed by
  extractelement.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc0e164ab99fe1..2a8b3cee5c8a36 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6618,8 +6618,10 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
   // either VU as the original vector for IE2 or V as the original vector for
   // IE1.
   do {
-    if (IE2 == VU || IE1 == V)
-      return true;
+    if (IE2 == VU)
+      return VU->hasOneUse();
+    if (IE1 == V)
+      return V->hasOneUse();
     if (IE1) {
       if ((IE1 != VU && !IE1->hasOneUse()) ||
           getInsertIndex(IE1).getValueOr(Idx2) == Idx2)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
index 0d3c7809e868e3..77174151e96350 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -36,3 +36,41 @@ define void @test() {
   store <2 x float> zeroinitializer, ptr null, align 4
   ret void
 }
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
+; CHECK-NEXT:    [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1
+; CHECK-NEXT:    store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT:    [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1
+; CHECK-NEXT:    store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = getelementptr inbounds float, ptr undef, i32 2
+  %2 = load float, ptr %1, align 4
+  %3 = load float, ptr undef, align 4
+  %4 = fsub float %2, %3
+  %5 = getelementptr inbounds float, ptr undef, i32 3
+  %6 = load float, ptr %5, align 4
+  %7 = getelementptr inbounds float, ptr undef, i32 1
+  %8 = load float, ptr %7, align 4
+  %9 = fsub float %6, %8
+  %10 = fcmp olt float %9, %4
+  %.sroa.0.0.vec.insert.i5.i10 = insertelement <2 x float> undef, float %3, i64 0
+  %.sroa.0.4.vec.insert.i10.i13 = insertelement <2 x float> %.sroa.0.0.vec.insert.i5.i10, float %8, i64 1
+  store <2 x float> %.sroa.0.4.vec.insert.i10.i13, ptr null, align 4
+  %.sroa.0.4.vec.insert.i10.i13.2 = insertelement <2 x float> %.sroa.0.0.vec.insert.i5.i10, float %6, i64 1
+  store <2 x float> %.sroa.0.4.vec.insert.i10.i13.2, ptr null, align 4
+  ret void
+}
+