diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4b3cc9e41bbd6..77362d610a0c8 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5806,21 +5806,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (TreeEntry *E = getTreeEntry(S.OpValue)) { LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); if (!E->isSame(VL)) { - if (!doesNotNeedToBeScheduled(S.OpValue)) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndicies); - return; - } - } else { - // Record the reuse of the tree node. FIXME, currently this is only used - // to properly draw the graph rather than for the actual vectorization. - E->UserTreeIndices.push_back(UserTreeIdx); - LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue - << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } + // Record the reuse of the tree node. FIXME, currently this is only used to + // properly draw the graph rather than for the actual vectorization. + E->UserTreeIndices.push_back(UserTreeIdx); + LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue + << ".\n"); + return; } // Check that none of the instructions in the bundle are already in the tree. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll index 3b54b0a058d0c..87292700317ea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll @@ -83,12 +83,13 @@ define void @test2(double %0) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[TMP4:%.*]] ; CHECK: 4: -; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fsub double 1.000000e+00, [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> , [[TMP3]] ; CHECK-NEXT: br label [[DOTBACKEDGE:%.*]] ; CHECK: .backedge: -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP8]], zeroinitializer ; CHECK-NEXT: br label [[TMP4]] ; br label %2