diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8fbd4cea04c03..3882fe633363b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9059,6 +9059,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, // blocks. if (auto *PHI = dyn_cast(TEUseEI.UserTE->getMainOp())) { TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx); + TEInsertPt = TEInsertBlock->getTerminator(); } else { TEInsertBlock = TEInsertPt->getParent(); } @@ -9122,9 +9123,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, const Instruction *InsertPt = UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator() : &getLastInstructionInBundle(UseEI.UserTE); - if (!UserPHI && TEInsertPt == InsertPt) { - // If 2 gathers are operands of the same non-PHI entry, - // compare operands indices, use the earlier one as the base. + if (TEInsertPt == InsertPt) { + // If 2 gathers are operands of the same entry (regardless of wether + // user is PHI or else), compare operands indices, use the earlier one + // as the base. if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx) continue; // If the user instruction is used for some reason in different @@ -11250,7 +11252,18 @@ Value *BoUpSLP::vectorizeTree( TE->VectorizedValue = nullptr; auto *UserI = cast(TE->UserTreeIndices.front().UserTE->VectorizedValue); - Builder.SetInsertPoint(PrevVec); + // If user is a PHI node, its vector code have to be inserted right before + // block terminator. Since the node was delayed, there were some unresolved + // dependencies at the moment when stab instruction was emitted. In a case + // when any of these dependencies turn out an operand of another PHI, coming + // from this same block, position of a stab instruction will become invalid. + // The is because source vector that supposed to feed this gather node was + // inserted at the end of the block [after stab instruction]. So we need + // to adjust insertion point again to the end of block. + if (isa(UserI)) + Builder.SetInsertPoint(PrevVec->getParent()->getTerminator()); + else + Builder.SetInsertPoint(PrevVec); Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false); PrevVec->replaceAllUsesWith(Vec); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll index 8860cd4d8d141..1cef1032bf5d9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll @@ -15,7 +15,7 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> , float [[DOTPRE_PRE]], i32 0 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ] ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ] @@ -29,9 +29,8 @@ define void @test() { ; CHECK-NEXT: tail call void @foo(float [[MUL]]) ; CHECK-NEXT: [[I2:%.*]] = load float, ptr poison, align 4 ; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[I2]], i32 0 +; CHECK-NEXT: [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0 ; CHECK-NEXT: [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> -; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll index 28e0b06f69673..e5d7ad138b4de 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll @@ -8,7 +8,7 @@ ; YAML: Function: test ; YAML: Args: ; YAML: - String: 'Stores SLP vectorized with cost ' -; YAML: - Cost: '-3' +; YAML: - Cost: '-6' ; YAML: - String: ' and with tree size ' ; YAML: - TreeSize: '14' ; YAML: ...